source-engine/vstdlib/coroutine.cpp

1158 lines
37 KiB
C++

//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
// Build Notes: In order for the coroutine system to work a few build options
// need to be set for coroutine.cpp itself. These are the VPC
// entries for those options:
// $Compiler
// {
// $EnableC++Exceptions "No"
// $BasicRuntimeChecks "Default"
// $EnableFloatingPointExceptions "No"
// }
//
// If you have not set these options you will get a strange popup in
// Visual Studio at the end of Coroutine_Continue().
//
//=============================================================================
//#include "pch_vstdlib.h"
#if defined(_DEBUG)
// Debug builds: assert that the expression evaluates to false.
// The argument is parenthesized so that a compound expression such as
// `a || b` is negated as a whole rather than only its first operand.
#define DbgVerifyNot(x) Assert(!(x))
#else
// Release builds: the expression is still evaluated (callers in this file pass
// calls whose side effects are needed), but its result is not checked.
// Parenthesized so it cannot re-associate with neighbouring operators.
#define DbgVerifyNot(x) (x)
#endif
#include "vstdlib/coroutine.h"
#include "tier0/vprof.h"
#include "tier0/minidump.h"
#include "tier1/utllinkedlist.h"
#include "tier1/utlvector.h"
#include <setjmp.h>
// for debugging
//#define CHECK_STACK_CORRUPTION
// When STEAM is not defined, map the PvAlloc/FreePv allocation macros used by
// the coroutine stack save/restore code directly onto malloc/free.
#ifndef STEAM
#define PvAlloc(x) malloc(x)
#define FreePv(x) free(x)
#endif
#ifdef CHECK_STACK_CORRUPTION
#include "tier1/checksum_md5.h"
#include "../tier1/checksum_md5.cpp"
#endif // CHECK_STACK_CORRUPTION
// Define COROUTINE_TRACE to emit a debug message at the coroutine
// create / continue / yield / stack save / stack restore call sites below.
//#define COROUTINE_TRACE
#ifdef COROUTINE_TRACE
#include "tier1/fmtstr.h"
static CFmtStr g_fmtstr;
#ifdef WIN32
extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA( const char * );
#else
// Non-Windows stand-in for OutputDebugStringA: writes the message to stderr.
// The text is written with fputs() instead of being handed to fprintf() as the
// format string, so '%' characters in an already-formatted message are printed
// verbatim and can never be interpreted as conversion specifiers.
void OutputDebugStringA( const char *pchMsg ) { fputs( pchMsg, stderr ); fflush( stderr ); }
#endif
// Formats into the shared g_fmtstr buffer, then sends the result to the debug output.
#define CoroutineDbgMsg( fmt, ... ) \
{ \
	g_fmtstr.sprintf( fmt, ##__VA_ARGS__ ); \
	OutputDebugStringA( g_fmtstr ); \
}
#else
// Tracing disabled: the macro expands to nothing, so its arguments are never evaluated.
#define CoroutineDbgMsg( pchMsg, ... )
#endif // COROUTINE_TRACE
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// Selects the longjmp implementation used by the coroutine code (Coroutine_longjmp).
// For 64-bit MSVC builds with _MSC_VER >= 1900 an unchecked replacement is declared and the
// layout of the CRT jump buffer is verified at compile time; every other build uses longjmp().
#if defined( _MSC_VER ) && ( _MSC_VER >= 1900 ) && defined( PLATFORM_64BITS )
// The VS2015 longjmp() seems to freak out when jumping back into a coroutine (just like linux if _FORTIFY_SOURCE is defined).
// I can't find an analogue of _FORTIFY_SOURCE for MSVC at the moment, so I wrote a quick assembly routine that does the longjmp() without any safety checks.
extern "C" NORETURN void Coroutine_LongJmp_Unchecked( jmp_buf buffer, int nResult );
#define Coroutine_longjmp Coroutine_LongJmp_Unchecked
// Q_offsetof: byte offset of member m inside struct s, computed from a null base pointer.
#ifdef _WIN64
#define Q_offsetof(s,m) (size_t)( (ptrdiff_t)&reinterpret_cast<const volatile char&>((((s *)0)->m)) )
#else
#define Q_offsetof(s,m) (size_t)&reinterpret_cast<const volatile char&>((((s *)0)->m))
#endif
// SIZEOF_MEMBER: size of a member, taken from a null pointer so no instance is required.
#define SIZEOF_MEMBER( className, memberName ) sizeof( ((className*)nullptr)->memberName )
// Validate_Jump_Buffer: compile-time check that _Member has the same offset and the same size
// in the CRT's _JUMP_BUFFER and in the local _Duplicate_JUMP_BUFFER mirror declared below.
#define Validate_Jump_Buffer( _Member ) COMPILE_TIME_ASSERT( (Q_offsetof( _JUMP_BUFFER, _Member ) == Q_offsetof( _Duplicate_JUMP_BUFFER, _Member )) && (SIZEOF_MEMBER( _JUMP_BUFFER, _Member ) == SIZEOF_MEMBER( _Duplicate_JUMP_BUFFER, _Member )) )
// Validate that the structure the assembly routine assumes matches what the CRT setjmp thinks it is.
# if defined( PLATFORM_64BITS )
// Mirror of the 64-bit jump buffer layout that the checks below compare against _JUMP_BUFFER.
struct _Duplicate_JUMP_BUFFER
{
unsigned __int64 Frame;
unsigned __int64 Rbx;
unsigned __int64 Rsp;
unsigned __int64 Rbp;
unsigned __int64 Rsi;
unsigned __int64 Rdi;
unsigned __int64 R12;
unsigned __int64 R13;
unsigned __int64 R14;
unsigned __int64 R15;
unsigned __int64 Rip;
unsigned long MxCsr;
unsigned short FpCsr;
unsigned short Spare;
SETJMP_FLOAT128 Xmm6;
SETJMP_FLOAT128 Xmm7;
SETJMP_FLOAT128 Xmm8;
SETJMP_FLOAT128 Xmm9;
SETJMP_FLOAT128 Xmm10;
SETJMP_FLOAT128 Xmm11;
SETJMP_FLOAT128 Xmm12;
SETJMP_FLOAT128 Xmm13;
SETJMP_FLOAT128 Xmm14;
SETJMP_FLOAT128 Xmm15;
};
// Total size first, then every member individually.
COMPILE_TIME_ASSERT( sizeof( _JUMP_BUFFER ) == sizeof( _Duplicate_JUMP_BUFFER ) );
Validate_Jump_Buffer( Frame );
Validate_Jump_Buffer( Rbx );
Validate_Jump_Buffer( Rsp );
Validate_Jump_Buffer( Rbp );
Validate_Jump_Buffer( Rsi );
Validate_Jump_Buffer( Rdi );
Validate_Jump_Buffer( R12 );
Validate_Jump_Buffer( R13 );
Validate_Jump_Buffer( R14 );
Validate_Jump_Buffer( R15 );
Validate_Jump_Buffer( Rip );
Validate_Jump_Buffer( MxCsr );
Validate_Jump_Buffer( FpCsr );
Validate_Jump_Buffer( Spare );
Validate_Jump_Buffer( Xmm6 );
Validate_Jump_Buffer( Xmm7 );
Validate_Jump_Buffer( Xmm8 );
Validate_Jump_Buffer( Xmm9 );
Validate_Jump_Buffer( Xmm10 );
Validate_Jump_Buffer( Xmm11 );
Validate_Jump_Buffer( Xmm12 );
Validate_Jump_Buffer( Xmm13 );
Validate_Jump_Buffer( Xmm14 );
Validate_Jump_Buffer( Xmm15 );
# else
// NOTE(review): as written this branch cannot be compiled, because the enclosing #if already
// requires PLATFORM_64BITS to be defined. It mirrors the 32-bit jump buffer layout.
struct _Duplicate_JUMP_BUFFER
{
unsigned long Ebp;
unsigned long Ebx;
unsigned long Edi;
unsigned long Esi;
unsigned long Esp;
unsigned long Eip;
unsigned long Registration;
unsigned long TryLevel;
unsigned long Cookie;
unsigned long UnwindFunc;
unsigned long UnwindData[6];
};
COMPILE_TIME_ASSERT( sizeof( _JUMP_BUFFER ) == sizeof( _Duplicate_JUMP_BUFFER ) );
Validate_Jump_Buffer( Ebp );
Validate_Jump_Buffer( Ebx );
Validate_Jump_Buffer( Edi );
Validate_Jump_Buffer( Esi );
Validate_Jump_Buffer( Esp );
Validate_Jump_Buffer( Eip );
Validate_Jump_Buffer( Registration );
Validate_Jump_Buffer( TryLevel );
Validate_Jump_Buffer( Cookie );
Validate_Jump_Buffer( UnwindFunc );
// NOTE(review): UnwindData is declared with 6 elements, so index 6 names the one-past-the-end
// slot; this compares where the array ends rather than where it starts - confirm that is intended.
Validate_Jump_Buffer( UnwindData[6] );
# endif
#else
// All other toolchains / targets: use the standard longjmp().
#define Coroutine_longjmp longjmp
#endif
// It *feels* like we should need barriers around our setjmp/longjmp calls, and the memcpy's,
// to make sure the optimizer doesn't reorder us across register loads/stores, so I've put them
// in what seem like the appropriate spots, but we seem to run ok without them, so...
// As defined here, RW_MEMORY_BARRIER expands to nothing on every platform: the actual barrier
// calls are left inside comments and only mark where a barrier would go.
#ifdef GNUC
#define RW_MEMORY_BARRIER /* __sync_synchronize() */
#else
#define RW_MEMORY_BARRIER /* _ReadWriteBarrier() */
#endif
// Return values from setjmp() as used by the coroutine code.
// 0 is what setjmp() returns when it has just saved the state; the other values are the
// codes this file passes to Coroutine_longjmp().
static const int k_iSetJmpStateSaved = 0x00;
// resume / pass through (used by a normal continue and by a yield back to the caller)
static const int k_iSetJmpContinue = 0x01;
// the coroutine function has returned (passed by Coroutine_Finish)
static const int k_iSetJmpDone = 0x02;
// resume the coroutine only to raise a debug break or assert, then yield again
static const int k_iSetJmpDbgBreak = 0x03;
// Distance along the stack at which a coroutine function's stack starts.
// Internal_Coroutine_Continue stackalloc()s k_cubCoroutineStackGap bytes before launching a
// coroutine from the main coroutine, and k_cubCoroutineStackGapSmall bytes when it is launched
// from inside another coroutine.
#ifdef _PS3
// PS3 has a small stack. Hopefully we don't need 64k of padding!
static const int k_cubCoroutineStackGap = (3 * 1024);
static const int k_cubCoroutineStackGapSmall = 64;
#else
static const int k_cubCoroutineStackGap = (64 * 1024);
static const int k_cubCoroutineStackGapSmall = 64;
#endif
// Warning size for allocated stacks: CCoroutine::SaveStack asserts that the number of bytes it
// saves is below this value.
#ifdef _DEBUG
// In debug builds, we'll end up with much more stack usage in some scenarios that isn't representative of release
// builds. We should still warn if we're going way above what we could expect the optimizer to save us from, but the
// warning is more salient in release.
static const int k_cubMaxCoroutineStackSize = (48 * 1024);
#else
static const int k_cubMaxCoroutineStackSize = (32 * 1024);
#endif // defined( _DEBUG )
// GetStackPtr( name ) declares a local `byte *name` describing the current stack position.
//   _WIN64       : value returned by the external GetStackPtr64() routine
//   WIN32        : the esp register, read with inline assembly
//   GNUC / __SNC__ : __builtin_frame_address(0), i.e. the current frame address
// Any other platform is a compile error.
// NOTE(review): the _WIN64 variant already ends in ';' while the others do not, so call sites
// written as `GetStackPtr( p );` produce an extra empty statement there (harmless).
#ifdef _WIN64
extern "C" byte *GetStackPtr64();
#define GetStackPtr( pStackPtr) byte *pStackPtr = GetStackPtr64();
#else
#ifdef WIN32
#define GetStackPtr( pStackPtr ) byte *pStackPtr; __asm mov pStackPtr, esp
#elif defined(GNUC)
// Apple's version of gcc/g++ doesn't return the expected value using the intrinsic, so
// do it the old fashioned way - this will also use asm on linux (since we don't compile
// with llvm/clang there) but that seems fine.
// NOTE(review): the asm alternative described above is commented out below, so the builtin
// is what is actually used on every GNUC build.
//#if defined(__llvm__) || defined(__clang__)
#define GetStackPtr( pStackPtr ) byte *pStackPtr = (byte*)__builtin_frame_address(0)
//#else
//#define GetStackPtr( pStackPtr ) register byte *pStackPtr __asm__( "esp" )
//#endif
#elif defined(__SNC__)
#define GetStackPtr( pStackPtr ) byte *pStackPtr = (byte*)__builtin_frame_address(0)
#else
#error
#endif
#endif
#ifdef _M_X64
#define _REGISTER_ALIGNMENT 16ull
// Returns the number of bytes between p and the _REGISTER_ALIGNMENT-aligned
// address that AlignValue() computes for it.
int CalcAlignOffset( const unsigned char *p )
{
	const unsigned char *pAligned = AlignValue( p, _REGISTER_ALIGNMENT );
	return static_cast<int>( pAligned - p );
}
#endif
//-----------------------------------------------------------------------------
// Purpose: single coroutine descriptor
//-----------------------------------------------------------------------------
// PS3 debug builds only: static scratch buffer that CCoroutine::RestoreStack copies the saved
// stack into, so the heap copy can be freed before the live stack is overwritten.
#if defined( _PS3 ) && defined( _DEBUG )
byte rgStackTempBuffer[65535];
#endif
class CCoroutine
{
public:
	// Builds an idle descriptor: no entry point, no parameter, no saved stack
	// and no stack bounds.
	CCoroutine()
	{
		m_pSavedStack = NULL;
		m_pStackHigh = m_pStackLow = NULL;
		m_cubSavedStack = 0;
		m_pFunc = NULL;
		// Give the parameter a defined value too, so a descriptor whose entry
		// point was never set does not carry an indeterminate pointer.
		m_pvParam = NULL;
		m_pchName = "(none)";
		m_iJumpCode = 0;
		m_pchDebugMsg = NULL;
#ifdef COROUTINE_TRACE
		m_hCoroutine = -1;
#endif
#ifdef _M_X64
		m_nAlignmentBytes = CalcAlignOffset( m_rgubRegisters );
#endif
#if defined( VPROF_ENABLED )
		m_pVProfNodeScope = NULL;
#endif
	}

	// Returns the jmp_buf holding this coroutine's saved registers.
	// On _M_X64 the jmp_buf lives inside an over-sized byte array at an aligned
	// offset; if the object has been moved so that the stored offset no longer
	// yields an aligned address, the contents are shifted to a newly aligned
	// offset before the reference is returned.
	jmp_buf &GetRegisters()
	{
#ifdef _M_X64
		// Did we get moved in memory in such a way that the registers became unaligned?
		// If so, fix them up now
		size_t align = _REGISTER_ALIGNMENT - 1;
		unsigned char *pRegistersCur = &m_rgubRegisters[m_nAlignmentBytes];
		if ( (size_t)pRegistersCur & align )
		{
			m_nAlignmentBytes = CalcAlignOffset( m_rgubRegisters );
			unsigned char *pRegistersNew = &m_rgubRegisters[m_nAlignmentBytes];
			Q_memmove( pRegistersNew, pRegistersCur, sizeof(jmp_buf) );
			pRegistersCur = pRegistersNew;
		}
		return *reinterpret_cast<jmp_buf *>( pRegistersCur );
#else
		return m_Registers;
#endif
	}

	// Releases the heap copy of the stack, if one is held.
	~CCoroutine()
	{
		if ( m_pSavedStack )
		{
			FreePv( m_pSavedStack );
		}
	}

	// Copies the saved stack bytes back to [m_pStackLow, m_pStackLow + m_cubSavedStack),
	// then frees the saved copy. Does nothing when no bytes are saved.
	// The caller must already have moved its own stack pointer out of the region being
	// overwritten; after the memcpy only pThis (not this or earlier locals) is used.
	FORCEINLINE void RestoreStack()
	{
		if ( m_cubSavedStack )
		{
			Assert( m_pStackHigh );
			Assert( m_pSavedStack );
#if defined( _PS3 ) && defined( _DEBUG )
			// Our (and Sony's) memory tracking tools may try to walk the stack during a free() call
			// if we do the free here at our normal point though the stack is invalid since it's in
			// the middle of swapping. Instead move it to a temp buffer now and free while the stack
			// frames in place are still ok.
			Assert( m_cubSavedStack < Q_ARRAYSIZE( rgStackTempBuffer ) );
			memcpy( &rgStackTempBuffer[0], m_pSavedStack, m_cubSavedStack );
			FreePv( m_pSavedStack );
			m_pSavedStack = &rgStackTempBuffer[0];
#endif
			// Assert we're not about to trash our own immediate stack
			GetStackPtr( pStack );
			if ( pStack >= m_pStackLow && pStack <= m_pStackHigh )
			{
				CoroutineDbgMsg( g_fmtstr.sprintf( "Restoring stack over ESP (%x, %x, %x)\n", pStack, m_pStackLow, m_pStackHigh ) );
				AssertMsg3( false, "Restoring stack over ESP (%p, %p, %p)\n", pStack, m_pStackLow, m_pStackHigh );
			}
			// Make sure we can access our instance pointer after restoring the stack. This function is inlined, so the compiler could decide to
			// use an existing coroutine pointer that is already on the stack from the previous function (does so on the PS3), and will be overwritten
			// when we memcpy below. Any allocations here should be ok, as the caller should have advanced the stack past the stack area where the
			// new stack will be copied
			CCoroutine *pThis = (CCoroutine*)stackalloc( sizeof( CCoroutine* ) );
			pThis = this;
			RW_MEMORY_BARRIER;
			memcpy( m_pStackLow, m_pSavedStack, m_cubSavedStack );
			// WARNING: The stack has been replaced.. do not use previous stack variables or this
#ifdef CHECK_STACK_CORRUPTION
			// Re-hash the restored bytes and compare against the digest taken in SaveStack().
			MD5Init( &pThis->m_md52 );
			MD5Update( &pThis->m_md52, pThis->m_pStackLow, pThis->m_cubSavedStack );
			MD5Final( pThis->m_digest2, &pThis->m_md52 );
			Assert( 0 == Q_memcmp( pThis->m_digest, pThis->m_digest2, MD5_DIGEST_LENGTH ) );
#endif
			// free the saved stack info
			pThis->m_cubSavedStack = 0;
#if !defined( _PS3 ) || !defined( _DEBUG )
			FreePv( pThis->m_pSavedStack );
#endif
			pThis->m_pSavedStack = NULL;
			// If we were the "main thread", reset our stack pos to zero
			if ( NULL == pThis->m_pFunc )
			{
				pThis->m_pStackLow = pThis->m_pStackHigh = 0;
			}
			// resume accounting against the vprof node we were in when we yielded
			// Make sure we are added after the coroutine we just copied onto the stack
#if defined( VPROF_ENABLED )
			pThis->m_pVProfNodeScope = g_VProfCurrentProfile.GetCurrentNode();
			if ( g_VProfCurrentProfile.IsEnabled() )
			{
				// Re-enter the scopes recorded by SaveStack(), in reverse of the order they were exited.
				FOR_EACH_VEC_BACK( pThis->m_vecProfNodeStack, i )
				{
					g_VProfCurrentProfile.EnterScope(
						pThis->m_vecProfNodeStack[i]->GetName(),
						0,
						g_VProfCurrentProfile.GetBudgetGroupName( pThis->m_vecProfNodeStack[i]->GetBudgetGroupID() ),
						false,
						g_VProfCurrentProfile.GetBudgetGroupFlags( pThis->m_vecProfNodeStack[i]->GetBudgetGroupID() )
					);
				}
			}
			pThis->m_vecProfNodeStack.Purge();
#endif
		}
	}

	// Copies the live stack from the current stack position up to m_pStackHigh into a newly
	// allocated heap buffer, replacing any previously saved copy. m_pStackHigh must already
	// be set by the caller; m_pStackLow and m_cubSavedStack are set here.
	FORCEINLINE void SaveStack()
	{
		MEM_ALLOC_CREDIT_( "Coroutine saved stack" );
		if ( m_pSavedStack )
		{
			FreePv( m_pSavedStack );
		}
		GetStackPtr( pLocal );
		m_pStackLow = pLocal;
		m_cubSavedStack = (m_pStackHigh - m_pStackLow);
		m_pSavedStack = (byte *)PvAlloc( m_cubSavedStack );
		// if you hit this assert, it's because you're allocating way too much stuff on the stack in your job
		// check you haven't got any overly large string buffers allocated on the stack
		Assert( m_cubSavedStack < k_cubMaxCoroutineStackSize );
#if defined( VPROF_ENABLED )
		// Exit any current vprof scope when we yield, and remember the vprof stack so we can restore it when we run again
		m_vecProfNodeStack.RemoveAll();
		CVProfNode *pCurNode = g_VProfCurrentProfile.GetCurrentNode();
		while ( pCurNode && m_pVProfNodeScope && pCurNode != m_pVProfNodeScope && pCurNode != g_VProfCurrentProfile.GetRoot() )
		{
			m_vecProfNodeStack.AddToTail( pCurNode );
			g_VProfCurrentProfile.ExitScope();
			pCurNode = g_VProfCurrentProfile.GetCurrentNode();
		}
		m_pVProfNodeScope = NULL;
#endif
		RW_MEMORY_BARRIER;
		// save the stack in the newly allocated slot
		memcpy( m_pSavedStack, m_pStackLow, m_cubSavedStack );
#ifdef CHECK_STACK_CORRUPTION
		// Record a digest of the saved bytes so RestoreStack() can verify them.
		MD5Init( &m_md5 );
		MD5Update( &m_md5, m_pSavedStack, m_cubSavedStack );
		MD5Final( m_digest, &m_md5 );
#endif
	}

#ifdef DBGFLAG_VALIDATE
	// Memory validation hook: claims the saved-stack allocation for this object.
	void Validate( CValidator &validator, const char *pchName )
	{
		validator.Push( "CCoroutine", this, pchName );
		validator.ClaimMemory( m_pSavedStack );
		validator.Pop();
	}
#endif

#ifdef _M_X64
	// Raw storage for the jmp_buf plus slack so it can be placed at an aligned offset;
	// always access it through GetRegisters().
	unsigned char m_rgubRegisters[sizeof(jmp_buf) + _REGISTER_ALIGNMENT];
	int m_nAlignmentBytes;	// offset of the jmp_buf within m_rgubRegisters
#else
	jmp_buf m_Registers;
#endif
	byte *m_pStackHigh;		// position of initial entry to the coroutine (stack ptr before continue is run)
	byte *m_pStackLow;		// low point on the stack we plan on saving (stack ptr when we yield)
	byte *m_pSavedStack;	// pointer to the saved stack (allocated on heap)
	int m_cubSavedStack;	// amount of data on stack
	const char *m_pchName;	// name used in debug/trace output; not owned
	int m_iJumpCode;		// k_iSetJmp* code passed to longjmp when this coroutine is resumed
	const char *m_pchDebugMsg;	// message for a requested assert, or NULL
#ifdef COROUTINE_TRACE
	HCoroutine m_hCoroutine;	// for debugging
#endif
	CoroutineFunc_t m_pFunc;	// entry point; NULL for the main coroutine
	void *m_pvParam;			// argument passed to m_pFunc
#if defined( VPROF_ENABLED )
	CUtlVector<CVProfNode *> m_vecProfNodeStack;	// vprof scopes exited in SaveStack, re-entered in RestoreStack
	CVProfNode *m_pVProfNodeScope;					// vprof node that was current when the coroutine started/resumed
#endif
#ifdef CHECK_STACK_CORRUPTION
	MD5Context_t m_md5;
	unsigned char m_digest[MD5_DIGEST_LENGTH];		// digest taken when the stack was saved
	MD5Context_t m_md52;
	unsigned char m_digest2[MD5_DIGEST_LENGTH];		// digest taken after the stack was restored
#endif
};
//-----------------------------------------------------------------------------
// Purpose: manages list of all coroutines
//-----------------------------------------------------------------------------
class CCoroutineMgr
{
public:
	// Creates the manager with the main coroutine already registered and marked as running.
	CCoroutineMgr()
	{
		m_topofexceptionchain = 0;
		// reserve the 0 index as the main coroutine
		HCoroutine hMainCoroutine = m_ListCoroutines.AddToTail();
		m_ListCoroutines[hMainCoroutine].m_pchName = "(main)";
#ifdef COROUTINE_TRACE
		m_ListCoroutines[hMainCoroutine].m_hCoroutine = hMainCoroutine;
#endif
		// mark it as currently running
		m_VecCoroutineStack.AddToTail( hMainCoroutine );
	}

	// Registers a new, not-yet-started coroutine that will run pFunc( pvParam )
	// and returns its handle.
	HCoroutine CreateCoroutine( CoroutineFunc_t pFunc, void *pvParam )
	{
		HCoroutine hCoroutine = m_ListCoroutines.AddToTail();
		CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_Create() hCoroutine = %x pFunc = 0x%x pvParam = 0x%x\n", hCoroutine, pFunc, pvParam ) );
		// Look the element up once instead of re-indexing the list for every field.
		CCoroutine &coroutine = m_ListCoroutines[hCoroutine];
		coroutine.m_pFunc = pFunc;
		coroutine.m_pvParam = pvParam;
		coroutine.m_pSavedStack = NULL;
		coroutine.m_cubSavedStack = 0;
		coroutine.m_pStackHigh = coroutine.m_pStackLow = NULL;
		coroutine.m_pchName = "(no name set)";
#ifdef COROUTINE_TRACE
		coroutine.m_hCoroutine = hCoroutine;
#endif
		return hCoroutine;
	}

	// Handle of the coroutine on top of the running stack.
	HCoroutine GetActiveCoroutineHandle()
	{
		// look up the coroutine of the last item on the stack
		return m_VecCoroutineStack[m_VecCoroutineStack.Count() - 1];
	}

	// Descriptor of the coroutine on top of the running stack.
	CCoroutine &GetActiveCoroutine()
	{
		// look up the coroutine of the last item on the stack
		return m_ListCoroutines[GetActiveCoroutineHandle()];
	}

	// Descriptor of the coroutine that started/continued the active one.
	// Only meaningful while something other than the main coroutine is active.
	CCoroutine &GetPreviouslyActiveCoroutine()
	{
		// With only the main coroutine on the stack there is no "previous" entry and
		// the index below would be -1; catch that in debug builds.
		Assert( m_VecCoroutineStack.Count() > 1 );
		// look up the coroutine that ran the current coroutine
		return m_ListCoroutines[m_VecCoroutineStack[m_VecCoroutineStack.Count() - 2]];
	}

	// True if hCoroutine refers to a live coroutine other than the main one (handle 0).
	bool IsValidCoroutine( HCoroutine hCoroutine )
	{
		return m_ListCoroutines.IsValidIndex( hCoroutine ) && hCoroutine > 0;
	}

	// Pushes hCoroutine onto the running stack, making it the active coroutine.
	void SetActiveCoroutine( HCoroutine hCoroutine )
	{
		m_VecCoroutineStack.AddToTail( hCoroutine );
	}

	// Pops the active coroutine; the main coroutine entry is never popped.
	void PopCoroutineStack()
	{
		Assert( m_VecCoroutineStack.Count() > 1 );
		m_VecCoroutineStack.Remove( m_VecCoroutineStack.Count() - 1 );
	}

	// True while any coroutine other than the main one is on the running stack.
	bool IsAnyCoroutineActive()
	{
		return m_VecCoroutineStack.Count() > 1;
	}

	// Removes the coroutine from the list. The handle is not validated here.
	void DeleteCoroutine( HCoroutine hCoroutine )
	{
		m_ListCoroutines.Remove( hCoroutine );
	}

#ifdef DBGFLAG_VALIDATE
	// Memory validation hook: validates the containers and every coroutine descriptor.
	void Validate( CValidator &validator, const char *pchName )
	{
		validator.Push( "CCoroutineMgr", this, pchName );
		ValidateObj( m_ListCoroutines );
		FOR_EACH_LL( m_ListCoroutines, iRoutine )
		{
			ValidateObj( m_ListCoroutines[iRoutine] );
		}
		ValidateObj( m_VecCoroutineStack );
		validator.Pop();
	}
#endif // DBGFLAG_VALIDATE

	// Head of the exception chain (fs:[0]) recorded on 32-bit Windows the first time a
	// coroutine is continued or yields; later calls assert that it has not changed.
	uint32 m_topofexceptionchain;

private:
	CUtlLinkedList<CCoroutine, HCoroutine> m_ListCoroutines;	// all coroutines; the first one added is the main coroutine
	CUtlVector<HCoroutine> m_VecCoroutineStack;					// currently running chain, main coroutine first
};
// Per-thread coroutine manager pointer; created on first use by GCoroutineMgr().
CTHREADLOCALPTR(CCoroutineMgr) g_ThreadLocalCoroutineMgr;
// Every manager created by GCoroutineMgr(), across all threads.
CUtlVector< CCoroutineMgr * > g_VecPCoroutineMgr;
// Held while g_VecPCoroutineMgr is modified.
CThreadMutex g_ThreadMutexCoroutineMgr;
// Returns the calling thread's coroutine manager, creating it (and recording it in
// g_VecPCoroutineMgr under the manager mutex) the first time the thread asks for it.
CCoroutineMgr &GCoroutineMgr()
{
	const bool bNeedsCreate = !g_ThreadLocalCoroutineMgr;
	if ( bNeedsCreate )
	{
		AUTO_LOCK( g_ThreadMutexCoroutineMgr );
		CCoroutineMgr *pNewMgr = new CCoroutineMgr();
		g_ThreadLocalCoroutineMgr = pNewMgr;
		g_VecPCoroutineMgr.AddToTail( pNewMgr );
	}
	return *g_ThreadLocalCoroutineMgr;
}
//-----------------------------------------------------------------------------
// Purpose: call when a thread is quitting to release any per-thread memory
//-----------------------------------------------------------------------------
void Coroutine_ReleaseThreadMemory()
{
AUTO_LOCK( g_ThreadMutexCoroutineMgr );
if ( g_ThreadLocalCoroutineMgr != static_cast<const void*>( nullptr ) )
{
int iCoroutineMgr = g_VecPCoroutineMgr.Find( g_ThreadLocalCoroutineMgr );
delete g_VecPCoroutineMgr[iCoroutineMgr];
g_VecPCoroutineMgr.Remove( iCoroutineMgr );
}
}
// predecs
void Coroutine_Launch( CCoroutine &coroutine );
void Coroutine_Finish();
//-----------------------------------------------------------------------------
// Purpose: Creates a coroutine, specified by the function, returns a handle
//-----------------------------------------------------------------------------
// Registers pFunc( pvParam ) as a new coroutine with the calling thread's manager.
// The coroutine does not start running until it is continued.
HCoroutine Coroutine_Create( CoroutineFunc_t pFunc, void *pvParam )
{
	CCoroutineMgr &mgr = GCoroutineMgr();
	HCoroutine hNewCoroutine = mgr.CreateCoroutine( pFunc, pvParam );
	return hNewCoroutine;
}
//-----------------------------------------------------------------------------
// Purpose: Continues a current coroutine
// input: hCoroutine - the coroutine to continue
// pchDebugMsg - if non-NULL, it will generate an assertion in
// that coroutine, then that coroutine will
// immediately yield back to this thread
//-----------------------------------------------------------------------------
// Sentinel pchDebugMsg value, compared by pointer identity, that requests a plain debugger
// break in the coroutine rather than an assert with a message.
static const char *k_pchDebugMsg_GenericBreak = (const char *)1;
// Runs hCoroutine until it yields or finishes, then returns to the caller.
//   hCoroutine  - coroutine to run (asserted to be valid)
//   pchDebugMsg - NULL for a normal continue; k_pchDebugMsg_GenericBreak or a message string
//                 to resume an already-started coroutine with the k_iSetJmpDbgBreak jump code
//   pchName     - if non-NULL, replaces the coroutine's stored name
// Returns false if the coroutine function ran to completion (the coroutine is then deleted),
// true otherwise.
bool Internal_Coroutine_Continue( HCoroutine hCoroutine, const char *pchDebugMsg, const char *pchName )
{
Assert( GCoroutineMgr().IsValidCoroutine(hCoroutine) );
// Remember whether we are being called from inside another coroutine; this selects the
// size of the stack gap used when launching a new coroutine below.
bool bInCoroutineAlready = GCoroutineMgr().IsAnyCoroutineActive();
#ifdef _WIN32
#ifndef _WIN64
// make sure nobody has a try/catch block and then yielded
// because we hate that and we will crash
uint32 topofexceptionchain;
__asm mov eax, dword ptr fs:[0]
__asm mov topofexceptionchain, eax
if ( GCoroutineMgr().m_topofexceptionchain == 0 )
GCoroutineMgr().m_topofexceptionchain = topofexceptionchain;
else
{
Assert( topofexceptionchain == GCoroutineMgr().m_topofexceptionchain );
}
#endif
#endif
// start the new coroutine
GCoroutineMgr().SetActiveCoroutine( hCoroutine );
// coroutinePrev is the caller (the entry below the one just pushed); coroutine is the one to run
CCoroutine &coroutinePrev = GCoroutineMgr().GetPreviouslyActiveCoroutine();
CCoroutine &coroutine = GCoroutineMgr().GetActiveCoroutine();
if ( pchName )
coroutine.m_pchName = pchName;
CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_Continue() %s#%x -> %s#%x\n", coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine, coroutine.m_pchName, coroutine.m_hCoroutine ) );
bool bStillRunning = true;
// set the point for the coroutine to jump back to
RW_MEMORY_BARRIER;
int iResult = setjmp( coroutinePrev.GetRegisters() );
if ( iResult == k_iSetJmpStateSaved )
{
// First return from setjmp(): switch into the coroutine.
// copy the new stack in place
if ( coroutine.m_pSavedStack )
{
// The coroutine has run before and holds a saved stack: resume it.
// save any of the main stack that overlaps where the coroutine stack is going to go
GetStackPtr( pStackSavePoint );
if ( pStackSavePoint <= coroutine.m_pStackHigh )
{
// save the main stack from where the coroutine stack wishes to start
// if the previous coroutine already had a stack save point, just save
// the whole thing.
if ( NULL == coroutinePrev.m_pStackHigh )
{
coroutinePrev.m_pStackHigh = coroutine.m_pStackHigh;
}
else
{
Assert( coroutine.m_pStackHigh <= coroutinePrev.m_pStackHigh );
}
coroutinePrev.SaveStack();
CoroutineDbgMsg( g_fmtstr.sprintf( "SaveStack() %s#%x [%x - %x]\n", coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine, coroutinePrev.m_pStackLow, coroutinePrev.m_pStackHigh ) );
}
// If the coroutine's stack is close enough to where we are on the stack, we need to push ourselves
// down past it, so that the memcpy() doesn't screw up the RestoreStack->memcpy call chain.
if ( coroutine.m_pStackHigh > ( pStackSavePoint - 2048 ) )
{
// If the entire CR stack is above us, we don't need to pad ourselves.
if ( coroutine.m_pStackLow < pStackSavePoint )
{
// push ourselves down
int cubPush = pStackSavePoint - coroutine.m_pStackLow + 512;
volatile byte *pvStackGap = (byte*)stackalloc( cubPush );
// write through the volatile pointer so the allocation is actually used
pvStackGap[ cubPush-1 ] = 0xF;
CoroutineDbgMsg( g_fmtstr.sprintf( "Adjusting stack point by %d (%x <- %x)\n", cubPush, pvStackGap, &pvStackGap[cubPush] ) );
}
}
// This needs to go right here - after we've maybe padded the stack (so that iJumpCode does not
// get stepped on) and before the RestoreStack() call (because that might step on pchDebugMsg!).
if ( pchDebugMsg == NULL )
{
// normal continue
coroutine.m_iJumpCode = k_iSetJmpContinue;
coroutine.m_pchDebugMsg = NULL;
}
else if ( pchDebugMsg == k_pchDebugMsg_GenericBreak )
{
// debugger break requested, no message
coroutine.m_iJumpCode = k_iSetJmpDbgBreak;
coroutine.m_pchDebugMsg = NULL;
}
else
{
// assert with the supplied message requested
coroutine.m_iJumpCode = k_iSetJmpDbgBreak;
coroutine.m_pchDebugMsg = pchDebugMsg;
}
// restore the coroutine stack
CoroutineDbgMsg( g_fmtstr.sprintf( "RestoreStack() %s#%x [%x - %x] (current %x)\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutine.m_pStackLow, coroutine.m_pStackHigh, pStackSavePoint ) );
coroutine.RestoreStack();
// the new stack is in place, so no code here can reference local stack vars
// (which is why the coroutine is looked up again through GCoroutineMgr() below)
// move the program counter
RW_MEMORY_BARRIER;
Coroutine_longjmp( GCoroutineMgr().GetActiveCoroutine().GetRegisters(), GCoroutineMgr().GetActiveCoroutine().m_iJumpCode );
}
else
{
// No saved stack: the coroutine has not started yet.
// set the stack pos for the new coroutine
// jump a long way forward on the stack
// this needs to be a stackalloc() instead of a static buffer, so it won't get optimized out in release build
int cubGap = bInCoroutineAlready ? k_cubCoroutineStackGapSmall : k_cubCoroutineStackGap;
volatile byte *pvStackGap = (byte*)stackalloc( cubGap );
pvStackGap[ cubGap-1 ] = 0xF;
// hasn't started yet, so launch
Coroutine_Launch( coroutine );
}
// when the job yields, the setjmp() above returns again with a non-zero value
// code here will never run
}
else if ( iResult == k_iSetJmpContinue )
{
// the coroutine yielded back to us
// just pass through
}
else if ( iResult == k_iSetJmpDone )
{
// we're done, remove the coroutine
GCoroutineMgr().DeleteCoroutine( Coroutine_GetCurrentlyActive() );
bStillRunning = false;
}
// job has suspended itself, we'll get back to it later
GCoroutineMgr().PopCoroutineStack();
return bStillRunning;
}
//-----------------------------------------------------------------------------
// Purpose: Continues a current coroutine
//-----------------------------------------------------------------------------
// Runs hCoroutine until it yields or finishes, with no debug break requested.
// pchName, when non-NULL, becomes the coroutine's name. Returns whatever
// Internal_Coroutine_Continue reports (false once the coroutine has finished).
bool Coroutine_Continue( HCoroutine hCoroutine, const char *pchName )
{
	const char *pchNoDebugMsg = NULL;
	const bool bStillRunning = Internal_Coroutine_Continue( hCoroutine, pchNoDebugMsg, pchName );
	return bStillRunning;
}
//-----------------------------------------------------------------------------
// Purpose: launches a coroutine way ahead on the stack
//-----------------------------------------------------------------------------
// Starts a coroutine for the first time: records the current stack position as the
// coroutine's m_pStackHigh, runs its function, and then calls Coroutine_Finish(), which
// jumps back to the caller of the coroutine instead of returning here.
// The caller (Internal_Coroutine_Continue) has already stackalloc()ed a gap, so the stack
// position recorded here lies beyond that gap.
void NOINLINE Coroutine_Launch( CCoroutine &coroutine )
{
#if defined( VPROF_ENABLED )
// remember the vprof node we start under, so yields know where to stop unwinding scopes
coroutine.m_pVProfNodeScope = g_VProfCurrentProfile.GetCurrentNode();
#endif
// set our marker
#ifndef _PS3
GetStackPtr( pEsp );
#else
// The stack pointer for the current stack frame points to the top of the stack which already includes space for the
// ABI linkage area. We need to include this area as part of our coroutine stack, as the calling function will copy
// the link register (return address to this function) into this area after calling m_pFunc below. Failing to do so
// could result in the coroutine returning to garbage when complete
uint64 *pStackFrameTwoUp = (uint64*)__builtin_frame_address(2);
// Need to terminate the stack frame sequence so if someone tries to walk the stack in a co-routine they don't go forever.
*pStackFrameTwoUp = 0;
// Need to track where we save up to on yield, add a few bytes so we save just the beginning linkage area of the stack frame
// we added the null termination to.
byte * pEsp = ((byte*)pStackFrameTwoUp)+32;
#endif
#ifdef _WIN64
// Add a little extra padding, to capture the spill space for the registers
// (that we are required to reserve ABOVE the return address), and also
// align the stack to 16 bytes
coroutine.m_pStackHigh = (byte *)( ((uintptr_t)pEsp + 32 + 15) & ~(uintptr_t)15 );
// On Win64, we need to be able to find an exception handler
// if we walk the stack to this point. Currently,
// this is as close to the root as we can go. If we
// try to go higher, we will fail. That's actually
// OK at run time, because Coroutine_Finish doesn't
// return!
CatchAndWriteMiniDumpForVoidPtrFn( coroutine.m_pFunc, coroutine.m_pvParam, /*bExitQuietly*/ true );
#else
coroutine.m_pStackHigh = (byte *)pEsp;
// run the function directly
coroutine.m_pFunc( coroutine.m_pvParam );
#endif
// longjmp back to the main 'thread'
Coroutine_Finish();
}
//-----------------------------------------------------------------------------
// Purpose: cancels a currently running coroutine
//-----------------------------------------------------------------------------
// Removes hCoroutine from the calling thread's manager without running it again.
// Handles that do not refer to a live, non-main coroutine (for example one that has
// already finished and been deleted, or handle 0) are rejected: debug builds assert,
// and in all builds the coroutine list is left untouched.
void Coroutine_Cancel( HCoroutine hCoroutine )
{
	CCoroutineMgr &mgr = GCoroutineMgr();
	const bool bValid = mgr.IsValidCoroutine( hCoroutine );
	Assert( bValid );
	if ( !bValid )
	{
		return;
	}
	mgr.DeleteCoroutine( hCoroutine );
}
//-----------------------------------------------------------------------------
// Purpose: cause a debug break in the specified coroutine
//-----------------------------------------------------------------------------
// Continues hCoroutine with the generic-break sentinel as its debug message and no
// name change. The still-running result is intentionally discarded.
void Coroutine_DebugBreak( HCoroutine hCoroutine )
{
	const char *pchKeepName = NULL;
	(void)Internal_Coroutine_Continue( hCoroutine, k_pchDebugMsg_GenericBreak, pchKeepName );
}
//-----------------------------------------------------------------------------
// Purpose: generate an assert (perhaps generating a minidump), with the
// specified failure message, in the specified coroutine
//-----------------------------------------------------------------------------
// Continues hCoroutine passing pchMsg as the debug message, leaving its name unchanged.
// pchMsg is asserted to be non-NULL; the still-running result is intentionally discarded.
void Coroutine_DebugAssert( HCoroutine hCoroutine, const char *pchMsg )
{
	Assert( pchMsg );
	const char *pchKeepName = NULL;
	(void)Internal_Coroutine_Continue( hCoroutine, pchMsg, pchKeepName );
}
//-----------------------------------------------------------------------------
// Purpose: returns true if the code is currently running inside of a coroutine
//-----------------------------------------------------------------------------
bool Coroutine_IsActive()
{
return GCoroutineMgr().IsAnyCoroutineActive();
}
//-----------------------------------------------------------------------------
// Purpose: returns a handle to the currently active coroutine
//-----------------------------------------------------------------------------
// Handle of the coroutine on top of the calling thread's running stack.
// Asserts that a coroutine is actually active.
HCoroutine Coroutine_GetCurrentlyActive()
{
	Assert( Coroutine_IsActive() );
	CCoroutineMgr &mgr = GCoroutineMgr();
	return mgr.GetActiveCoroutineHandle();
}
//-----------------------------------------------------------------------------
// Purpose: lets the main thread continue
//-----------------------------------------------------------------------------
// Suspends the active coroutine and resumes whoever continued it.
// The coroutine's registers are captured with setjmp(), its stack is saved to the heap, the
// caller's stack is restored and control is transferred with Coroutine_longjmp(). The function
// returns to its own caller only when the coroutine is continued again.
// If the coroutine is resumed with k_iSetJmpDbgBreak, it raises the requested assert or
// debugger break and then immediately yields again.
void Coroutine_YieldToMain()
{
// if you've hit this assert, it's because you're calling yield when not in a coroutine
Assert( Coroutine_IsActive() );
// coroutine is the one yielding; coroutinePrev is the one that continued it
CCoroutine &coroutinePrev = GCoroutineMgr().GetPreviouslyActiveCoroutine();
CCoroutine &coroutine = GCoroutineMgr().GetActiveCoroutine();
CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_YieldToMain() %s#%x -> %s#%x\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine ) );
#ifdef _WIN32
#ifndef _WIN64
// make sure nobody has a try/catch block and then yielded
// because we hate that and we will crash
uint32 topofexceptionchain;
__asm mov eax, dword ptr fs:[0]
__asm mov topofexceptionchain, eax
if ( GCoroutineMgr().m_topofexceptionchain == 0 )
GCoroutineMgr().m_topofexceptionchain = topofexceptionchain;
else
{
Assert( topofexceptionchain == GCoroutineMgr().m_topofexceptionchain );
}
#endif
#endif
RW_MEMORY_BARRIER;
int iResult = setjmp( coroutine.GetRegisters() );
// Both the initial save and a debug-break resume end by yielding to the caller.
if ( ( iResult == k_iSetJmpStateSaved ) || ( iResult == k_iSetJmpDbgBreak ) )
{
// break / assert requested?
if ( iResult == k_iSetJmpDbgBreak )
{
// Assert (minidump) requested?
if ( coroutine.m_pchDebugMsg )
{
// Generate a failed assertion
AssertMsg1( !"Coroutine assert requested", "%s", coroutine.m_pchDebugMsg );
}
else
{
// If we were loaded only to debug, call a break
DebuggerBreakIfDebugging();
}
// Now IMMEDIATELY yield back to the main thread
}
// Clear message, regardless
coroutine.m_pchDebugMsg = NULL;
// save our stack - all the way to the top, err bottom err, the end of it ( where esp is )
coroutine.SaveStack();
CoroutineDbgMsg( g_fmtstr.sprintf( "SaveStack() %s#%x [%x - %x]\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutine.m_pStackLow, coroutine.m_pStackHigh ) );
// restore the main thread stack
// allocate a bunch of stack padding so we don't kill ourselves while in stack restoration
// If the coroutine's stack is close enough to where we are on the stack, we need to push ourselves
// down past it, so that the memcpy() doesn't screw up the RestoreStack->memcpy call chain.
GetStackPtr( pStackPtr );
if ( pStackPtr >= (coroutinePrev.m_pStackHigh - coroutinePrev.m_cubSavedStack) && ( pStackPtr - 2048 ) <= coroutinePrev.m_pStackHigh )
{
int cubPush = coroutinePrev.m_cubSavedStack + 512;
volatile byte *pvStackGap = (byte*)stackalloc( cubPush );
// write through the volatile pointer so the allocation is actually used
pvStackGap[ cubPush - 1 ] = 0xF;
CoroutineDbgMsg( g_fmtstr.sprintf( "Adjusting stack point by %d (%x <- %x)\n", cubPush, pvStackGap, &pvStackGap[cubPush] ) );
}
CoroutineDbgMsg( g_fmtstr.sprintf( "RestoreStack() %s#%x [%x - %x]\n", coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine, coroutinePrev.m_pStackLow, coroutinePrev.m_pStackHigh ) );
coroutinePrev.RestoreStack();
// jump back to the main thread
// Our stack may have been mucked with, can't use local vars anymore!
// (the previous coroutine is therefore looked up again through GCoroutineMgr())
RW_MEMORY_BARRIER;
Coroutine_longjmp( GCoroutineMgr().GetPreviouslyActiveCoroutine().GetRegisters(), k_iSetJmpContinue );
UNREACHABLE();
}
else
{
// we've been restored, now continue on our merry way
}
}
//-----------------------------------------------------------------------------
// Purpose: done with the Coroutine, terminate safely
//-----------------------------------------------------------------------------
// Ends the active coroutine: restores the stack of the coroutine that continued it and jumps
// back there with k_iSetJmpDone. Does not return.
// The previous coroutine is re-fetched through GCoroutineMgr() for every use rather than being
// cached in a local.
// NOTE(review): presumably this avoids relying on locals across the stack restore - confirm.
void Coroutine_Finish()
{
Assert( Coroutine_IsActive() );
CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_Finish() %s#%x -> %s#%x\n", GCoroutineMgr().GetActiveCoroutine().m_pchName, GCoroutineMgr().GetActiveCoroutineHandle(), GCoroutineMgr().GetPreviouslyActiveCoroutine().m_pchName, &GCoroutineMgr().GetPreviouslyActiveCoroutine() ) );
// allocate a bunch of stack padding so we don't kill ourselves while in stack restoration
volatile byte *pvStackGap = (byte*)stackalloc( GCoroutineMgr().GetPreviouslyActiveCoroutine().m_cubSavedStack + 512 );
// touch the last byte of the gap through the volatile pointer so the allocation is actually used
pvStackGap[ GCoroutineMgr().GetPreviouslyActiveCoroutine().m_cubSavedStack + 511 ] = 0xf;
GCoroutineMgr().GetPreviouslyActiveCoroutine().RestoreStack();
RW_MEMORY_BARRIER;
// go back to the main thread, signaling that we're done
Coroutine_longjmp( GCoroutineMgr().GetPreviouslyActiveCoroutine().GetRegisters(), k_iSetJmpDone );
UNREACHABLE();
}
//-----------------------------------------------------------------------------
// Purpose: Self-test coroutine body. Copies a known string into a stack
//			buffer, yields once, and checks the copy is intact after being
//			resumed. When started with a NULL argument it then launches a
//			second instance of itself (with a non-NULL argument) and
//			continues it twice.
// Input  : pvRelaunch - NULL to launch the nested instance, non-NULL to
//			skip that step
//-----------------------------------------------------------------------------
void CoroutineTestFunc( void *pvRelaunch )
{
	static const char *g_pchTestString = "test string";

	// keep a copy on our own stack so it has to survive the yield below
	char rgchT[256];
	Q_strncpy( rgchT, g_pchTestString, sizeof( rgchT ) );

	Coroutine_YieldToMain();

	// we've been resumed: the stack copy must still match the source string
	DbgVerifyNot( Q_strcmp( rgchT, g_pchTestString ) );

	// the nested instance has nothing more to do
	if ( pvRelaunch )
		return;

	// exercise launching a coroutine from inside a coroutine
	HCoroutine hCoroutine = Coroutine_Create( &CoroutineTestFunc, (void *)(size_t)0xFFFFFFFF );

	// after the first continue the nested coroutine should still be running
	DbgVerify( Coroutine_Continue( hCoroutine, NULL ) );

	// after the second continue it should be finished
	DbgVerifyNot( Coroutine_Continue( hCoroutine, NULL ) );
}
// level 2 of a test: does no work, just yields back to main five times and then ends
void CoroutineTestL2( void * )
{
	int cYieldsLeft = 5;
	while ( cYieldsLeft-- > 0 )
	{
		Coroutine_YieldToMain();
	}
}
// level 1 of a test
void CoroutineTestL1( void *pvecCoroutineL2 )
{
CUtlVector<HCoroutine> &vecCoroutineL2 = *(CUtlVector<HCoroutine> *)pvecCoroutineL2;
int i = 20;
// launch a set of coroutines
for ( i = 0; i < 20; i++ )
{
HCoroutine hCoroutine = Coroutine_Create( &CoroutineTestL2, NULL );
vecCoroutineL2.AddToTail( hCoroutine );
Coroutine_Continue( hCoroutine, NULL );
// now yield back to main occasionally
if ( i % 2 == 1 )
Coroutine_YieldToMain();
}
Assert( i == 20 );
}
//-----------------------------------------------------------------------------
// Purpose: runs a self-test of the coroutine system
// it's working if it doesn't crash
//-----------------------------------------------------------------------------
bool Coroutine_Test()
{
// basic calling of a coroutine
HCoroutine hCoroutine = Coroutine_Create( &CoroutineTestFunc, NULL );
Coroutine_Continue( hCoroutine, NULL );
Coroutine_Continue( hCoroutine, NULL );
// now test
CUtlVector<HCoroutine> vecCoroutineL2;
hCoroutine = Coroutine_Create( &CoroutineTestL1, &vecCoroutineL2 );
Coroutine_Continue( hCoroutine, NULL );
// run the sub-coroutines until they're all done
while ( vecCoroutineL2.Count() )
{
if ( hCoroutine && !Coroutine_Continue( hCoroutine, NULL ) )
hCoroutine = NULL;
FOR_EACH_VEC_BACK( vecCoroutineL2, i )
{
if ( !Coroutine_Continue( vecCoroutineL2[i], NULL ) )
vecCoroutineL2.Remove( i );
}
}
// new one
hCoroutine = Coroutine_Create( &CoroutineTestFunc, NULL );
// it has yielded, now continue it's call
{
// pop our stack up so it collides with the coroutine stack position
Coroutine_Continue( hCoroutine, NULL );
volatile byte *pvAlloca = (byte*)stackalloc( k_cubCoroutineStackGapSmall );
pvAlloca[ k_cubCoroutineStackGapSmall-1 ] = 0xF;
Coroutine_Continue( hCoroutine, NULL );
}
// now do a whole bunch of them
static const int k_nSimultaneousCoroutines = 10 * 1000;
CUtlVector<HCoroutine> coroutines;
Assert( coroutines.Base() == NULL );
for (int i = 0; i < k_nSimultaneousCoroutines; i++)
{
coroutines.AddToTail( Coroutine_Create( &CoroutineTestFunc, NULL ) );
}
for (int i = 0; i < coroutines.Count(); i++)
{
// first pass the coroutines should all still be running
DbgVerify( Coroutine_Continue( coroutines[i], NULL ) );
}
for (int i = 0; i < coroutines.Count(); i++)
{
// second pass the coroutines should all be finished
DbgVerifyNot( Coroutine_Continue( coroutines[i], NULL ) );
}
return true;
}
//-----------------------------------------------------------------------------
// Purpose: returns approximate stack depth of current coroutine.
//-----------------------------------------------------------------------------
size_t Coroutine_GetStackDepth()
{
// should only get called from a coroutine
Assert( GCoroutineMgr().IsAnyCoroutineActive() );
if ( !GCoroutineMgr().IsAnyCoroutineActive() )
return 0;
GetStackPtr( pLocal );
CCoroutine &coroutine = GCoroutineMgr().GetActiveCoroutine();
return ( coroutine.m_pStackHigh - pLocal );
}
//-----------------------------------------------------------------------------
// Purpose: validates memory for the global list of coroutine managers.
//			Compiles to an empty function unless DBGFLAG_VALIDATE is defined.
// Input  : validator - validator the checks are reported against
//-----------------------------------------------------------------------------
void Coroutine_ValidateGlobals( class CValidator &validator )
{
#ifdef DBGFLAG_VALIDATE
	// take the manager-list mutex before touching the list
	AUTO_LOCK( g_ThreadMutexCoroutineMgr );

	// validate each manager the list points at ...
	int i = 0;
	while ( i < g_VecPCoroutineMgr.Count() )
	{
		ValidatePtr( g_VecPCoroutineMgr[i] );
		i++;
	}

	// ... and then the list container itself
	ValidateObj( g_VecPCoroutineMgr );
#endif
}