//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Defines a large symbol table (intp sized handles, can store more than 64k strings)
//
// $Header: $
// $NoKeywords: $
//===========================================================================//

#ifndef UTLSYMBOLLARGE_H
#define UTLSYMBOLLARGE_H

#ifdef _WIN32
#pragma once
#endif

#include "tier0/threadtools.h"
#include "tier1/utltshash.h"
#include "tier1/stringpool.h"
#include "tier0/vprof.h"
#include "tier1/utltshash.h"

//-----------------------------------------------------------------------------
// CUtlSymbolTableLarge:
// description:
//    This class defines a symbol table, which allows us to perform mappings
//    of strings to symbols and back. 
// 
//    This class stores the strings in a series of string pools. The returned CUtlSymbolLarge is just a pointer
//     to the string data, the hash precedes it in memory and is used to speed up searching, etc.
//-----------------------------------------------------------------------------

typedef intp UtlSymLargeId_t;

#define UTL_INVAL_SYMBOL_LARGE  ((UtlSymLargeId_t)~0)

class CUtlSymbolLarge
{
public:
	// constructor, destructor
	CUtlSymbolLarge() 
	{
		u.m_Id = UTL_INVAL_SYMBOL_LARGE;
	}

	CUtlSymbolLarge( UtlSymLargeId_t id )
	{
		u.m_Id = id;
	}
	CUtlSymbolLarge( CUtlSymbolLarge const& sym )
	{
		u.m_Id = sym.u.m_Id; 
	}

	// operator=
	CUtlSymbolLarge& operator=( CUtlSymbolLarge const& src ) 
	{ 
		u.m_Id = src.u.m_Id; 
		return *this; 
	}

	// operator==
	bool operator==( CUtlSymbolLarge const& src ) const 
	{ 
		return u.m_Id == src.u.m_Id; 
	}

	// operator==
	bool operator==( UtlSymLargeId_t const& src ) const 
	{ 
		return u.m_Id == src; 
	}

	// operator==
	bool operator!=( CUtlSymbolLarge const& src ) const 
	{ 
		return u.m_Id != src.u.m_Id; 
	}

	// operator==
	bool operator!=( UtlSymLargeId_t const& src ) const 
	{ 
		return u.m_Id != src; 
	}

	// Gets at the symbol
	operator UtlSymLargeId_t const() const 
	{ 
		return u.m_Id; 
	}

	// Gets the string associated with the symbol
	inline const char* String( ) const
	{
		if ( u.m_Id == UTL_INVAL_SYMBOL_LARGE )
			return "";
		return u.m_pAsString;
	}

	inline bool IsValid() const
	{
		return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false;
	}

private:
	// Disallowed
	CUtlSymbolLarge( const char* pStr );       // they need to go through the table to assign the ptr
	bool operator==( const char* pStr ) const; // disallow since we don't know if the table this is from was case sensitive or not... maybe we don't care

	union
	{
		UtlSymLargeId_t m_Id;
		char const *m_pAsString;
	} u;
};

#define MIN_STRING_POOL_SIZE	2048

inline uint32 CUtlSymbolLarge_Hash( bool CASEINSENSITIVE, const char *pString, int len )
{
	return ( CASEINSENSITIVE ? HashStringCaseless( pString ) : HashString( pString ) ); 
}

typedef uint32 LargeSymbolTableHashDecoration_t; 

// The structure consists of the hash immediately followed by the string data
struct CUtlSymbolTableLargeBaseTreeEntry_t
{
	LargeSymbolTableHashDecoration_t	m_Hash;
	// Variable length string data
	char								m_String[1];

	bool IsEmpty() const
	{
		return ( ( m_Hash == 0 ) && ( 0 == m_String[0] ) );
	}

	char const *String() const
	{
		return (const char *)&m_String[ 0 ];
	}

	CUtlSymbolLarge ToSymbol() const
	{
		return reinterpret_cast< UtlSymLargeId_t >( String() );
	}
	
	LargeSymbolTableHashDecoration_t HashValue() const
	{
		return m_Hash;
	}
};
	
template< class TreeType, bool CASEINSENSITIVE >
class CTreeEntryLess
{
public:
	CTreeEntryLess( int ignored = 0 ) {} // permits default initialization to NULL in CUtlRBTree
	bool operator!() const { return false; }
	bool operator()( CUtlSymbolTableLargeBaseTreeEntry_t * const &left, CUtlSymbolTableLargeBaseTreeEntry_t * const &right ) const
	{
		// compare the hashes
		if ( left->m_Hash == right->m_Hash )
		{
			// if the hashes match compare the strings
			if ( !CASEINSENSITIVE )
				return strcmp( left->String(), right->String() ) < 0;
			else
				return V_stricmp( left->String(), right->String() ) < 0;
		}
		else
		{
			return left->m_Hash < right->m_Hash;
		}
	}
};
	
// For non-threaded versions, simply index into CUtlRBTree
template< bool CASEINSENSITIVE >
class CNonThreadsafeTree : public CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree< CASEINSENSITIVE >, CASEINSENSITIVE > >
{
public:
	typedef CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree, CASEINSENSITIVE > > CNonThreadsafeTreeType;

	CNonThreadsafeTree() : 
		CNonThreadsafeTreeType( 0, 16 ) 
	{
	}
	inline void Commit() 
	{
		// Nothing, only matters for thread-safe tables
	}
	inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
	{
		return CNonThreadsafeTreeType::Insert( entry );
	}
	inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) const
	{
		return CNonThreadsafeTreeType::Find( entry );
	}
	inline int InvalidIndex() const
	{
		return CNonThreadsafeTreeType::InvalidIndex();
	}
	inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
	{
		CUtlVector< CUtlSymbolTableLargeBaseTreeEntry_t * > list;
		list.EnsureCount( nCount );
		for ( int i = 0; i < nCount; ++i )
		{
			pElements[ i ] = CNonThreadsafeTreeType::Element( i )->ToSymbol();
		}

		return nCount;
	}
};

// Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash 
//  contained inside of it, don't need to recompute a hash here
template < int BUCKET_COUNT, class KEYTYPE, bool CASEINSENSITIVE >
class CCThreadsafeTreeHashMethod
{
public:
	static int Hash( const KEYTYPE &key, int nBucketMask )
	{
		uint32 nHash = key->HashValue();
		return ( nHash & nBucketMask );
	}

	static bool Compare( CUtlSymbolTableLargeBaseTreeEntry_t * const &lhs, CUtlSymbolTableLargeBaseTreeEntry_t * const &rhs )
	{
		if ( lhs->m_Hash != rhs->m_Hash )
			return false;
		if ( !CASEINSENSITIVE )
		{
			return ( !Q_strcmp( lhs->String(), rhs->String() ) ? true : false );
		}

		return ( !Q_stricmp( lhs->String(), rhs->String() ) ? true : false );
	}
};

/*
  NOTE:  So the only crappy thing about using a CUtlTSHash here is that the KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the 
   hash and the string since with strings there is a good chance of hash collision after you have a fair number of strings so we have to implement
   a Compare method (above) which falls back to strcmp/stricmp if the hashes are equal.  This means that all of the data is in the KEYTYPE of the hash and the 
   payload doesn't matter.  So I made the payload also be a pointer to a CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more convenient

  TODO:  If we have a CUtlTSHash that was all about the existence of the KEYTYPE and didn't require a payload (or template on 'void') then we could eliminate
   50% of the pointer overhead used for this data structure.
*/

// Thread safe version is based on the 
template < bool CASEINSENSITIVE >
class CThreadsafeTree : public CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > >
{
public:
	typedef CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > CThreadsafeTreeType;

	CThreadsafeTree() : 
		CThreadsafeTreeType( 32 ) 
	{
	}
	inline void Commit() 
	{
		CThreadsafeTreeType::Commit();
	}
	inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
	{
		return CThreadsafeTreeType::Insert( entry, entry );
	}
	inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry )
	{
		return CThreadsafeTreeType::Find( entry );
	}
	inline int InvalidIndex() const
	{
		return CThreadsafeTreeType::InvalidHandle();
	}
	inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
	{
		CUtlVector< UtlTSHashHandle_t > list;
		list.EnsureCount( nCount );
		int c = CThreadsafeTreeType::GetElements( nFirstElement, nCount, list.Base() );
		for ( int i = 0; i < c; ++i )
		{
			pElements[ i ] = CThreadsafeTreeType::Element( list[ i ] )->ToSymbol();
		}
		
		return c;
	}
};

// Base Class for threaded and non-threaded types
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE = MIN_STRING_POOL_SIZE >
class CUtlSymbolTableLargeBase
{
public:
	// constructor, destructor
	CUtlSymbolTableLargeBase();
	~CUtlSymbolTableLargeBase();
	
	// Finds and/or creates a symbol based on the string
	CUtlSymbolLarge AddString( const char* pString );

	// Finds the symbol for pString
	CUtlSymbolLarge Find( const char* pString ) const;
	
	// Remove all symbols in the table.
	void  RemoveAll();

	int GetNumStrings( void ) const
	{
		return m_Lookup.Count();
	}

	void Commit()
	{
		m_Lookup.Commit();
	}

	// Returns elements in the table
	int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const
	{
		return m_Lookup.GetElements( nFirstElement, nCount, pElements );
	}

	uint64 GetMemoryUsage() const
	{
		uint64 unBytesUsed = 0u;

		for ( int i=0; i < m_StringPools.Count(); i++ )
		{
			StringPool_t *pPool = m_StringPools[i];

			unBytesUsed += (uint64)pPool->m_TotalLen;
		}
		return unBytesUsed;
	}


protected:

	struct StringPool_t
	{	
		int m_TotalLen;		// How large is 
		int m_SpaceUsed;
		char m_Data[1];
	};

	TreeType m_Lookup;

	// stores the string data
	CUtlVector< StringPool_t * > m_StringPools;

private:
	int FindPoolWithSpace( int len ) const;
};

//-----------------------------------------------------------------------------
// constructor, destructor
//-----------------------------------------------------------------------------
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE >::CUtlSymbolTableLargeBase() : 
	m_StringPools( 8 )
{
}

template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::~CUtlSymbolTableLargeBase()
{
	// Release the stringpool string data
	RemoveAll();
}

template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::Find( const char* pString ) const
{	
	VPROF( "CUtlSymbolLarge::Find" );
	if (!pString)
		return CUtlSymbolLarge();

	// Passing this special invalid symbol makes the comparison function
	// use the string passed in the context
	int len = Q_strlen( pString ) + 1;

	CUtlSymbolTableLargeBaseTreeEntry_t *search = (CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca( len + sizeof( LargeSymbolTableHashDecoration_t ) );
	search->m_Hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, len );
	Q_memcpy( (char *)&search->m_String[ 0 ], pString, len );

	int idx = const_cast< TreeType & >(m_Lookup).Find( search );

	if ( idx == m_Lookup.InvalidIndex() )
		return UTL_INVAL_SYMBOL_LARGE;

	const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[ idx ];
	return entry->ToSymbol();
}

template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline int CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace( int len )	const
{
	for ( int i=0; i < m_StringPools.Count(); i++ )
	{
		StringPool_t *pPool = m_StringPools[i];

		if ( (pPool->m_TotalLen - pPool->m_SpaceUsed) >= len )
		{
			return i;
		}
	}

	return -1;
}

//-----------------------------------------------------------------------------
// Finds and/or creates a symbol based on the string
//-----------------------------------------------------------------------------
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::AddString( const char* pString )
{
	VPROF("CUtlSymbolLarge::AddString");
	if (!pString) 
		return UTL_INVAL_SYMBOL_LARGE;

	CUtlSymbolLarge id = Find( pString );
	if ( id != UTL_INVAL_SYMBOL_LARGE )
		return id;

	int lenString = Q_strlen(pString) + 1; // length of just the string
	int lenDecorated = lenString + sizeof(LargeSymbolTableHashDecoration_t); // and with its hash decoration
	// make sure that all strings are aligned on 2-byte boundaries so the hashes will read correctly
	// This assert seems to be invalid because LargeSymbolTableHashDecoration_t is always
	// a uint32, by design.
	//COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) == sizeof(intp));
	lenDecorated = ALIGN_VALUE(lenDecorated, sizeof( intp ) );

	// Find a pool with space for this string, or allocate a new one.
	int iPool = FindPoolWithSpace( lenDecorated );
	if ( iPool == -1 )
	{
		// Add a new pool.
		int newPoolSize = MAX( lenDecorated + sizeof( StringPool_t ), POOL_SIZE );
		StringPool_t *pPool = (StringPool_t*)malloc( newPoolSize );

		pPool->m_TotalLen = newPoolSize - sizeof( StringPool_t );
		pPool->m_SpaceUsed = 0;
		iPool = m_StringPools.AddToTail( pPool );
	}

	// Compute a hash
	LargeSymbolTableHashDecoration_t hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, lenString );

	// Copy the string in.
	StringPool_t *pPool = m_StringPools[iPool];
	// Assert( pPool->m_SpaceUsed < 0xFFFF );	// Pool could be bigger than 2k
	// This should never happen, because if we had a string > 64k, it
	// would have been given its entire own pool.
	
	CUtlSymbolTableLargeBaseTreeEntry_t *entry = ( CUtlSymbolTableLargeBaseTreeEntry_t * )&pPool->m_Data[ pPool->m_SpaceUsed ];
	
	pPool->m_SpaceUsed += lenDecorated;

	entry->m_Hash = hash;
	char *pText = (char *)&entry->m_String [ 0 ];
	Q_memcpy( pText, pString, lenString );

	// insert the string into the database
	MEM_ALLOC_CREDIT();
	int idx = m_Lookup.Insert( entry );
	return m_Lookup.Element( idx )->ToSymbol();
}

//-----------------------------------------------------------------------------
// Remove all symbols in the table.
//-----------------------------------------------------------------------------
template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE >
inline void CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::RemoveAll()
{
	m_Lookup.Purge();

	for ( int i=0; i < m_StringPools.Count(); i++ )
	{
		StringPool_t * pString = m_StringPools[i];
		free( pString );
	}

	m_StringPools.RemoveAll();
}

// Case-sensitive
typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< false >, false > CUtlSymbolTableLarge;
// Case-insensitive
typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< true >, true > CUtlSymbolTableLarge_CI;
// Multi-threaded case-sensitive
typedef CUtlSymbolTableLargeBase< CThreadsafeTree< false >, false > CUtlSymbolTableLargeMT;
// Multi-threaded case-insensitive
typedef CUtlSymbolTableLargeBase< CThreadsafeTree< true >, true > CUtlSymbolTableLargeMT_CI;

#endif // UTLSYMBOLLARGE_H