Merge pull request #248 from nillerusr/mathlib-optimize

Mathlib optimize
This commit is contained in:
nillerusr 2023-05-05 16:17:29 +00:00 committed by GitHub
commit 697a9f34f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1484 additions and 1737 deletions

2
.gitignore vendored
View File

@ -37,5 +37,3 @@ waf3*/
.vscode/
.depproj/
source-engine.sln
hl2/

View File

@ -33,6 +33,7 @@ CAI_PolicingBehavior::CAI_PolicingBehavior( void )
m_bEnabled = false;
m_nNumWarnings = 0;
m_bTargetIsHostile = false;
m_hPoliceGoal = NULL;
}
//-----------------------------------------------------------------------------

View File

@ -31,9 +31,9 @@ public:
{
MEM_ALLOC_CREDIT_( "CMatCallQueue.m_Allocator" );
#ifdef SWDS
m_Allocator.Init( 2*1024, 0, 0, 4 );
m_Allocator.Init( 2*1024, 0, 0, 16 );
#else
m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 4 );
m_Allocator.Init( IsX360() ? 2*1024*1024 : 8*1024*1024, 64*1024, 256*1024, 16 );
#endif
m_FunctorFactory.SetAllocator( &m_Allocator );
m_pHead = m_pTail = NULL;

View File

@ -420,13 +420,6 @@ void MatrixGetColumn( const matrix3x4_t& in, int column, Vector &out )
out.z = in[2][column];
}
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
{
out[0][column] = in.x;
out[1][column] = in.y;
out[2][column] = in.z;
}
void MatrixScaleBy ( const float flScale, matrix3x4_t &out )
{
out[0][0] *= flScale;
@ -1092,57 +1085,6 @@ void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst )
dst[2][0] = 0.0f; dst[2][1] = 0.0f; dst[2][2] = z; dst[2][3] = 0.0f;
}
//-----------------------------------------------------------------------------
// Purpose: Builds the matrix for a counterclockwise rotation about an arbitrary axis.
//
// | ax2 + (1 - ax2)cosQ axay(1 - cosQ) - azsinQ azax(1 - cosQ) + aysinQ |
// Ra(Q) = | axay(1 - cosQ) + azsinQ ay2 + (1 - ay2)cosQ ayaz(1 - cosQ) - axsinQ |
// | azax(1 - cosQ) - aysinQ ayaz(1 - cosQ) + axsinQ az2 + (1 - az2)cosQ |
//
// Input : mat -
// vAxisOrRot -
// angle -
//-----------------------------------------------------------------------------
void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst )
{
float radians;
float axisXSquared;
float axisYSquared;
float axisZSquared;
float fSin;
float fCos;
radians = angleDegrees * ( M_PI / 180.0 );
fSin = sin( radians );
fCos = cos( radians );
axisXSquared = vAxisOfRot[0] * vAxisOfRot[0];
axisYSquared = vAxisOfRot[1] * vAxisOfRot[1];
axisZSquared = vAxisOfRot[2] * vAxisOfRot[2];
// Column 0:
dst[0][0] = axisXSquared + (1 - axisXSquared) * fCos;
dst[1][0] = vAxisOfRot[0] * vAxisOfRot[1] * (1 - fCos) + vAxisOfRot[2] * fSin;
dst[2][0] = vAxisOfRot[2] * vAxisOfRot[0] * (1 - fCos) - vAxisOfRot[1] * fSin;
// Column 1:
dst[0][1] = vAxisOfRot[0] * vAxisOfRot[1] * (1 - fCos) - vAxisOfRot[2] * fSin;
dst[1][1] = axisYSquared + (1 - axisYSquared) * fCos;
dst[2][1] = vAxisOfRot[1] * vAxisOfRot[2] * (1 - fCos) + vAxisOfRot[0] * fSin;
// Column 2:
dst[0][2] = vAxisOfRot[2] * vAxisOfRot[0] * (1 - fCos) + vAxisOfRot[1] * fSin;
dst[1][2] = vAxisOfRot[1] * vAxisOfRot[2] * (1 - fCos) - vAxisOfRot[0] * fSin;
dst[2][2] = axisZSquared + (1 - axisZSquared) * fCos;
// Column 3:
dst[0][3] = 0;
dst[1][3] = 0;
dst[2][3] = 0;
}
//-----------------------------------------------------------------------------
// Computes the transpose
//-----------------------------------------------------------------------------
@ -1450,33 +1392,6 @@ void VectorYawRotate( const Vector &in, float flYaw, Vector &out)
out.z = in.z;
}
float Bias( float x, float biasAmt )
{
// WARNING: not thread safe
static float lastAmt = -1;
static float lastExponent = 0;
if( lastAmt != biasAmt )
{
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
}
float fRet = pow( x, lastExponent );
Assert ( !IS_NAN( fRet ) );
return fRet;
}
float Gain( float x, float biasAmt )
{
// WARNING: not thread safe
if( x < 0.5 )
return 0.5f * Bias( 2*x, 1-biasAmt );
else
return 1 - 0.5f * Bias( 2 - 2*x, 1-biasAmt );
}
float SmoothCurve( float x )
{
// Actual smooth curve. Visualization:

File diff suppressed because it is too large Load Diff

View File

@ -22,10 +22,16 @@ extern float (*pfFastCos)(float x);
// The following are not declared as macros because they are often used in limiting situations,
// and sometimes the compiler simply refuses to inline them for some reason
#define FastSqrt(x) (*pfSqrt)(x)
#define FastRSqrt(x) (*pfRSqrt)(x)
#define FastRSqrtFast(x) (*pfRSqrtFast)(x)
#define FastSqrt(x) sqrtf(x)
#define FastRSqrt(x) (1.f/sqrtf(x))
#define FastRSqrtFast(x) (1.f/sqrtf(x))
#ifdef _WIN32
#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
#else
#define FastSinCos(x,s,c) sincosf(x,s,c)
#endif
#define FastCos(x) (*pfFastCos)(x)
#if defined(__i386__) || defined(_M_IX86)

View File

@ -30,7 +30,6 @@
// FP exception clean so this not a turnkey operation.
//#define FP_EXCEPTIONS_ENABLED
#ifdef FP_EXCEPTIONS_ENABLED
#include <float.h> // For _clearfp and _controlfp_s
#endif
@ -93,37 +92,11 @@ private:
FPExceptionEnabler& operator=(const FPExceptionEnabler&);
};
#ifdef DEBUG // stop crashing edit-and-continue
FORCEINLINE float clamp( float val, float minVal, float maxVal )
inline float clamp( const float val, const float minVal, const float maxVal )
{
if ( maxVal < minVal )
return maxVal;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
const float t = val < minVal ? minVal : val;
return t > maxVal ? maxVal : t;
}
#else // DEBUG
FORCEINLINE float clamp( float val, float minVal, float maxVal )
{
#if defined(__i386__) || defined(_M_IX86)
_mm_store_ss( &val,
_mm_min_ss(
_mm_max_ss(
_mm_load_ss(&val),
_mm_load_ss(&minVal) ),
_mm_load_ss(&maxVal) ) );
#else
val = fpmax(minVal, val);
val = fpmin(maxVal, val);
#endif
return val;
}
#endif // DEBUG
//
// Returns a clamped value in the range [min, max].
@ -131,17 +104,10 @@ FORCEINLINE float clamp( float val, float minVal, float maxVal )
template< class T >
inline T clamp( T const &val, T const &minVal, T const &maxVal )
{
if ( maxVal < minVal )
return maxVal;
else if( val < minVal )
return minVal;
else if( val > maxVal )
return maxVal;
else
return val;
const T t = val< minVal ? minVal : val;
return t > maxVal ? maxVal : t;
}
// plane_t structure
// !!! if this is changed, it must be changed in asm code too !!!
// FIXME: does the asm code even exist anymore?
@ -237,8 +203,8 @@ bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t
struct matrix3x4_t
{
matrix3x4_t() = default;
matrix3x4_t(
inline matrix3x4_t() = default;
inline matrix3x4_t(
float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23 )
@ -252,7 +218,7 @@ struct matrix3x4_t
// Creates a matrix where the X axis = forward
// the Y axis = left, and the Z axis = up
//-----------------------------------------------------------------------------
void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
inline void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
{
m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
@ -263,26 +229,23 @@ struct matrix3x4_t
// Creates a matrix where the X axis = forward
// the Y axis = left, and the Z axis = up
//-----------------------------------------------------------------------------
matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
inline matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
{
Init( xAxis, yAxis, zAxis, vecOrigin );
}
inline void Invalidate( void )
{
for (int i = 0; i < 3; i++)
for( int i=0; i < 12; i++ )
{
for (int j = 0; j < 4; j++)
{
m_flMatVal[i][j] = VEC_T_NAN;
}
((float*)m_flMatVal)[i] = VEC_T_NAN;
}
}
float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
float *Base() { return &m_flMatVal[0][0]; }
const float *Base() const { return &m_flMatVal[0][0]; }
inline float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
inline const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
inline float *Base() { return &m_flMatVal[0][0]; }
inline const float *Base() const { return &m_flMatVal[0][0]; }
float m_flMatVal[3][4];
};
@ -565,7 +528,13 @@ void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
inline void MatrixSetColumn( const Vector &in, int column, matrix3x4_t& out )
{
out[0][column] = in.x;
out[1][column] = in.y;
out[2][column] = in.z;
}
inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
{
@ -1079,7 +1048,19 @@ void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
// 0 1
//
// With a biasAmt of 0.5, Bias returns X.
float Bias( float x, float biasAmt );
inline float Bias( float x, float biasAmt )
{
// WARNING: not thread safe
static float lastAmt = -1;
static float lastExponent = 0;
if( lastAmt != biasAmt )
{
lastExponent = log( biasAmt ) * -1.4427f; // (-1.4427 = 1 / log(0.5))
}
float fRet = pow( x, lastExponent );
Assert ( !IS_NAN( fRet ) );
return fRet;
}
// Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
@ -1111,9 +1092,14 @@ float Bias( float x, float biasAmt );
// |*****
// |___________________
// 0 1
float Gain( float x, float biasAmt );
inline float Gain( float x, float biasAmt )
{
// WARNING: not thread safe
if( x < 0.5 )
return 0.5f * Bias( 2*x, 1-biasAmt );
else
return 1 - 0.5f * Bias( 2 - 2*x, 1-biasAmt );
}
// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
// any fadein/fadeout effect where it should start and end smoothly.

View File

@ -35,7 +35,7 @@ class Vector2D;
// 4D Vector4D
//=========================================================
class Vector4D
class alignas(16) Vector4D
{
public:
// Members

File diff suppressed because it is too large Load Diff

View File

@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
} D3DSHADER_PARAM_REGISTER_TYPE;
struct D3DMATRIX
struct alignas(16) D3DMATRIX
{
union
{

View File

@ -1042,7 +1042,7 @@ typedef enum _D3DSHADER_PARAM_REGISTER_TYPE
D3DSPR_FORCE_DWORD = 0x7fffffff, // force 32-bit size enum
} D3DSHADER_PARAM_REGISTER_TYPE;
struct D3DMATRIX
struct alignas(16) D3DMATRIX
{
union
{