mirror of
https://github.com/nillerusr/source-engine.git
synced 2025-04-11 18:57:47 +00:00
99% done Windows ARM32 port
This commit is contained in:
parent
bee760067e
commit
ee04c66aa5
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
*~
|
||||
*.mak
|
||||
*.mak.vpc_crc
|
||||
*.vpc_crc
|
||||
|
@ -89,9 +89,6 @@
|
||||
#define _sse2neon_likely(x) __builtin_expect(!!(x), 1)
|
||||
#define _sse2neon_unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#elif defined(_MSC_VER)
|
||||
#if _MSVC_TRADITIONAL
|
||||
#error Using the traditional MSVC preprocessor is not supported! Use /Zc:preprocessor instead.
|
||||
#endif
|
||||
#ifndef FORCE_INLINE
|
||||
#define FORCE_INLINE static inline
|
||||
#endif
|
||||
@ -184,6 +181,10 @@
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef _M_ARM
|
||||
#define vst1q_lane_s64(a, b, c)
|
||||
#endif
|
||||
|
||||
/* Memory barriers
|
||||
* __atomic_thread_fence does not include a compiler barrier; instead,
|
||||
* the barrier is part of __atomic_load/__atomic_store's "volatile-like"
|
||||
@ -202,8 +203,12 @@ FORCE_INLINE void _sse2neon_smp_mb(void)
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
||||
#else /* MSVC */
|
||||
#ifdef _M_ARM
|
||||
__dmb(_ARM_BARRIER_ISH);
|
||||
#else
|
||||
__dmb(_ARM64_BARRIER_ISH);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Architecture-specific build options */
|
||||
@ -268,7 +273,7 @@ FORCE_INLINE void _sse2neon_smp_mb(void)
|
||||
* we have to perform syscall instead.
|
||||
*/
|
||||
#if (!defined(__aarch64__) && !defined(_M_ARM64))
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
/* "__has_builtin" can be used to query support for built-in functions
|
||||
@ -574,10 +579,10 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t);
|
||||
/* Backwards compatibility for compilers with lack of specific type support */
|
||||
|
||||
// Older gcc does not define vld1q_u8_x4 type
|
||||
#if defined(__GNUC__) && !defined(__clang__) && \
|
||||
#if defined(_M_ARM) || (defined(__GNUC__) && !defined(__clang__) && \
|
||||
((__GNUC__ <= 12 && defined(__arm__)) || \
|
||||
(__GNUC__ == 10 && __GNUC_MINOR__ < 3 && defined(__aarch64__)) || \
|
||||
(__GNUC__ <= 9 && defined(__aarch64__)))
|
||||
(__GNUC__ <= 9 && defined(__aarch64__))))
|
||||
FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p)
|
||||
{
|
||||
uint8x16x4_t ret;
|
||||
@ -610,6 +615,9 @@ FORCE_INLINE uint8_t _sse2neon_vaddv_u8(uint8x8_t v8)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_M_ARM)
|
||||
#pragma message("TODO: Windows ARM32: Port many SSE2NEON functions")
|
||||
#else
|
||||
#if !defined(__aarch64__) && !defined(_M_ARM64)
|
||||
/* emulate vaddvq u8 variant */
|
||||
FORCE_INLINE uint8_t _sse2neon_vaddvq_u8(uint8x16_t a)
|
||||
@ -645,6 +653,7 @@ FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a)
|
||||
return vaddvq_u16(a);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Function Naming Conventions
|
||||
* The naming convention of SSE intrinsics is straightforward. A generic SSE
|
||||
@ -1765,6 +1774,7 @@ FORCE_INLINE void _mm_free(void *addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef _M_ARM
|
||||
FORCE_INLINE uint64_t _sse2neon_get_fpcr()
|
||||
{
|
||||
uint64_t value;
|
||||
@ -1808,6 +1818,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode()
|
||||
|
||||
return r.field.bit24 ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Macro: Get the rounding mode bits from the MXCSR control and status register.
|
||||
// The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST,
|
||||
@ -1826,6 +1837,8 @@ FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE()
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
r.value = _sse2neon_get_fpcr();
|
||||
#elif defined(_M_ARM)
|
||||
r.value = _MoveFromCoprocessor(10,7, 1,0,0);
|
||||
#else
|
||||
__asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */
|
||||
#endif
|
||||
@ -2247,7 +2260,7 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b)
|
||||
FORCE_INLINE void _mm_prefetch(char const *p, int i)
|
||||
{
|
||||
(void) i;
|
||||
#if defined(_MSC_VER)
|
||||
#ifdef _M_ARM64
|
||||
switch (i) {
|
||||
case _MM_HINT_NTA:
|
||||
__prefetch2(p, 1);
|
||||
@ -2262,6 +2275,8 @@ FORCE_INLINE void _mm_prefetch(char const *p, int i)
|
||||
__prefetch2(p, 4);
|
||||
break;
|
||||
}
|
||||
#elif defined(_M_ARM)
|
||||
// TODO
|
||||
#else
|
||||
switch (i) {
|
||||
case _MM_HINT_NTA:
|
||||
@ -2348,6 +2363,7 @@ FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b)
|
||||
vset_lane_u16((int) vget_lane_u64(t, 0), vdup_n_u16(0), 0));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Macro: Set the flush zero bits of the MXCSR control and status register to
|
||||
// the value in unsigned 32-bit integer a. The flush zero may contain any of the
|
||||
// following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF
|
||||
@ -2379,6 +2395,7 @@ FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag)
|
||||
__asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set packed single-precision (32-bit) floating-point elements in dst with the
|
||||
// supplied values.
|
||||
@ -2404,6 +2421,7 @@ FORCE_INLINE __m128 _mm_set_ps1(float _w)
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_MM_SET_ROUNDING_MODE
|
||||
FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding)
|
||||
{
|
||||
#ifndef _M_ARM
|
||||
union {
|
||||
fpcr_bitfield field;
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
@ -2442,6 +2460,7 @@ FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding)
|
||||
#else
|
||||
__asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// Copy single-precision (32-bit) floating-point element a to the lower element
|
||||
@ -3206,6 +3225,7 @@ FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
|
||||
return _mm_move_sd(a, _mm_cmpeq_pd(a, b));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b
|
||||
// for greater-than-or-equal, and store the results in dst.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd
|
||||
@ -3247,6 +3267,7 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b)
|
||||
return vreinterpretq_m128d_u64(vld1q_u64(d));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compare packed signed 16-bit integers in a and b for greater-than, and store
|
||||
// the results in dst.
|
||||
@ -3275,6 +3296,7 @@ FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
|
||||
vcgtq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b
|
||||
// for greater-than, and store the results in dst.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd
|
||||
@ -3358,6 +3380,7 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b)
|
||||
return vreinterpretq_m128d_u64(vld1q_u64(d));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compare packed signed 16-bit integers in a and b for less-than, and store the
|
||||
// results in dst. Note: This intrinsic emits the pcmpgtw instruction with the
|
||||
@ -3389,6 +3412,7 @@ FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
|
||||
vcltq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b
|
||||
// for less-than, and store the results in dst.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd
|
||||
@ -3429,6 +3453,7 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b)
|
||||
return vreinterpretq_m128d_u64(vld1q_u64(d));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b
|
||||
// for not-equal, and store the results in dst.
|
||||
@ -3456,6 +3481,7 @@ FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
|
||||
return _mm_move_sd(a, _mm_cmpneq_pd(a, b));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b
|
||||
// for not-greater-than-or-equal, and store the results in dst.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd
|
||||
@ -3756,6 +3782,7 @@ FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b)
|
||||
return (*(double *) &a0 < *(double *) &b0);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compare the lower double-precision (64-bit) floating-point element in a and b
|
||||
// for equality, and return the boolean result (0 or 1).
|
||||
@ -4401,6 +4428,7 @@ FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b)
|
||||
vmaxq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b)));
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
// Compare packed double-precision (64-bit) floating-point elements in a and b,
|
||||
// and store packed maximum values in dst.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd
|
||||
@ -4487,6 +4515,7 @@ FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b)
|
||||
return vreinterpretq_m128d_u64(vld1q_u64(d));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compare the lower double-precision (64-bit) floating-point elements in a and
|
||||
// b, store the minimum value in the lower element of dst, and copy the upper
|
||||
@ -4793,7 +4822,11 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b)
|
||||
FORCE_INLINE void _mm_pause()
|
||||
{
|
||||
#if defined(_MSC_VER)
|
||||
#ifdef _M_ARM
|
||||
__isb(_ARM_BARRIER_SY);
|
||||
#else
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
#endif
|
||||
#else
|
||||
__asm__ __volatile__("isb\n");
|
||||
#endif
|
||||
@ -7622,6 +7655,7 @@ FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b)
|
||||
}
|
||||
|
||||
/* SSE4.2 */
|
||||
#ifndef _M_ARM
|
||||
|
||||
const static uint16_t ALIGN_STRUCT(16) _sse2neon_cmpestr_mask16b[8] = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
|
||||
@ -8463,9 +8497,11 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
|
||||
return crc;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* AES */
|
||||
|
||||
#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64)
|
||||
#if !defined(__ARM_FEATURE_CRYPTO) && !defined(_M_ARM64) && !defined(_M_ARM)
|
||||
/* clang-format off */
|
||||
#define SSE2NEON_AES_SBOX(w) \
|
||||
{ \
|
||||
@ -8913,6 +8949,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon)
|
||||
#undef SSE2NEON_MULTIPLY
|
||||
#endif
|
||||
|
||||
#elif defined(_M_ARM)
|
||||
#else /* __ARM_FEATURE_CRYPTO */
|
||||
// Implements equivalent of 'aesenc' by combining AESE (with an empty key) and
|
||||
// AESMC and then manually applying the real key as an xor operation. This
|
||||
@ -9034,6 +9071,7 @@ FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm)
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode()
|
||||
{
|
||||
union {
|
||||
@ -9053,6 +9091,7 @@ FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode()
|
||||
|
||||
return r.field.bit24 ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Count the number of bits set to 1 in unsigned 32-bit integer a, and
|
||||
// return that count in dst.
|
||||
@ -9113,6 +9152,7 @@ FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef _M_ARM
|
||||
FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
|
||||
{
|
||||
// AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting,
|
||||
@ -9140,6 +9180,7 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
|
||||
__asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// Return the current 64-bit value of the processor's time-stamp counter.
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc
|
||||
@ -9161,6 +9202,9 @@ FORCE_INLINE uint64_t _rdtsc(void)
|
||||
#endif
|
||||
|
||||
return val;
|
||||
#elif defined(_M_ARM)
|
||||
uint32_t val = _MoveFromCoprocessor(15,0, 9,13,0);
|
||||
return ((uint64_t)val) << 6;
|
||||
#else
|
||||
uint32_t pmccntr, pmuseren, pmcntenset;
|
||||
// Read the user mode Performance Monitoring Unit (PMU)
|
||||
|
@ -862,7 +862,7 @@ BOX TRACING
|
||||
|
||||
// Custom SIMD implementation for box brushes
|
||||
|
||||
const fltx4 Four_DistEpsilons={DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON};
|
||||
const fltx4 Four_DistEpsilons=FLTX4(DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON);
|
||||
const int32 ALIGN16 g_CubeFaceIndex0[4] ALIGN16_POST = {0,1,2,-1};
|
||||
const int32 ALIGN16 g_CubeFaceIndex1[4] ALIGN16_POST = {3,4,5,-1};
|
||||
bool IntersectRayWithBoxBrush( TraceInfo_t *pTraceInfo, const cbrush_t *pBrush, cboxbrush_t *pBox )
|
||||
@ -1572,7 +1572,7 @@ void FASTCALL CM_TraceToLeaf( TraceInfo_t * RESTRICT pTraceInfo, int ndxLeaf, fl
|
||||
fltx4 traceStart = LoadUnaligned3SIMD(pTraceInfo->m_start.Base());
|
||||
fltx4 traceDelta = LoadUnaligned3SIMD(pTraceInfo->m_delta.Base());
|
||||
fltx4 traceInvDelta = LoadUnaligned3SIMD(pTraceInfo->m_invDelta.Base());
|
||||
static const fltx4 vecEpsilon = {DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON};
|
||||
static const fltx4 vecEpsilon = FLTX4(DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON);
|
||||
// only used in !IS_POINT version:
|
||||
fltx4 extents;
|
||||
if (!IS_POINT)
|
||||
|
@ -40,7 +40,7 @@
|
||||
#include "materialsystem/materialsystem_config.h"
|
||||
#include "materialsystem/itexture.h"
|
||||
#include "IHammer.h"
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#include "staticpropmgr.h"
|
||||
|
@ -104,7 +104,7 @@ extern ConVar host_timer_spin_ms;
|
||||
extern float host_nexttick;
|
||||
extern IVEngineClient *engineClient;
|
||||
|
||||
#ifdef WIN32
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
static void cpu_frequency_monitoring_callback( IConVar *var, const char *pOldValue, float flOldValue )
|
||||
{
|
||||
// Set the specified interval for CPU frequency monitoring
|
||||
|
@ -2122,8 +2122,8 @@ int CDetailObjectSystem::SortSpritesBackToFront( int nLeaf, const Vector &viewOr
|
||||
#else
|
||||
#define MANTISSA_LSB_OFFSET 0
|
||||
#endif
|
||||
static fltx4 Four_MagicNumbers={ MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
|
||||
static fltx4 Four_255s={ 255.0, 255.0, 255.0, 255.0 };
|
||||
static fltx4 Four_MagicNumbers=FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER );
|
||||
static fltx4 Four_255s=FLTX4( 255.0, 255.0, 255.0, 255.0 );
|
||||
|
||||
static ALIGN16 int32 And255Mask[4] ALIGN16_POST = {0xff,0xff,0xff,0xff};
|
||||
#define PIXMASK ( * ( reinterpret_cast< fltx4 *>( &And255Mask ) ) )
|
||||
|
@ -167,8 +167,10 @@ InitReturnVal_t CInputSystem::Init()
|
||||
|
||||
joy_xcontroller_found.SetValue( 0 );
|
||||
|
||||
#ifdef USE_SDL
|
||||
if( !m_bConsoleTextMode )
|
||||
InitializeTouch();
|
||||
#endif
|
||||
|
||||
if ( IsPC() && !m_bConsoleTextMode )
|
||||
{
|
||||
@ -975,7 +977,9 @@ void CInputSystem::SetPrimaryUserId( int userId )
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::SetRumble( float fLeftMotor, float fRightMotor, int userId )
|
||||
{
|
||||
#ifdef USE_SDL
|
||||
SetXDeviceRumble( fLeftMotor, fRightMotor, userId );
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -145,10 +145,16 @@ public:
|
||||
|
||||
struct JoystickInfo_t
|
||||
{
|
||||
#ifdef USE_SDL
|
||||
void *m_pDevice; // Really an SDL_GameController*, NULL if not present.
|
||||
void *m_pHaptic; // Really an SDL_Haptic*
|
||||
float m_fCurrentRumble;
|
||||
bool m_bRumbleEnabled;
|
||||
#elif defined(_WIN32)
|
||||
JOYINFOEX m_JoyInfoEx;
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
int m_nButtonCount;
|
||||
int m_nAxisFlags;
|
||||
int m_nDeviceId;
|
||||
@ -271,6 +277,9 @@ public:
|
||||
|
||||
//Added called and set to true when binding input and set to false once bound
|
||||
void SetNovintPure( bool bPure );
|
||||
#ifndef USE_SDL
|
||||
unsigned int AxisValue( JoystickAxis_t axis, JOYINFOEX& ji );
|
||||
#endif
|
||||
#else
|
||||
void SetNovintPure( bool bPure ) {} // to satify the IInput virtual interface
|
||||
#endif
|
||||
|
370
inputsystem/joystick_win32.cpp
Normal file
370
inputsystem/joystick_win32.cpp
Normal file
@ -0,0 +1,370 @@
|
||||
//========= Copyright Valve Corporation, All rights reserved. ============//
|
||||
//
|
||||
// Purpose: PC Joystick implementation for inputsystem.dll
|
||||
//
|
||||
//===========================================================================//
|
||||
|
||||
/* For force feedback testing. */
|
||||
#include "inputsystem.h"
|
||||
#include "tier1/convar.h"
|
||||
#include "tier0/icommandline.h"
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Joystick helpers
|
||||
//-----------------------------------------------------------------------------
|
||||
#define JOY_POVFWDRIGHT ( ( JOY_POVFORWARD + JOY_POVRIGHT ) >> 1 ) // 4500
|
||||
#define JOY_POVRIGHTBACK ( ( JOY_POVRIGHT + JOY_POVBACKWARD ) >> 1 ) // 13500
|
||||
#define JOY_POVFBACKLEFT ( ( JOY_POVBACKWARD + JOY_POVLEFT ) >> 1 ) // 22500
|
||||
#define JOY_POVLEFTFWD ( ( JOY_POVLEFT + JOY_POVFORWARD ) >> 1 ) // 31500
|
||||
|
||||
ConVar joy_wwhack1( "joy_wingmanwarrior_centerhack", "0", FCVAR_ARCHIVE, "Wingman warrior centering hack." );
|
||||
ConVar joy_axisbutton_threshold( "joy_axisbutton_threshold", "0.3", FCVAR_ARCHIVE, "Analog axis range before a button press is registered." );
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Initialize all joysticks
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::InitializeJoysticks( void )
|
||||
{
|
||||
// assume no joystick
|
||||
m_nJoystickCount = 0;
|
||||
|
||||
// abort startup if user requests no joystick
|
||||
if ( CommandLine()->FindParm("-nojoy" ) )
|
||||
return;
|
||||
|
||||
// verify joystick driver is present
|
||||
int nMaxJoysticks = joyGetNumDevs();
|
||||
if ( nMaxJoysticks > MAX_JOYSTICKS )
|
||||
{
|
||||
nMaxJoysticks = MAX_JOYSTICKS;
|
||||
}
|
||||
else if ( nMaxJoysticks <= 0 )
|
||||
{
|
||||
DevMsg( 1, "joystick not found -- driver not present\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// cycle through the joysticks looking for valid ones
|
||||
MMRESULT mmr;
|
||||
for ( int i=0; i < nMaxJoysticks; i++ )
|
||||
{
|
||||
JOYINFOEX ji;
|
||||
Q_memset( &ji, 0, sizeof( ji ) );
|
||||
ji.dwSize = sizeof(ji);
|
||||
ji.dwFlags = JOY_RETURNCENTERED;
|
||||
mmr = joyGetPosEx( i, &ji );
|
||||
if ( mmr != JOYERR_NOERROR )
|
||||
continue;
|
||||
|
||||
// get the capabilities of the selected joystick
|
||||
// abort startup if command fails
|
||||
JOYCAPS jc;
|
||||
Q_memset( &jc, 0, sizeof( jc ) );
|
||||
mmr = joyGetDevCaps( i, &jc, sizeof( jc ) );
|
||||
if ( mmr != JOYERR_NOERROR )
|
||||
continue;
|
||||
|
||||
JoystickInfo_t &info = m_pJoystickInfo[m_nJoystickCount];
|
||||
info.m_nDeviceId = i;
|
||||
info.m_JoyInfoEx = ji;
|
||||
info.m_nButtonCount = (int)jc.wNumButtons;
|
||||
info.m_bHasPOVControl = ( jc.wCaps & JOYCAPS_HASPOV ) ? true : false;
|
||||
info.m_bDiagonalPOVControlEnabled = false;
|
||||
info.m_nFlags = JOY_RETURNCENTERED | JOY_RETURNBUTTONS | JOY_RETURNX | JOY_RETURNY;
|
||||
info.m_nAxisFlags = 0;
|
||||
if ( jc.wNumAxes >= 2 )
|
||||
{
|
||||
info.m_nAxisFlags |= 0x3;
|
||||
}
|
||||
if ( info.m_bHasPOVControl )
|
||||
{
|
||||
info.m_nFlags |= JOY_RETURNPOV;
|
||||
}
|
||||
if ( jc.wCaps & JOYCAPS_HASZ )
|
||||
{
|
||||
info.m_nFlags |= JOY_RETURNZ;
|
||||
info.m_nAxisFlags |= 0x4;
|
||||
}
|
||||
if ( jc.wCaps & JOYCAPS_HASR )
|
||||
{
|
||||
info.m_nFlags |= JOY_RETURNR;
|
||||
info.m_nAxisFlags |= 0x8;
|
||||
}
|
||||
if ( jc.wCaps & JOYCAPS_HASU )
|
||||
{
|
||||
info.m_nFlags |= JOY_RETURNU;
|
||||
info.m_nAxisFlags |= 0x10;
|
||||
}
|
||||
if ( jc.wCaps & JOYCAPS_HASV )
|
||||
{
|
||||
info.m_nFlags |= JOY_RETURNV;
|
||||
info.m_nAxisFlags |= 0x20;
|
||||
}
|
||||
info.m_nLastPolledButtons = 0;
|
||||
info.m_nLastPolledAxisButtons = 0;
|
||||
info.m_nLastPolledPOVState = 0;
|
||||
memset( info.m_pLastPolledAxes, 0, sizeof(info.m_pLastPolledAxes) );
|
||||
++m_nJoystickCount;
|
||||
|
||||
EnableJoystickInput( i, true );
|
||||
}
|
||||
}
|
||||
|
||||
void CInputSystem::ShutdownJoysticks()
|
||||
{
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Process the event
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::JoystickButtonEvent( ButtonCode_t button, int sample )
|
||||
{
|
||||
// package the key
|
||||
if ( sample )
|
||||
{
|
||||
PostButtonPressedEvent( IE_ButtonPressed, m_nLastSampleTick, button, button );
|
||||
}
|
||||
else
|
||||
{
|
||||
PostButtonReleasedEvent( IE_ButtonReleased, m_nLastSampleTick, button, button );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Update the joystick button state
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::UpdateJoystickButtonState( int nJoystick )
|
||||
{
|
||||
JoystickInfo_t &info = m_pJoystickInfo[nJoystick];
|
||||
JOYINFOEX& ji = info.m_JoyInfoEx;
|
||||
|
||||
// Standard joystick buttons
|
||||
unsigned int buttons = ji.dwButtons ^ info.m_nLastPolledButtons;
|
||||
if ( buttons )
|
||||
{
|
||||
for ( int j = 0 ; j < info.m_nButtonCount ; ++j )
|
||||
{
|
||||
int mask = buttons & ( 1 << j );
|
||||
if ( !mask )
|
||||
continue;
|
||||
|
||||
ButtonCode_t code = (ButtonCode_t)JOYSTICK_BUTTON( nJoystick, j );
|
||||
if ( mask & ji.dwButtons )
|
||||
{
|
||||
// down event
|
||||
JoystickButtonEvent( code, MAX_BUTTONSAMPLE );
|
||||
}
|
||||
else
|
||||
{
|
||||
// up event
|
||||
JoystickButtonEvent( code, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
info.m_nLastPolledButtons = (unsigned int)ji.dwButtons;
|
||||
}
|
||||
|
||||
// Analog axis buttons
|
||||
const float minValue = joy_axisbutton_threshold.GetFloat() * MAX_BUTTONSAMPLE;
|
||||
for ( int j = 0 ; j < MAX_JOYSTICK_AXES; ++j )
|
||||
{
|
||||
if ( ( info.m_nAxisFlags & (1 << j) ) == 0 )
|
||||
continue;
|
||||
|
||||
// Positive side of the axis
|
||||
int mask = ( 1 << (j << 1) );
|
||||
ButtonCode_t code = JOYSTICK_AXIS_BUTTON( nJoystick, (j << 1) );
|
||||
float value = GetAnalogValue( JOYSTICK_AXIS( nJoystick, j ) );
|
||||
|
||||
if ( value > minValue && !(info.m_nLastPolledAxisButtons & mask) )
|
||||
{
|
||||
info.m_nLastPolledAxisButtons |= mask;
|
||||
JoystickButtonEvent( code, MAX_BUTTONSAMPLE );
|
||||
}
|
||||
if ( value <= minValue && (info.m_nLastPolledAxisButtons & mask) )
|
||||
{
|
||||
info.m_nLastPolledAxisButtons &= ~mask;
|
||||
JoystickButtonEvent( code, 0 );
|
||||
}
|
||||
|
||||
// Negative side of the axis
|
||||
mask <<= 1;
|
||||
code = (ButtonCode_t)( code + 1 );
|
||||
if ( value < -minValue && !(info.m_nLastPolledAxisButtons & mask) )
|
||||
{
|
||||
info.m_nLastPolledAxisButtons |= mask;
|
||||
JoystickButtonEvent( code, MAX_BUTTONSAMPLE );
|
||||
}
|
||||
if ( value >= -minValue && (info.m_nLastPolledAxisButtons & mask) )
|
||||
{
|
||||
info.m_nLastPolledAxisButtons &= ~mask;
|
||||
JoystickButtonEvent( code, 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: Get raw joystick sample along axis
|
||||
//-----------------------------------------------------------------------------
|
||||
unsigned int CInputSystem::AxisValue( JoystickAxis_t axis, JOYINFOEX& ji )
|
||||
{
|
||||
switch (axis)
|
||||
{
|
||||
case JOY_AXIS_X:
|
||||
return (unsigned int)ji.dwXpos;
|
||||
case JOY_AXIS_Y:
|
||||
return (unsigned int)ji.dwYpos;
|
||||
case JOY_AXIS_Z:
|
||||
return (unsigned int)ji.dwZpos;
|
||||
case JOY_AXIS_R:
|
||||
return (unsigned int)ji.dwRpos;
|
||||
case JOY_AXIS_U:
|
||||
return (unsigned int)ji.dwUpos;
|
||||
case JOY_AXIS_V:
|
||||
return (unsigned int)ji.dwVpos;
|
||||
}
|
||||
// FIX: need to do some kind of error
|
||||
return (unsigned int)ji.dwXpos;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Update the joystick POV control
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::UpdateJoystickPOVControl( int nJoystick )
|
||||
{
|
||||
JoystickInfo_t &info = m_pJoystickInfo[nJoystick];
|
||||
JOYINFOEX& ji = info.m_JoyInfoEx;
|
||||
|
||||
if ( !info.m_bHasPOVControl )
|
||||
return;
|
||||
|
||||
// convert POV information into 4 bits of state information
|
||||
// this avoids any potential problems related to moving from one
|
||||
// direction to another without going through the center position
|
||||
unsigned int povstate = 0;
|
||||
|
||||
if ( ji.dwPOV != JOY_POVCENTERED )
|
||||
{
|
||||
if (ji.dwPOV == JOY_POVFORWARD) // 0
|
||||
{
|
||||
povstate |= 0x01;
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVRIGHT) // 9000
|
||||
{
|
||||
povstate |= 0x02;
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVBACKWARD) // 18000
|
||||
{
|
||||
povstate |= 0x04;
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVLEFT) // 27000
|
||||
{
|
||||
povstate |= 0x08;
|
||||
}
|
||||
|
||||
// Deal with diagonals if user wants them
|
||||
if ( info.m_bDiagonalPOVControlEnabled )
|
||||
{
|
||||
if (ji.dwPOV == JOY_POVFWDRIGHT) // 4500
|
||||
{
|
||||
povstate |= ( 0x01 | 0x02 );
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVRIGHTBACK) // 13500
|
||||
{
|
||||
povstate |= ( 0x02 | 0x04 );
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVFBACKLEFT) // 22500
|
||||
{
|
||||
povstate |= ( 0x04 | 0x08 );
|
||||
}
|
||||
if (ji.dwPOV == JOY_POVLEFTFWD) // 31500
|
||||
{
|
||||
povstate |= ( 0x08 | 0x01 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// determine which bits have changed and key an auxillary event for each change
|
||||
unsigned int buttons = povstate ^ info.m_nLastPolledPOVState;
|
||||
if ( buttons )
|
||||
{
|
||||
for ( int i = 0; i < JOYSTICK_POV_BUTTON_COUNT; ++i )
|
||||
{
|
||||
unsigned int mask = buttons & ( 1 << i );
|
||||
if ( !mask )
|
||||
continue;
|
||||
|
||||
ButtonCode_t code = (ButtonCode_t)JOYSTICK_POV_BUTTON( nJoystick, i );
|
||||
|
||||
if ( mask & povstate )
|
||||
{
|
||||
// Keydown on POV buttons
|
||||
JoystickButtonEvent( code, MAX_BUTTONSAMPLE );
|
||||
}
|
||||
else
|
||||
{
|
||||
// KeyUp on POV buttons
|
||||
JoystickButtonEvent( code, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
// Latch old values
|
||||
info.m_nLastPolledPOVState = povstate;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Purpose: Sample the joystick
|
||||
//-----------------------------------------------------------------------------
|
||||
void CInputSystem::PollJoystick( void )
|
||||
{
|
||||
if ( !m_JoysticksEnabled.IsAnyFlagSet() )
|
||||
return;
|
||||
|
||||
InputState_t &state = m_InputState[ m_bIsPolling ];
|
||||
for ( int i = 0; i < m_nJoystickCount; ++i )
|
||||
{
|
||||
if ( !m_JoysticksEnabled.IsFlagSet( 1 << i ) )
|
||||
continue;
|
||||
|
||||
JoystickInfo_t &info = m_pJoystickInfo[i];
|
||||
JOYINFOEX& ji = info.m_JoyInfoEx;
|
||||
Q_memset( &ji, 0, sizeof( ji ) );
|
||||
ji.dwSize = sizeof( ji );
|
||||
ji.dwFlags = (DWORD)info.m_nFlags;
|
||||
|
||||
if ( joyGetPosEx( info.m_nDeviceId, &ji ) != JOYERR_NOERROR )
|
||||
continue;
|
||||
|
||||
// This hack fixes a bug in the Logitech WingMan Warrior DirectInput Driver
|
||||
// rather than having 32768 be the zero point, they have the zero point at 32668
|
||||
// go figure -- anyway, now we get the full resolution out of the device
|
||||
if ( joy_wwhack1.GetBool() )
|
||||
{
|
||||
ji.dwUpos += 100;
|
||||
}
|
||||
|
||||
// Poll joystick axes
|
||||
for ( int j = 0; j < MAX_JOYSTICK_AXES; ++j )
|
||||
{
|
||||
if ( ( info.m_nAxisFlags & ( 1 << j ) ) == 0 )
|
||||
continue;
|
||||
|
||||
AnalogCode_t code = JOYSTICK_AXIS( i, j );
|
||||
int nValue = AxisValue( (JoystickAxis_t)j, ji ) - MAX_BUTTONSAMPLE;
|
||||
state.m_pAnalogDelta[ code ] = nValue - state.m_pAnalogValue[ code ];
|
||||
state.m_pAnalogValue[ code ] = nValue;
|
||||
if ( state.m_pAnalogDelta[ code ] != 0 )
|
||||
{
|
||||
PostEvent( IE_AnalogValueChanged, m_nLastSampleTick, code, state.m_pAnalogValue[ code ], state.m_pAnalogDelta[ code ] );
|
||||
}
|
||||
}
|
||||
|
||||
UpdateJoystickButtonState( i );
|
||||
UpdateJoystickPOVControl( i );
|
||||
}
|
||||
}
|
@ -18,8 +18,6 @@ def configure(conf):
|
||||
def build(bld):
|
||||
source = [
|
||||
'inputsystem.cpp',
|
||||
'joystick_sdl.cpp',
|
||||
'touch_sdl.cpp',
|
||||
'key_translation.cpp',
|
||||
'steamcontroller.cpp',
|
||||
'../public/tier0/memoverride.cpp'
|
||||
@ -41,6 +39,12 @@ def build(bld):
|
||||
|
||||
libs = ['tier0','tier1','tier2','vstdlib','SDL2','steam_api']
|
||||
|
||||
if bld.options.SDL:
|
||||
source += ['joystick_sdl.cpp', 'touch_sdl.cpp']
|
||||
elif bld.env.DEST_OS == 'win32':
|
||||
source += ['joystick_win32.cpp']
|
||||
libs += ['WINMM']
|
||||
|
||||
if bld.env.DEST_OS == 'win32':
|
||||
libs += ['USER32']
|
||||
|
||||
|
@ -287,7 +287,7 @@ namespace ColorSpace
|
||||
{
|
||||
// preload 3.0f onto the returns so that we don't need to multiply the bumpAverage by it
|
||||
// straight away (eg, reschedule this dependent op)
|
||||
static const fltx4 vThree = { 3.0f, 3.0f, 3.0f, 0.0f };
|
||||
static const fltx4 vThree = FLTX4( 3.0f, 3.0f, 3.0f, 0.0f );
|
||||
fltx4 retValBump1 = MulSIMD( vThree, linearBumpColor1);
|
||||
fltx4 retValBump2 = MulSIMD( vThree, linearBumpColor2);
|
||||
fltx4 retValBump3 = MulSIMD( vThree, linearBumpColor3);
|
||||
|
@ -16,7 +16,7 @@
|
||||
// memdbgon must be the last include file in a .cpp file!!!
|
||||
#include "tier0/memdbgon.h"
|
||||
|
||||
#if !defined(COMPILER_MSVC64) && !defined(LINUX) && !defined(COMPILER_CLANG)
|
||||
#if defined(_M_IX86) && !defined(LINUX) && !defined(COMPILER_CLANG)
|
||||
// Implement for 64-bit Windows if needed.
|
||||
// Clang hits "fatal error: error in backend:" and other errors when trying
|
||||
// to compile the inline assembly below. 3DNow support is highly unlikely to
|
||||
|
@ -3258,7 +3258,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
|
||||
// SSE Generally performs better than 3DNow when present, so this is placed
|
||||
// first to allow SSE to override these settings.
|
||||
#if !defined( OSX ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(LINUX) && !defined(PLATFORM_BSD)
|
||||
#ifdef _M_IX86
|
||||
if ( bAllow3DNow && pi.m_b3DNow )
|
||||
{
|
||||
s_b3DNowEnabled = true;
|
||||
@ -3291,7 +3291,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
pfRSqrt = _SSE_RSqrtAccurate;
|
||||
pfRSqrtFast = _SSE_RSqrtFast;
|
||||
#endif
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
#ifdef _M_IX86
|
||||
pfFastSinCos = _SSE_SinCos;
|
||||
pfFastCos = _SSE_cos;
|
||||
#endif
|
||||
@ -3304,7 +3304,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
|
||||
if ( bAllowSSE2 && pi.m_bSSE2 )
|
||||
{
|
||||
s_bSSE2Enabled = true;
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
#ifdef _M_IX86
|
||||
pfFastSinCos = _SSE2_SinCos;
|
||||
pfFastCos = _SSE2_cos;
|
||||
#endif
|
||||
|
@ -91,7 +91,7 @@ float _SSE_Sqrt(float x)
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
float root = 0.f;
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
_asm
|
||||
{
|
||||
sqrtss xmm0, x
|
||||
@ -122,7 +122,7 @@ float _SSE_RSqrtAccurate(float x)
|
||||
}
|
||||
#else
|
||||
|
||||
#ifdef POSIX
|
||||
#if POSIX || defined(_M_ARM)
|
||||
const __m128 f3 = _mm_set_ss(3.0f); // 3 as SSE value
|
||||
const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value
|
||||
#endif
|
||||
@ -131,7 +131,7 @@ const __m128 f05 = _mm_set_ss(0.5f); // 0.5 as SSE value
|
||||
float _SSE_RSqrtAccurate(float a)
|
||||
{
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
float x;
|
||||
float half = 0.5f;
|
||||
float three = 3.f;
|
||||
@ -153,7 +153,7 @@ float _SSE_RSqrtAccurate(float a)
|
||||
}
|
||||
|
||||
return x;
|
||||
#elif POSIX
|
||||
#elif POSIX || defined(_M_ARM)
|
||||
__m128 xx = _mm_load_ss( &a );
|
||||
__m128 xr = _mm_rsqrt_ss( xx );
|
||||
__m128 xt;
|
||||
@ -764,7 +764,7 @@ float _SSE_cos( float x )
|
||||
//-----------------------------------------------------------------------------
|
||||
// SSE2 implementations of optimized routines:
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
#if defined(PLATFORM_WINDOWS_PC32) && !defined(_M_ARM)
|
||||
void _SSE2_SinCos(float x, float* s, float* c) // any x
|
||||
{
|
||||
#ifdef _WIN32
|
||||
@ -850,9 +850,7 @@ void _SSE2_SinCos(float x, float* s, float* c) // any x
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif // PLATFORM_WINDOWS_PC32
|
||||
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
float _SSE2_cos(float x)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
@ -970,9 +968,7 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
||||
{
|
||||
Assert( s_bMathlibInitialized );
|
||||
@ -1026,9 +1022,7 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
|
||||
#error "Not Implemented"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest )
|
||||
{
|
||||
// FIXME: This don't work!! It will overwrite memory in the write to dest
|
||||
@ -1057,7 +1051,6 @@ void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const floa
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef PFN_VECTORMA
|
||||
@ -1101,7 +1094,6 @@ float (__cdecl *pfVectorMA)(Vector& v) = _VectorMA;
|
||||
// NJS: (Nov 1 2002) -NOT- faster. may time a couple cycles faster in a single function like
|
||||
// this, but when inlined, and instruction scheduled, the C version is faster.
|
||||
// Verified this via VTune
|
||||
/*
|
||||
vec_t DotProduct (const vec_t *a, const vec_t *c)
|
||||
{
|
||||
vec_t temp;
|
||||
@ -1124,6 +1116,6 @@ vec_t DotProduct (const vec_t *a, const vec_t *c)
|
||||
ret
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
#endif // COMPILER_MSVC64
|
||||
|
@ -7,35 +7,35 @@
|
||||
#include "mathlib/ssemath.h"
|
||||
#include "mathlib/ssequaternion.h"
|
||||
|
||||
const fltx4 Four_PointFives={0.5,0.5,0.5,0.5};
|
||||
const fltx4 Four_PointFives=FLTX4(0.5,0.5,0.5,0.5);
|
||||
#ifndef _X360
|
||||
const fltx4 Four_Zeros={0.0,0.0,0.0,0.0};
|
||||
const fltx4 Four_Ones={1.0,1.0,1.0,1.0};
|
||||
const fltx4 Four_Zeros=FLTX4(0.0,0.0,0.0,0.0);
|
||||
const fltx4 Four_Ones=FLTX4(1.0,1.0,1.0,1.0);
|
||||
#endif
|
||||
const fltx4 Four_Twos={2.0,2.0,2.0,2.0};
|
||||
const fltx4 Four_Threes={3.0,3.0,3.0,3.0};
|
||||
const fltx4 Four_Fours={4.0,4.0,4.0,4.0};
|
||||
const fltx4 Four_Origin={0,0,0,1};
|
||||
const fltx4 Four_NegativeOnes={-1,-1,-1,-1};
|
||||
const fltx4 Four_Twos=FLTX4(2.0,2.0,2.0,2.0);
|
||||
const fltx4 Four_Threes=FLTX4(3.0,3.0,3.0,3.0);
|
||||
const fltx4 Four_Fours=FLTX4(4.0,4.0,4.0,4.0);
|
||||
const fltx4 Four_Origin=FLTX4(0,0,0,1);
|
||||
const fltx4 Four_NegativeOnes=FLTX4(-1,-1,-1,-1);
|
||||
|
||||
const fltx4 Four_2ToThe21s={ (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) };
|
||||
const fltx4 Four_2ToThe22s={ (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) };
|
||||
const fltx4 Four_2ToThe23s={ (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) };
|
||||
const fltx4 Four_2ToThe24s={ (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) };
|
||||
const fltx4 Four_2ToThe21s=FLTX4( (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) );
|
||||
const fltx4 Four_2ToThe22s=FLTX4( (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) );
|
||||
const fltx4 Four_2ToThe23s=FLTX4( (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) );
|
||||
const fltx4 Four_2ToThe24s=FLTX4( (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) );
|
||||
|
||||
const fltx4 Four_Point225s={ .225, .225, .225, .225 };
|
||||
const fltx4 Four_Epsilons={FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON};
|
||||
const fltx4 Four_Point225s=FLTX4( .225, .225, .225, .225 );
|
||||
const fltx4 Four_Epsilons=FLTX4(FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON);
|
||||
|
||||
const fltx4 Four_FLT_MAX={FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
|
||||
const fltx4 Four_Negative_FLT_MAX={-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX};
|
||||
const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. };
|
||||
const fltx4 Four_FLT_MAX=FLTX4(FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX);
|
||||
const fltx4 Four_Negative_FLT_MAX=FLTX4(-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX);
|
||||
const fltx4 g_SIMD_0123 = FLTX4( 0., 1., 2., 3. );
|
||||
|
||||
const fltx4 g_QuatMultRowSign[4] =
|
||||
{
|
||||
{ 1.0f, 1.0f, -1.0f, 1.0f },
|
||||
{ -1.0f, 1.0f, 1.0f, 1.0f },
|
||||
{ 1.0f, -1.0f, 1.0f, 1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, 1.0f }
|
||||
FLTX4( 1.0f, 1.0f, -1.0f, 1.0f ),
|
||||
FLTX4( -1.0f, 1.0f, 1.0f, 1.0f ),
|
||||
FLTX4( 1.0f, -1.0f, 1.0f, 1.0f ),
|
||||
FLTX4( -1.0f, -1.0f, -1.0f, 1.0f )
|
||||
};
|
||||
|
||||
const uint32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = {0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
|
||||
|
@ -20,7 +20,7 @@
|
||||
|
||||
#define MAGIC_NUMBER (1<<15) // gives 8 bits of fraction
|
||||
|
||||
static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
|
||||
static fltx4 Four_MagicNumbers = FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER );
|
||||
|
||||
|
||||
static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
|
||||
|
@ -1220,7 +1220,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
|
||||
Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
|
||||
Assert( m_nCurrentVertex < m_nMaxVertexCount );
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM)
|
||||
const void *pRead = &vertex;
|
||||
void *pCurrPos = m_pCurrPosition;
|
||||
__asm
|
||||
@ -1236,7 +1236,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
|
||||
movntps [edi + 16], xmm1
|
||||
movntps [edi + 32], xmm2
|
||||
}
|
||||
#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64)
|
||||
#elif defined(GNUC) || defined(_WIN32)
|
||||
const char *pRead = (char *)&vertex;
|
||||
char *pCurrPos = (char *)m_pCurrPosition;
|
||||
__m128 m1 = _mm_load_ps( (float *)pRead );
|
||||
@ -1267,7 +1267,7 @@ inline void CVertexBuilder::Fast4VerticesSSE(
|
||||
Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
|
||||
Assert( m_nCurrentVertex < m_nMaxVertexCount-3 );
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM)
|
||||
void *pCurrPos = m_pCurrPosition;
|
||||
__asm
|
||||
{
|
||||
@ -1309,7 +1309,7 @@ inline void CVertexBuilder::Fast4VerticesSSE(
|
||||
movntps [edi + 80+96], xmm5
|
||||
|
||||
}
|
||||
#elif defined(__arm__) || defined(PLATFORM_WINDOWS_PC64)
|
||||
#elif defined(__arm__) || defined(_WIN32)
|
||||
const void *pReadA = &vtx_a;
|
||||
const void *pReadB = &vtx_b;
|
||||
const void *pReadC = &vtx_c;
|
||||
@ -1430,7 +1430,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
|
||||
Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
|
||||
Assert( m_nCurrentVertex < m_nMaxVertexCount );
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS ) && !defined(_M_ARM)
|
||||
const void *pRead = &vertex;
|
||||
void *pCurrPos = m_pCurrPosition;
|
||||
__asm
|
||||
@ -1448,21 +1448,10 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
|
||||
movntps [edi + 32], xmm2
|
||||
movntps [edi + 48], xmm3
|
||||
}
|
||||
#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64)
|
||||
#elif defined(GNUC) || defined(_WIN32)
|
||||
const void *pRead = &vertex;
|
||||
void *pCurrPos = m_pCurrPosition;
|
||||
|
||||
/* __asm__ __volatile__ (
|
||||
"movaps (%0), %%xmm0\n"
|
||||
"movaps 16(%0), %%xmm1\n"
|
||||
"movaps 32(%0), %%xmm2\n"
|
||||
"movaps 48(%0), %%xmm3\n"
|
||||
"movntps %%xmm0, (%1)\n"
|
||||
"movntps %%xmm1, 16(%1)\n"
|
||||
"movntps %%xmm2, 32(%1)\n"
|
||||
"movntps %%xmm3, 48(%1)\n"
|
||||
:: "r" (pRead), "r" (pCurrPos) : "memory"); */
|
||||
|
||||
__m128 m1 = _mm_load_ps( (float *)pRead );
|
||||
__m128 m2 = _mm_load_ps( (float *)((intp)pRead + 16) );
|
||||
__m128 m3 = _mm_load_ps( (float *)((intp)pRead + 32) );
|
||||
|
@ -405,6 +405,9 @@ void inline SinCos( float radians, float *sine, float *cosine )
|
||||
{
|
||||
#if defined( _X360 )
|
||||
XMScalarSinCos( sine, cosine, radians );
|
||||
#elif defined( PLATFORM_WINDOWS_PC64 ) || defined(_M_ARM)
|
||||
*sine = sin( radians );
|
||||
*cosine = cos( radians );
|
||||
#elif defined( PLATFORM_WINDOWS_PC32 )
|
||||
_asm
|
||||
{
|
||||
@ -417,11 +420,8 @@ void inline SinCos( float radians, float *sine, float *cosine )
|
||||
fstp DWORD PTR [edx]
|
||||
fstp DWORD PTR [eax]
|
||||
}
|
||||
#elif defined( PLATFORM_WINDOWS_PC64 )
|
||||
*sine = sin( radians );
|
||||
*cosine = cos( radians );
|
||||
#elif defined( OSX )
|
||||
__sincosf(radians, sine, cosine);
|
||||
__sincosf(radians, sine, cosine);
|
||||
#elif defined( POSIX )
|
||||
sincosf(radians, sine, cosine);
|
||||
#endif
|
||||
|
@ -135,7 +135,8 @@ public:
|
||||
Assert( m_pData );
|
||||
|
||||
static FourVectors value{Four_Zeros, Four_Zeros, Four_Zeros};
|
||||
memutils::set( m_pData, value, m_nHeight*m_nPaddedWidth );
|
||||
for (size_t n = m_nHeight * m_nPaddedWidth; n; n--)
|
||||
*(m_pData+n) = value;
|
||||
}
|
||||
|
||||
void RaiseToPower( float power );
|
||||
|
@ -63,6 +63,12 @@ typedef __m128 fltx4;
|
||||
typedef __m128 i32x4;
|
||||
typedef __m128 u32x4;
|
||||
|
||||
#ifdef _M_ARM
|
||||
#define FLTX4(w, x, y, z) {(w) + (unsigned long long(x) << 32), (y) + (unsigned long long(z) << 32)}
|
||||
#else
|
||||
#define FLTX4(w, x, y, z) {w, x, y, z}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// The FLTX4 type is a fltx4 used as a parameter to a function.
|
||||
@ -1828,7 +1834,7 @@ FORCEINLINE fltx4 ReplicateX4( float flValue )
|
||||
FORCEINLINE float SubFloat( const fltx4 & a, int idx )
|
||||
{
|
||||
// NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
|
||||
#ifndef POSIX
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
return a.m128_f32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<float const *>(&a))[idx];
|
||||
@ -1837,7 +1843,7 @@ FORCEINLINE float SubFloat( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE float & SubFloat( fltx4 & a, int idx )
|
||||
{
|
||||
#ifndef POSIX
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
return a.m128_f32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<float *>(&a))[idx];
|
||||
@ -1851,8 +1857,8 @@ FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
|
||||
{
|
||||
#ifndef POSIX
|
||||
return a.m128_u32[idx];
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
return a.m128_u32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<uint32 const *>(&a))[idx];
|
||||
#endif
|
||||
@ -1860,8 +1866,8 @@ FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
|
||||
|
||||
FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
|
||||
{
|
||||
#ifndef POSIX
|
||||
return a.m128_u32[idx];
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
return a.m128_u32[ idx ];
|
||||
#else
|
||||
return (reinterpret_cast<uint32 *>(&a))[idx];
|
||||
#endif
|
||||
|
@ -43,7 +43,7 @@ struct cplane_t;
|
||||
#define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h
|
||||
#endif
|
||||
|
||||
class alignas(16) VMatrix
|
||||
class VMatrix
|
||||
{
|
||||
public:
|
||||
|
||||
|
@ -175,7 +175,9 @@ public:
|
||||
virtual void TexMagFilter( ShaderTexFilterMode_t texFilterMode ) = 0;
|
||||
virtual void TexWrap( ShaderTexCoordComponent_t coord, ShaderTexWrapMode_t wrapMode ) = 0;
|
||||
|
||||
#ifndef SHADERAPIDX10
|
||||
virtual void CopyRenderTargetToTexture( ShaderAPITextureHandle_t textureHandle ) = 0;
|
||||
#endif
|
||||
|
||||
// Binds a particular material to render with
|
||||
virtual void Bind( IMaterial* pMaterial ) = 0;
|
||||
@ -612,6 +614,7 @@ public:
|
||||
//extended clear buffers function with alpha independent from color
|
||||
virtual void ClearBuffersObeyStencilEx( bool bClearColor, bool bClearAlpha, bool bClearDepth ) = 0;
|
||||
|
||||
#ifndef SHADERAPIDX10
|
||||
// Allows copying a render target to another texture by specifying them both.
|
||||
virtual void CopyRenderTargetToScratchTexture( ShaderAPITextureHandle_t srcRt, ShaderAPITextureHandle_t dstTex, Rect_t *pSrcRect = NULL, Rect_t *pDstRect = NULL ) = 0;
|
||||
|
||||
@ -627,6 +630,7 @@ public:
|
||||
|
||||
virtual void CopyTextureToTexture( ShaderAPITextureHandle_t srcTex, ShaderAPITextureHandle_t dstTex ) = 0;
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
@ -68,7 +68,7 @@ inline bool IsPowerOfTwo( T value )
|
||||
|
||||
// From crtdefs.h
|
||||
#if !defined(UNALIGNED)
|
||||
#if defined(_M_IA64) || defined(_M_AMD64)
|
||||
#if defined(_M_AMD64) || defined(_M_ARM)
|
||||
#define UNALIGNED __unaligned
|
||||
#else
|
||||
#define UNALIGNED
|
||||
|
@ -852,7 +852,9 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)
|
||||
//-----------------------------------------------------------------------------
|
||||
//#define CHECK_FLOAT_EXCEPTIONS 1
|
||||
|
||||
#if !defined( _X360 )
|
||||
#if defined (__arm__) || defined (__aarch64__)
|
||||
inline void SetupFPUControlWord() {}
|
||||
#elif !defined( _X360 )
|
||||
#if defined( _MSC_VER )
|
||||
|
||||
#if defined( PLATFORM_WINDOWS_PC64 )
|
||||
@ -898,8 +900,6 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#elif defined (__arm__) || defined (__aarch64__)
|
||||
inline void SetupFPUControlWord() {}
|
||||
#else
|
||||
inline void SetupFPUControlWord()
|
||||
{
|
||||
@ -1025,7 +1025,7 @@ inline T QWordSwapC( T dw )
|
||||
return output;
|
||||
}
|
||||
|
||||
#elif defined( _MSC_VER ) && !defined( PLATFORM_WINDOWS_PC64 )
|
||||
#elif defined( _MSC_VER ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(_M_ARM)
|
||||
|
||||
#define WordSwap WordSwapAsm
|
||||
#define DWordSwap DWordSwapAsm
|
||||
@ -1229,8 +1229,18 @@ PLATFORM_INTERFACE time_t Plat_timegm( struct tm *timeptr );
|
||||
PLATFORM_INTERFACE struct tm * Plat_localtime( const time_t *timep, struct tm *result );
|
||||
|
||||
#if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
|
||||
#ifdef _M_X64
|
||||
extern "C" unsigned __int64 __rdtsc();
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#define MSVC_ARM_SYSREG(op0, op1, crn, crm, op2) \
|
||||
( ((op0 & 1) << 14) | \
|
||||
((op1 & 7) << 11) | \
|
||||
((crn & 15) << 7) | \
|
||||
((crm & 15) << 3) | \
|
||||
((op2 & 7) << 0) )
|
||||
#endif
|
||||
#endif
|
||||
|
||||
inline uint64 Plat_Rdtsc()
|
||||
@ -1241,15 +1251,15 @@ inline uint64 Plat_Rdtsc()
|
||||
return t.tv_sec * 1000000000ULL + t.tv_nsec;
|
||||
#elif defined( _X360 )
|
||||
return ( uint64 )__mftb32();
|
||||
#elif defined( _WIN64 )
|
||||
#elif defined( _M_IX86 )
|
||||
_asm rdtsc
|
||||
#elif defined( _M_ARM )
|
||||
uint32 val = _MoveFromCoprocessor(15,0, 9,13,0);
|
||||
return ((uint64)val) << 6;
|
||||
#elif defined( _M_ARM64 ) || defined( _M_ARM64EC )
|
||||
return _ReadStatusReg(MSVC_ARM_SYSREG(3,3, 9,12,5));
|
||||
#elif defined( COMPILER_MSVC )
|
||||
return ( uint64 )__rdtsc();
|
||||
#elif defined( _WIN32 )
|
||||
#if defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
|
||||
return ( uint64 )__rdtsc();
|
||||
#else
|
||||
__asm rdtsc;
|
||||
__asm ret;
|
||||
#endif
|
||||
#elif defined( __i386__ )
|
||||
uint64 val;
|
||||
__asm__ __volatile__ ( "rdtsc" : "=A" (val) );
|
||||
|
@ -241,6 +241,8 @@ inline void ThreadPause()
|
||||
_mm_pause();
|
||||
#elif defined( COMPILER_MSVC32 )
|
||||
__asm pause;
|
||||
#elif defined(_M_ARM)
|
||||
__yield();
|
||||
#elif defined( COMPILER_MSVCX360 )
|
||||
YieldProcessor();
|
||||
__asm { or r0,r0,r0 }
|
||||
@ -445,7 +447,7 @@ PLATFORM_INTERFACE bool ThreadInterlockedAssignIf64( volatile int64 *pDest, int6
|
||||
|
||||
PLATFORM_INTERFACE int64 ThreadInterlockedExchange64( int64 volatile *, int64 value ) NOINLINE;
|
||||
|
||||
#ifdef COMPILER_MSVC32
|
||||
#if COMPILER_MSVC32 || _M_ARM
|
||||
PLATFORM_INTERFACE int64 ThreadInterlockedIncrement64( int64 volatile * ) NOINLINE;
|
||||
PLATFORM_INTERFACE int64 ThreadInterlockedDecrement64( int64 volatile * ) NOINLINE;
|
||||
PLATFORM_INTERFACE int64 ThreadInterlockedExchangeAdd64( int64 volatile *, int64 value ) NOINLINE;
|
||||
|
@ -657,7 +657,7 @@ static matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3
|
||||
static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result )
|
||||
{
|
||||
// NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && defined(_M_IX86)
|
||||
switch( boneweights.numbones )
|
||||
{
|
||||
default:
|
||||
@ -866,11 +866,9 @@ static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matr
|
||||
return &result;
|
||||
#endif
|
||||
}
|
||||
#elif POSIX || PLATFORM_WINDOWS_PC64
|
||||
#elif POSIX || _WIN32
|
||||
// #warning "ComputeSkinMatrixSSE C implementation only"
|
||||
return ComputeSkinMatrix( boneweights, pPoseToWorld, result );
|
||||
#elif defined( _X360 )
|
||||
return ComputeSkinMatrix( boneweights, pPoseToWorld, result );
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
@ -909,7 +907,7 @@ inline void CStudioRender::R_ComputeLightAtPoint3( const Vector &pos, const Vect
|
||||
|
||||
// define SPECIAL_SSE_MESH_PROCESSOR to enable code which contains a special optimized SSE lighting loop, significantly
|
||||
// improving software vertex processing performace.
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
#define SPECIAL_SSE_MESH_PROCESSOR
|
||||
#endif
|
||||
|
||||
|
@ -66,7 +66,7 @@ void R_LightAmbient_4D( const Vector& normal, Vector4D* pLightBoxColor, Vector &
|
||||
VectorMA( lv, normal[2]*normal[2], normal[2] > 0.f ? pLightBoxColor[4].AsVector3D() : pLightBoxColor[5].AsVector3D(), lv );
|
||||
}
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv )
|
||||
{
|
||||
// VPROF( "R_LightAmbient" );
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
#include "tier0/platform.h"
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -40,7 +40,7 @@ float FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const Vector&
|
||||
// Copies lighting state into a buffer, returns number of lights copied
|
||||
int CopyLocalLightingState( int nMaxLights, LightDesc_t *pDest, int nLightCount, const LightDesc_t *pSrc );
|
||||
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
// SSE optimized versions
|
||||
void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv );
|
||||
__m128 FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const FourVectors& delta );
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "flexrenderdata.h"
|
||||
#include "mathlib/compressed_vector.h"
|
||||
#include "r_studiolight.h"
|
||||
#if defined( _WIN32 ) && !defined( _X360 )
|
||||
#if defined( _WIN32 ) && !defined( _X360 ) && !defined(_M_ARM)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
#include "tier0/dbg.h"
|
||||
|
@ -6,7 +6,7 @@
|
||||
//
|
||||
//===========================================================================//
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32) && !defined(_M_ARM)
|
||||
#include <windows.h>
|
||||
|
||||
#pragma warning( disable : 4530 ) // warning: exception handler -GX option
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "pch_tier0.h"
|
||||
#include "tier0/cpumonitoring.h"
|
||||
|
||||
#ifdef PLATFORM_WINDOWS_PC32
|
||||
#ifdef _M_IX86
|
||||
#include "tier0/threadtools.h"
|
||||
#define NOMINMAX
|
||||
#undef min
|
||||
|
@ -1740,7 +1740,7 @@ bool ThreadInterlockedAssignIf( int32 volatile *pDest, int32 value, int32 comper
|
||||
{
|
||||
Assert( (size_t)pDest % 4 == 0 );
|
||||
|
||||
#if !(defined(_WIN64) || defined (_X360))
|
||||
#if !(defined(_WIN64) || defined (_X360) || defined(_M_ARM))
|
||||
__asm
|
||||
{
|
||||
mov eax,comperand
|
||||
@ -1773,7 +1773,7 @@ void *ThreadInterlockedCompareExchangePointer( void * volatile *pDest, void *val
|
||||
bool ThreadInterlockedAssignPointerIf( void * volatile *pDest, void *value, void *comperand )
|
||||
{
|
||||
Assert( (size_t)pDest % 4 == 0 );
|
||||
#if !(defined(_WIN64) || defined (_X360))
|
||||
#if !(defined(_WIN64) || defined (_X360) || defined(_M_ARM))
|
||||
__asm
|
||||
{
|
||||
mov eax,comperand
|
||||
@ -1807,13 +1807,19 @@ int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, in
|
||||
lock CMPXCHG8B [esi];
|
||||
}
|
||||
}
|
||||
#elif defined(_M_ARM)
|
||||
int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, int64 comperand )
|
||||
{
|
||||
Assert( (size_t)pDest % 8 == 0 );
|
||||
return InterlockedCompareExchange64( pDest, value, comperand );
|
||||
}
|
||||
#endif
|
||||
|
||||
bool ThreadInterlockedAssignIf64(volatile int64 *pDest, int64 value, int64 comperand )
|
||||
{
|
||||
Assert( (size_t)pDest % 8 == 0 );
|
||||
|
||||
#if defined(_X360) || defined(_WIN64)
|
||||
#if defined(_X360) || defined(_WIN64) || defined(_M_ARM)
|
||||
return ( ThreadInterlockedCompareExchange64( pDest, value, comperand ) == comperand );
|
||||
#else
|
||||
__asm
|
||||
|
@ -22,8 +22,8 @@ def build(bld):
|
||||
'assert_dialog.cpp',
|
||||
'commandline.cpp',
|
||||
'cpu.cpp',
|
||||
'cpumonitoring.cpp',
|
||||
'cpu_usage.cpp',
|
||||
'cpumonitoring.cpp',
|
||||
'dbg.cpp',
|
||||
'dynfunction.cpp',
|
||||
'fasttimer.cpp',
|
||||
@ -54,10 +54,13 @@ def build(bld):
|
||||
'assert_dialog.rc',
|
||||
#'etwprof.cpp', [$WINDOWS]
|
||||
'platform.cpp',
|
||||
'pme.cpp',
|
||||
'vcrmode.cpp',
|
||||
'win32consoleio.cpp'
|
||||
]
|
||||
if bld.env.DEST_CPU == 'arm':
|
||||
source += ['pme_posix.cpp']
|
||||
else:
|
||||
source += ['pme.cpp']
|
||||
if bld.env.DEST_CPU == 'amd64':
|
||||
source += [
|
||||
'InterlockedCompareExchange128.masm'
|
||||
|
@ -6,7 +6,7 @@
|
||||
// $NoKeywords: $
|
||||
//=============================================================================//
|
||||
|
||||
#if defined( _X360 ) || defined( WIN64 )
|
||||
#if defined( _X360 ) || defined( WIN64 ) || defined(_M_ARM)
|
||||
|
||||
bool CheckMMXTechnology(void) { return false; }
|
||||
bool CheckSSETechnology(void) { return false; }
|
||||
|
@ -453,7 +453,7 @@ private:
|
||||
#ifdef WIN32
|
||||
static const
|
||||
#endif
|
||||
fltx4 g_IVPToHLDir = { 1.0f, -1.0f, 1.0f, 1.0f };
|
||||
fltx4 g_IVPToHLDir = FLTX4( 1.0f, -1.0f, 1.0f, 1.0f );
|
||||
|
||||
//static const fltx4 g_IVPToHLPosition = { IVP2HL(1.0f), -IVP2HL(1.0f), IVP2HL(1.0f), IVP2HL(1.0f) };
|
||||
|
||||
@ -680,7 +680,7 @@ bool CTraceIVP::BuildLeafmapCache( const leafmap_t * RESTRICT pLeafmap )
|
||||
#endif
|
||||
}
|
||||
|
||||
static const fltx4 g_IndexBase = {0,1,2,3};
|
||||
static const fltx4 g_IndexBase =FLTX4(0,1,2,3);
|
||||
int CTraceIVP::SupportMapCached( const Vector &dir, Vector *pOut ) const
|
||||
{
|
||||
VPROF("SupportMapCached");
|
||||
|
@ -218,7 +218,11 @@ extern "C" byte *GetStackPtr64();
|
||||
#define GetStackPtr( pStackPtr) byte *pStackPtr = GetStackPtr64();
|
||||
#else
|
||||
#ifdef WIN32
|
||||
#define GetStackPtr( pStackPtr ) byte *pStackPtr; __asm mov pStackPtr, esp
|
||||
# ifdef _M_ARM
|
||||
# define GetStackPtr( pStackPtr ) byte x; byte *pStackPtr = &x
|
||||
# else
|
||||
# define GetStackPtr( pStackPtr ) byte *pStackPtr; __asm mov pStackPtr, esp
|
||||
# endif
|
||||
#elif defined(GNUC)
|
||||
// Apple's version of gcc/g++ doesn't return the expected value using the intrinsic, so
|
||||
// do it the old fashioned way - this will also use asm on linux (since we don't compile
|
||||
@ -649,7 +653,7 @@ bool Internal_Coroutine_Continue( HCoroutine hCoroutine, const char *pchDebugMsg
|
||||
bool bInCoroutineAlready = GCoroutineMgr().IsAnyCoroutineActive();
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef _WIN64
|
||||
#if !defined( _WIN64 ) && !defined( _M_ARM )
|
||||
// make sure nobody has a try/catch block and then yielded
|
||||
// because we hate that and we will crash
|
||||
uint32 topofexceptionchain;
|
||||
@ -897,7 +901,7 @@ void Coroutine_YieldToMain()
|
||||
CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_YieldToMain() %s#%x -> %s#%x\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine ) );
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef _WIN64
|
||||
#if !defined( _WIN64 ) && !defined( _M_ARM )
|
||||
// make sure nobody has a try/catch block and then yielded
|
||||
// because we hate that and we will crash
|
||||
uint32 topofexceptionchain;
|
||||
|
36
wscript
36
wscript
@ -1,5 +1,6 @@
|
||||
#! /usr/bin/env python
|
||||
# encoding: utf-8
|
||||
# vim: noexpandtab
|
||||
# nillerusr
|
||||
|
||||
from __future__ import print_function
|
||||
@ -222,6 +223,8 @@ def define_platform(conf):
|
||||
'_ALLOW_MSC_VER_MISMATCH',
|
||||
'NO_X360_XDK'
|
||||
])
|
||||
if conf.env.DEST_CPU == 'arm':
|
||||
conf.env.append_unique('DEFINES', ['__arm__=1'])
|
||||
elif conf.env.DEST_OS == 'darwin':
|
||||
conf.env.append_unique('DEFINES', [
|
||||
'OSX=1', '_OSX=1',
|
||||
@ -384,14 +387,16 @@ def check_deps(conf):
|
||||
conf.check(lib='opus', uselib_store='OPUS')
|
||||
|
||||
if conf.env.DEST_OS == 'win32':
|
||||
conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB')
|
||||
# conf.check(lib='nvtc', uselib_store='NVTC')
|
||||
# conf.check(lib='ati_compress_mt_vc10', uselib_store='ATI_COMPRESS_MT_VC10')
|
||||
conf.check(lib='SDL2', uselib_store='SDL2')
|
||||
conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG')
|
||||
conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG')
|
||||
conf.check(lib='d3dx9', uselib_store='D3DX9')
|
||||
conf.check(lib='d3d9', uselib_store='D3D9')
|
||||
if conf.env.DEST_CPU == 'arm':
|
||||
conf.check(lib='d3d9', uselib_store='D3D9')
|
||||
conf.check(lib='d3dcompiler', uselib_store='D3DCOMPILER')
|
||||
else:
|
||||
conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB')
|
||||
conf.check(lib='SDL2', uselib_store='SDL2')
|
||||
conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG')
|
||||
conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG')
|
||||
conf.check(lib='d3dx9', uselib_store='D3DX9')
|
||||
conf.check(lib='d3d9', uselib_store='D3D9')
|
||||
conf.check(lib='dsound', uselib_store='DSOUND')
|
||||
conf.check(lib='dxguid', uselib_store='DXGUID')
|
||||
if conf.options.OPUS:
|
||||
@ -405,10 +410,12 @@ def configure(conf):
|
||||
# Force XP compability, all build targets should add
|
||||
# subsystem=bld.env.MSVC_SUBSYSTEM
|
||||
# TODO: wrapper around bld.stlib, bld.shlib and so on?
|
||||
conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01'
|
||||
conf.env.MSVC_TARGETS = ['x86'] # explicitly request x86 target for MSVC
|
||||
if conf.options.ALLOW64:
|
||||
conf.env.MSVC_TARGETS = ['x64']
|
||||
conf.env.MSVC_TARGETS = ['x86' if not conf.options.ALLOW64 else 'x64']
|
||||
if conf.env.MSVC_TARGETS[0] == 'x86':
|
||||
conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01'
|
||||
else:
|
||||
conf.env.MSVC_SUBSYSTEM = 'WINDOWS'
|
||||
|
||||
if sys.platform == 'win32':
|
||||
conf.load('msvc_pdb_ext msdev msvs')
|
||||
conf.load('subproject xcompile compiler_c compiler_cxx gitversion clang_compilation_database strip_on_install_v2 waf_unit_test enforce_pic')
|
||||
@ -504,7 +511,6 @@ def configure(conf):
|
||||
else:
|
||||
cflags += [
|
||||
'/I'+os.path.abspath('.')+'/thirdparty/SDL',
|
||||
'/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX',
|
||||
'/GF',
|
||||
'/Gy',
|
||||
'/fp:fast',
|
||||
@ -514,6 +520,8 @@ def configure(conf):
|
||||
'/TP',
|
||||
'/EHsc'
|
||||
]
|
||||
if conf.env.DEST_CPU != 'arm':
|
||||
cflags += ['/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX']
|
||||
|
||||
if conf.options.BUILD_TYPE == 'debug':
|
||||
linkflags += [
|
||||
@ -593,7 +601,7 @@ def configure(conf):
|
||||
def build(bld):
|
||||
os.environ["CCACHE_DIR"] = os.path.abspath('.ccache/'+bld.env.COMPILER_CC+'/'+bld.env.DEST_OS+'/'+bld.env.DEST_CPU)
|
||||
|
||||
if bld.env.DEST_OS in ['win32', 'android']:
|
||||
if bld.env.SDL and bld.env.DEST_OS in ['win32', 'android']:
|
||||
sdl_name = 'SDL2.dll' if bld.env.DEST_OS == 'win32' else 'libSDL2.so'
|
||||
sdl_path = os.path.join('lib', bld.env.DEST_OS, bld.env.DEST_CPU, sdl_name)
|
||||
bld.install_files(bld.env.LIBDIR, [sdl_path])
|
||||
|
Loading…
Reference in New Issue
Block a user