Merge 9cc3539f6d into b8558de63e

2025-07-16 19:03:56 +00:00 · 2023-10-23 21:32:05 +02:00 · 2023-10-23 21:32:05 +02:00 · 072921189c
commit 072921189c
parent b8558de63e 9cc3539f6d
43 changed files with 3234 additions and 2792 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
+*~
 *.mak
 *.mak.vpc_crc
 *.vpc_crc
--- a/common/sse2neon.h
+++ b/common/sse2neon.h
--- a/engine/cmodel.cpp
+++ b/engine/cmodel.cpp
@ -862,7 +862,7 @@ BOX TRACING

 // Custom SIMD implementation for box brushes

-const fltx4 Four_DistEpsilons={DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON};
+const fltx4 Four_DistEpsilons=FLTX4(DIST_EPSILON,DIST_EPSILON,DIST_EPSILON,DIST_EPSILON);
 const int32 ALIGN16 g_CubeFaceIndex0[4] ALIGN16_POST = {0,1,2,-1};
 const int32 ALIGN16 g_CubeFaceIndex1[4] ALIGN16_POST = {3,4,5,-1};
 bool IntersectRayWithBoxBrush( TraceInfo_t *pTraceInfo, const cbrush_t *pBrush, cboxbrush_t *pBox )
@ -1572,7 +1572,7 @@ void FASTCALL CM_TraceToLeaf( TraceInfo_t * RESTRICT pTraceInfo, int ndxLeaf, fl
 			fltx4 traceStart = LoadAlignedSIMD(pTraceInfo->m_start.Base());
 			fltx4 traceDelta = LoadAlignedSIMD(pTraceInfo->m_delta.Base());
 			fltx4 traceInvDelta = LoadAlignedSIMD(pTraceInfo->m_invDelta.Base());
-			static const fltx4 vecEpsilon = {DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON};
+			static const fltx4 vecEpsilon = FLTX4(DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON);
 			// only used in !IS_POINT version:
 			fltx4 extents;
 			if (!IS_POINT)
--- a/engine/gl_lightmap.cpp
+++ b/engine/gl_lightmap.cpp
@ -1333,6 +1333,7 @@ void CacheAndUnloadLightmapData()
 	host_state.worldbrush->unloadedlightmaps = true;
 }

+#pragma optimize("", off)
 //sorts the surfaces in place
 static void SortSurfacesByLightmapID( SurfaceHandle_t *pToSort, int iSurfaceCount )
 {
@ -1448,6 +1449,7 @@ void R_RedownloadAllLightmaps()

 	g_RebuildLightmaps = false;
 }
+#pragma optimize("", on)

 //-----------------------------------------------------------------------------
 // Purpose: flag the lightmaps as needing to be rebuilt (gamma change)
--- a/engine/l_studio.cpp
+++ b/engine/l_studio.cpp
@ -40,7 +40,7 @@
 #include "materialsystem/materialsystem_config.h"
 #include "materialsystem/itexture.h"
 #include "IHammer.h"
-#if defined( _WIN32 ) && !defined( _X360 )
+#if defined( _WIN32 ) && defined( _M_IX86 )
 #include <xmmintrin.h>
 #endif
 #include "staticpropmgr.h"
--- a/engine/sys_engine.cpp
+++ b/engine/sys_engine.cpp
@ -104,7 +104,7 @@ extern ConVar host_timer_spin_ms;
 extern float host_nexttick;
 extern IVEngineClient *engineClient;

-#ifdef WIN32
+#if defined(_WIN32)
 static void cpu_frequency_monitoring_callback( IConVar *var, const char *pOldValue, float flOldValue )
 {
 	// Set the specified interval for CPU frequency monitoring
--- a/game/client/client_virtualreality.cpp
+++ b/game/client/client_virtualreality.cpp
@ -152,7 +152,7 @@ CON_COMMAND( vr_toggle, "Toggles VR mode" )
 // --------------------------------------------------------------------
 // Purpose: Returns true if the matrix is orthonormal
 // --------------------------------------------------------------------
-bool IsOrthonormal ( VMatrix Mat, float fTolerance )
+bool IsOrthonormal ( VMatrix& Mat, float fTolerance )
 {
 	float LenFwd = Mat.GetForward().Length();
 	float LenUp = Mat.GetUp().Length();
--- a/game/client/detailobjectsystem.cpp
+++ b/game/client/detailobjectsystem.cpp
@ -2122,8 +2122,8 @@ int CDetailObjectSystem::SortSpritesBackToFront( int nLeaf, const Vector &viewOr
 #else
 #define MANTISSA_LSB_OFFSET 0
 #endif
-static fltx4 Four_MagicNumbers={ MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
-static fltx4 Four_255s={ 255.0, 255.0, 255.0, 255.0 };
+static fltx4 Four_MagicNumbers=FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER );
+static fltx4 Four_255s=FLTX4( 255.0, 255.0, 255.0, 255.0 );

 static ALIGN16 int32 And255Mask[4] ALIGN16_POST = {0xff,0xff,0xff,0xff};
 #define PIXMASK ( * ( reinterpret_cast< fltx4 *>( &And255Mask ) ) )
--- a/game/client/touch.cpp
+++ b/game/client/touch.cpp
@ -23,7 +23,7 @@ extern ConVar default_fov;

 extern IMatSystemSurface *g_pMatSystemSurface;

-#ifdef ANDROID
+#ifdef PLATFORM_MOBILE
 #define TOUCH_DEFAULT "1"
 #else
 #define TOUCH_DEFAULT "0"
--- a/gameui/OptionsDialog.cpp
+++ b/gameui/OptionsDialog.cpp
@ -87,9 +87,9 @@ COptionsDialog::COptionsDialog(vgui::Panel *parent) : PropertyDialog(parent, "Op
 	AddPage(new COptionsSubKeyboard(this), "#GameUI_Keyboard");
 	AddPage(new COptionsSubMouse(this), "#GameUI_Mouse");

-#ifdef ANDROID
-	AddPage(new COptionsSubTouch(this), "Touch");
-#endif
+	// Requires additional res file from extras_dir.vpk
+	if (IsMobile())
+		AddPage(new COptionsSubTouch(this), "Touch");

 	m_pOptionsSubAudio = new COptionsSubAudio(this);
 	AddPage(m_pOptionsSubAudio, "#GameUI_Audio");
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 4098acbbe3bc48320496f7533851640cc40cbb89
+Subproject commit 2b7c913bb0c804eab2203eef30b9ba4559a44ef4
--- a/launcher/launcher.cpp
+++ b/launcher/launcher.cpp
@ -653,6 +653,11 @@ void ReportDirtyDiskNoMaterialSystem()
 //-----------------------------------------------------------------------------
 bool CSourceAppSystemGroup::Create()
 {
+	if ( !CommandLine()->FindParm( "-nolog" ) )
+		DebugLogger()->Init("engine.log");
+	else
+		DebugLogger()->Disable();
+
 	IFileSystem *pFileSystem = (IFileSystem*)FindSystem( FILESYSTEM_INTERFACE_VERSION );
 	pFileSystem->InstallDirtyDiskReportFunc( ReportDirtyDiskNoMaterialSystem );

@ -767,11 +772,6 @@ bool CSourceAppSystemGroup::Create()

 bool CSourceAppSystemGroup::PreInit()
 {
-	if ( !CommandLine()->FindParm( "-nolog" ) )
-		DebugLogger()->Init("engine.log");
-	else
-		DebugLogger()->Disable();
-
 	CreateInterfaceFn factory = GetFactory();
 	ConnectTier1Libraries( &factory, 1 );
 	ConVar_Register( );
--- a/launcher_main/wscript
+++ b/launcher_main/wscript
@ -15,7 +15,7 @@ def configure(conf):
 	return

 def build(bld):
-	if bld.env.DEST_OS == 'android':
+	if bld.env.MOBILE and bld.env.DEST_OS=='android':
 		return

 	source = ['main.cpp']
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 86a66ee92d9fda0a09f54a435e850faa7ab5d0fa
+Subproject commit 09cc8293779b1f2c879cca276e9a3f715ab5309d
--- a/materialsystem/colorspace.h
+++ b/materialsystem/colorspace.h
@ -287,7 +287,7 @@ namespace ColorSpace
 	{
 		// preload 3.0f onto the returns so that we don't need to multiply the bumpAverage by it
 		// straight away (eg, reschedule this dependent op)
-		static const fltx4 vThree = { 3.0f, 3.0f, 3.0f, 0.0f };
+		static const fltx4 vThree = FLTX4( 3.0f, 3.0f, 3.0f, 0.0f );
 		fltx4 retValBump1 = MulSIMD( vThree, linearBumpColor1);
 		fltx4 retValBump2 = MulSIMD( vThree, linearBumpColor2);
 		fltx4 retValBump3 = MulSIMD( vThree, linearBumpColor3);
--- a/mathlib/3dnow.cpp
+++ b/mathlib/3dnow.cpp
@ -16,7 +16,7 @@
 // memdbgon must be the last include file in a .cpp file!!!
 #include "tier0/memdbgon.h"

-#if !defined(COMPILER_MSVC64) && !defined(LINUX) && !defined(COMPILER_CLANG)
+#if defined(_M_IX86) && !defined(LINUX) && !defined(COMPILER_CLANG)
 // Implement for 64-bit Windows if needed.
 // Clang hits "fatal error: error in backend:" and other errors when trying
 // to compile the inline assembly below. 3DNow support is highly unlikely to
--- a/mathlib/mathlib_base.cpp
+++ b/mathlib/mathlib_base.cpp
@ -3258,7 +3258,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright

 	// SSE Generally performs better than 3DNow when present, so this is placed 
 	// first to allow SSE to override these settings.
-#if !defined( OSX ) && !defined( PLATFORM_WINDOWS_PC64 ) && !defined(LINUX) && !defined(PLATFORM_BSD)
+#ifdef _M_IX86
 	if ( bAllow3DNow && pi.m_b3DNow )
 	{
 		s_b3DNowEnabled = true;
@ -3291,7 +3291,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
 		pfRSqrt = _SSE_RSqrtAccurate;
 		pfRSqrtFast = _SSE_RSqrtFast;
 #endif
-#ifdef PLATFORM_WINDOWS_PC32
+#ifdef _M_IX86
 		pfFastSinCos = _SSE_SinCos;
 		pfFastCos = _SSE_cos;
 #endif
@ -3304,7 +3304,7 @@ void MathLib_Init( float gamma, float texGamma, float brightness, int overbright
 	if ( bAllowSSE2 && pi.m_bSSE2 )
 	{
 		s_bSSE2Enabled = true;
-#ifdef PLATFORM_WINDOWS_PC32
+#ifdef _M_IX86
 		pfFastSinCos = _SSE2_SinCos;
 		pfFastCos = _SSE2_cos;
 #endif
--- a/mathlib/sse.cpp
+++ b/mathlib/sse.cpp
@ -91,13 +91,13 @@ float _SSE_Sqrt(float x)
 {
 	Assert( s_bMathlibInitialized );
 	float	root = 0.f;
-#ifdef _WIN32
+#if defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
 	_asm
 	{
 		sqrtss		xmm0, x
 		movss		root, xmm0
 	}
-#elif POSIX
+#else
 	_mm_store_ss( &root, _mm_sqrt_ss( _mm_load_ss( &x ) ) );
 #endif
 	return root;
@ -122,7 +122,7 @@ float _SSE_RSqrtAccurate(float x)
 }
 #else

-#ifdef POSIX
+#if POSIX || defined(_M_ARM) || defined(_M_ARM64)
 const __m128  f3  = _mm_set_ss(3.0f);  // 3 as SSE value
 const __m128  f05 = _mm_set_ss(0.5f);  // 0.5 as SSE value
 #endif
@ -131,7 +131,7 @@ const __m128  f05 = _mm_set_ss(0.5f);  // 0.5 as SSE value
 float _SSE_RSqrtAccurate(float a)
 {

-#ifdef _WIN32
+#if defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
 	float x;
 	float half = 0.5f;
 	float three = 3.f;
@ -153,8 +153,8 @@ float _SSE_RSqrtAccurate(float a)
 	}

 	return x;
-#elif POSIX	
-	__m128  xx = _mm_load_ss( &a );
+#else
+    __m128  xx = _mm_load_ss( &a );
    __m128  xr = _mm_rsqrt_ss( xx );
    __m128  xt;
 	
@ -166,8 +166,6 @@ float _SSE_RSqrtAccurate(float a)
 	
    _mm_store_ss( &a, xr );
    return a;
-#else
-	#error "Not Implemented"
 #endif

 }
@ -764,7 +762,7 @@ float _SSE_cos( float x )
 //-----------------------------------------------------------------------------
 // SSE2 implementations of optimized routines:
 //-----------------------------------------------------------------------------
-#ifdef PLATFORM_WINDOWS_PC32
+#if defined(_M_IX86)
 void _SSE2_SinCos(float x, float* s, float* c)  // any x
 {
 #ifdef _WIN32
@ -850,9 +848,7 @@ void _SSE2_SinCos(float x, float* s, float* c)  // any x
 	#error "Not Implemented"
 #endif
 }
-#endif // PLATFORM_WINDOWS_PC32

-#ifdef PLATFORM_WINDOWS_PC32
 float _SSE2_cos(float x)  
 {
 #ifdef _WIN32
@ -970,9 +966,7 @@ void VectorTransformSSE(const float *in1, const matrix3x4_t& in2, float *out1)
 	#error "Not Implemented"
 #endif
 }
-#endif

-#if 0
 void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
 {
 	Assert( s_bMathlibInitialized );
@ -1026,9 +1020,7 @@ void VectorRotateSSE( const float *in1, const matrix3x4_t& in2, float *out1 )
 	#error "Not Implemented"
 #endif
 }
-#endif

-#ifdef _WIN32
 void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const float *direction, float *dest )
 {
 	// FIXME: This don't work!! It will overwrite memory in the write to dest
@ -1057,7 +1049,6 @@ void _declspec(naked) _SSE_VectorMA( const float *start, float scale, const floa
 #endif
 	}
 }
-#endif

 #ifdef _WIN32
 #ifdef PFN_VECTORMA
@ -1101,7 +1092,6 @@ float (__cdecl *pfVectorMA)(Vector& v) = _VectorMA;
 //   NJS: (Nov 1 2002) -NOT- faster.  may time a couple cycles faster in a single function like 
 //   this, but when inlined, and instruction scheduled, the C version is faster.  
 //   Verified this via VTune
-/*
 vec_t DotProduct (const vec_t *a, const vec_t *c)
 {
 	vec_t temp;
@ -1124,6 +1114,6 @@ vec_t DotProduct (const vec_t *a, const vec_t *c)
 		ret
 	}
 }
-*/
+#endif

 #endif // COMPILER_MSVC64 
--- a/mathlib/sseconst.cpp
+++ b/mathlib/sseconst.cpp
@ -7,35 +7,35 @@
 #include "mathlib/ssemath.h"
 #include "mathlib/ssequaternion.h"

-const fltx4 Four_PointFives={0.5,0.5,0.5,0.5};
+const fltx4 Four_PointFives=FLTX4(0.5,0.5,0.5,0.5);
 #ifndef _X360
-const fltx4 Four_Zeros={0.0,0.0,0.0,0.0};
-const fltx4 Four_Ones={1.0,1.0,1.0,1.0};
+const fltx4 Four_Zeros=FLTX4(0.0,0.0,0.0,0.0);
+const fltx4 Four_Ones=FLTX4(1.0,1.0,1.0,1.0);
 #endif
-const fltx4 Four_Twos={2.0,2.0,2.0,2.0};
-const fltx4 Four_Threes={3.0,3.0,3.0,3.0};
-const fltx4 Four_Fours={4.0,4.0,4.0,4.0};
-const fltx4 Four_Origin={0,0,0,1};
-const fltx4 Four_NegativeOnes={-1,-1,-1,-1};
+const fltx4 Four_Twos=FLTX4(2.0,2.0,2.0,2.0);
+const fltx4 Four_Threes=FLTX4(3.0,3.0,3.0,3.0);
+const fltx4 Four_Fours=FLTX4(4.0,4.0,4.0,4.0);
+const fltx4 Four_Origin=FLTX4(0,0,0,1);
+const fltx4 Four_NegativeOnes=FLTX4(-1,-1,-1,-1);

-const fltx4 Four_2ToThe21s={ (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) };
-const fltx4 Four_2ToThe22s={ (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) };
-const fltx4 Four_2ToThe23s={ (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) };
-const fltx4 Four_2ToThe24s={ (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) };
+const fltx4 Four_2ToThe21s=FLTX4( (float) (1<<21), (float) (1<<21), (float) (1<<21), (float)(1<<21) );
+const fltx4 Four_2ToThe22s=FLTX4( (float) (1<<22), (float) (1<<22), (float) (1<<22), (float)(1<<22) );
+const fltx4 Four_2ToThe23s=FLTX4( (float) (1<<23), (float) (1<<23), (float) (1<<23), (float)(1<<23) );
+const fltx4 Four_2ToThe24s=FLTX4( (float) (1<<24), (float) (1<<24), (float) (1<<24), (float)(1<<24) );

-const fltx4 Four_Point225s={ .225, .225, .225, .225 };
-const fltx4 Four_Epsilons={FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON};
+const fltx4 Four_Point225s=FLTX4( .225, .225, .225, .225 );
+const fltx4 Four_Epsilons=FLTX4(FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON);

-const fltx4 Four_FLT_MAX={FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
-const fltx4 Four_Negative_FLT_MAX={-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX};
-const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. };
+const fltx4 Four_FLT_MAX=FLTX4(FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX);
+const fltx4 Four_Negative_FLT_MAX=FLTX4(-FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX);
+const fltx4 g_SIMD_0123 = FLTX4( 0., 1., 2., 3. );

 const fltx4 g_QuatMultRowSign[4] =
 {
-	{  1.0f,  1.0f, -1.0f, 1.0f },
-	{ -1.0f,  1.0f,  1.0f, 1.0f },
-	{  1.0f, -1.0f,  1.0f, 1.0f },
-	{ -1.0f, -1.0f, -1.0f, 1.0f }
+	FLTX4(  1.0f,  1.0f, -1.0f, 1.0f ),
+	FLTX4( -1.0f,  1.0f,  1.0f, 1.0f ),
+	FLTX4(  1.0f, -1.0f,  1.0f, 1.0f ),
+	FLTX4( -1.0f, -1.0f, -1.0f, 1.0f )
 };

 const uint32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = {0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
--- a/mathlib/ssenoise.cpp
+++ b/mathlib/ssenoise.cpp
@ -20,7 +20,7 @@

 #define MAGIC_NUMBER (1<<15)								// gives 8 bits of fraction

-static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
+static fltx4 Four_MagicNumbers = FLTX4( MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER );


 static ALIGN16 int32 idx_mask[4]= {0xffff, 0xffff, 0xffff, 0xffff};
--- a/public/materialsystem/imesh.h
+++ b/public/materialsystem/imesh.h
@ -1220,7 +1220,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
 	Assert( m_nCurrentVertex < m_nMaxVertexCount );

-#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
+#if defined( _WIN32 ) && defined( _M_IX86 )
 	const void *pRead = &vertex;
 	void *pCurrPos = m_pCurrPosition;
 	__asm
@ -1236,7 +1236,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
 			movntps [edi + 16], xmm1
 			movntps [edi + 32], xmm2
 	}
-#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64)
+#else
 	const char *pRead = (char *)&vertex;
 	char *pCurrPos = (char *)m_pCurrPosition;
 	__m128 m1 = _mm_load_ps( (float *)pRead );
@ -1245,8 +1245,6 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
 	_mm_stream_ps( (float *)pCurrPos, m1 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 16), m2 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 32), m3 );
-#else
-	Error( "Implement CMeshBuilder::FastVertexSSE(dx7)" );
 #endif

 	IncrementFloatPointer( m_pCurrPosition, m_VertexSize_Position );
@ -1267,7 +1265,7 @@ inline void CVertexBuilder::Fast4VerticesSSE(
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
 	Assert( m_nCurrentVertex < m_nMaxVertexCount-3 );

-#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
+#if defined( _WIN32 ) && defined( _M_IX86 )
 	void *pCurrPos = m_pCurrPosition;
 	__asm
 	{
@ -1309,7 +1307,7 @@ inline void CVertexBuilder::Fast4VerticesSSE(
 			movntps [edi + 80+96], xmm5

 	}
-#elif defined(__arm__) || defined(PLATFORM_WINDOWS_PC64)
+#else
 	const void *pReadA = &vtx_a;
 	const void *pReadB = &vtx_b;
 	const void *pReadC = &vtx_c;
@ -1341,8 +1339,6 @@ inline void CVertexBuilder::Fast4VerticesSSE(
 	_mm_stream_ps( (float *)((intp)pCurrPos + 48+96), m4 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 64+96), m5 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 80+96), m6 );
-#else
-	Error( "Implement CMeshBuilder::Fast4VerticesSSE\n");
 #endif
 	IncrementFloatPointer( m_pCurrPosition, 4*m_VertexSize_Position );

@ -1430,7 +1426,7 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
 	Assert( m_nCurrentVertex < m_nMaxVertexCount );

-#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
+#if defined( _WIN32 ) && defined( _M_IX86 )
 	const void *pRead = &vertex;
 	void *pCurrPos = m_pCurrPosition;
 	__asm
@ -1448,21 +1444,10 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 			movntps [edi + 32], xmm2
 			movntps [edi + 48], xmm3
 	}
-#elif defined(GNUC) || defined(PLATFORM_WINDOWS_PC64)
+#else
 	const void *pRead = &vertex;
 	void *pCurrPos = m_pCurrPosition;

-/*	__asm__ __volatile__ (
-						  "movaps (%0), %%xmm0\n"
-						  "movaps 16(%0), %%xmm1\n"
-						  "movaps 32(%0), %%xmm2\n"
-						  "movaps 48(%0), %%xmm3\n"
-						  "movntps %%xmm0, (%1)\n"
-						  "movntps %%xmm1, 16(%1)\n"
-						  "movntps %%xmm2, 32(%1)\n"
-						  "movntps %%xmm3, 48(%1)\n"
-						  :: "r" (pRead), "r" (pCurrPos) : "memory"); */
-
 	__m128 m1 = _mm_load_ps( (float *)pRead );
 	__m128 m2 = _mm_load_ps( (float *)((intp)pRead + 16) );
 	__m128 m3 = _mm_load_ps( (float *)((intp)pRead + 32) );
@ -1471,8 +1456,6 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 	_mm_stream_ps( (float *)((intp)pCurrPos + 16), m2 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 32), m3 );
 	_mm_stream_ps( (float *)((intp)pCurrPos + 48), m4 );
-#else
-	Error( "Implement CMeshBuilder::FastVertexSSE((dx8)" );
 #endif

 	IncrementFloatPointer( m_pCurrPosition, m_VertexSize_Position );
--- a/public/mathlib/mathlib.h
+++ b/public/mathlib/mathlib.h
@ -405,6 +405,9 @@ void inline SinCos( float radians, float *sine, float *cosine )
 {
 #if defined( _X360 )
 	XMScalarSinCos( sine, cosine, radians );
+#elif defined( PLATFORM_WINDOWS_PC64 ) || defined(_M_ARM) || defined(_M_ARM64)
+	*sine = sin( radians );
+	*cosine = cos( radians );
 #elif defined( PLATFORM_WINDOWS_PC32 )
 	_asm
 	{
@ -417,11 +420,8 @@ void inline SinCos( float radians, float *sine, float *cosine )
 		fstp DWORD PTR [edx]
 		fstp DWORD PTR [eax]
 	}
-#elif defined( PLATFORM_WINDOWS_PC64 )
-	*sine = sin( radians );
-	*cosine = cos( radians );
 #elif defined( OSX )
-    __sincosf(radians, sine, cosine);
+	__sincosf(radians, sine, cosine);
 #elif defined( POSIX )
 	sincosf(radians, sine, cosine);
 #endif
@ -1188,7 +1188,7 @@ inline float SimpleSplineRemapValClamped( float val, float A, float B, float C,

 FORCEINLINE int RoundFloatToInt(float f)
 {
-#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(PLATFORM_WINDOWS_PC64)
 	return _mm_cvtss_si32(_mm_load_ss(&f));
 #elif defined( _X360 )
 #ifdef Assert
--- a/public/mathlib/simdvectormatrix.h
+++ b/public/mathlib/simdvectormatrix.h
@ -135,7 +135,12 @@ public:
 		Assert( m_pData );

 		static FourVectors value{Four_Zeros, Four_Zeros, Four_Zeros};
+#if defined(_M_ARM) // fuck msvc with C2719 error
+		for (size_t n = m_nHeight*m_nPaddedWidth; n; n--)
+			*(m_pData+n) = value;
+#else
 		memutils::set( m_pData, value, m_nHeight*m_nPaddedWidth );
+#endif
 	}

 	void RaiseToPower( float power );
--- a/public/mathlib/ssemath.h
+++ b/public/mathlib/ssemath.h
@ -8,7 +8,7 @@

 #if defined( _X360 )
 #include <xboxmath.h>
-#elif defined(__arm__) || defined(__aarch64__)
+#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
 #include "sse2neon.h"
 #else
 #include <xmmintrin.h>
@ -63,6 +63,12 @@ typedef __m128 fltx4;
 typedef __m128 i32x4;
 typedef __m128 u32x4;

+// FLTX4(w, x, y, z): expands to an initializer for a fltx4 holding the four floats in lane order.
+#if defined(_M_ARM) || defined(_M_ARM64)
+// On MSVC/ARM the vector type cannot be brace-initialized from four floats, and packing the
+// arguments through integer casts and shifts would store truncated integer values instead of
+// float bit patterns. Build the vector with the SSE set intrinsic (provided by sse2neon.h here).
+// NOTE(review): this makes namespace-scope constants dynamically initialized on ARM - confirm
+// no other static initializer reads them before they are constructed.
+#define FLTX4(w, x, y, z) _mm_setr_ps( (w), (x), (y), (z) )
+#else
+#define FLTX4(w, x, y, z) {w, x, y, z}
+#endif
+
 #endif

 // The FLTX4 type is a fltx4 used as a parameter to a function.
@ -1828,7 +1834,7 @@ FORCEINLINE fltx4 ReplicateX4( float flValue )
 FORCEINLINE float SubFloat( const fltx4 & a, int idx )
 {
 	// NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
-#ifndef POSIX
+#if defined(_WIN32) && (defined(__i386__) || defined(__x86_64__))
 	return a.m128_f32[ idx ];
 #else
 	return (reinterpret_cast<float const *>(&a))[idx];
@ -1837,7 +1843,7 @@ FORCEINLINE float SubFloat( const fltx4 & a, int idx )

 FORCEINLINE float & SubFloat( fltx4 & a, int idx )
 {
-#ifndef POSIX
+#if defined(_WIN32) && (defined(__i386__) || defined(__x86_64__))
 	return a.m128_f32[ idx ];
 #else
 	return (reinterpret_cast<float *>(&a))[idx];
@ -1851,8 +1857,8 @@ FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )

 FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
 {
-#ifndef POSIX
-	return a.m128_u32[idx];
+#if defined(_WIN32) && (defined(__i386__) || defined(__x86_64__))
+	return a.m128_u32[ idx ];
 #else
 	return (reinterpret_cast<uint32 const *>(&a))[idx];
 #endif
@ -1860,8 +1866,8 @@ FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )

 FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
 {
-#ifndef POSIX
-	return a.m128_u32[idx];
+#if defined(_WIN32) && (defined(__i386__) || defined(__x86_64__))
+	return a.m128_u32[ idx ];
 #else
 	return (reinterpret_cast<uint32 *>(&a))[idx];
 #endif
--- a/public/tier0/commonmacros.h
+++ b/public/tier0/commonmacros.h
@ -68,7 +68,7 @@ inline bool IsPowerOfTwo( T value )

 // From crtdefs.h
 #if !defined(UNALIGNED)
-#if defined(_M_IA64) || defined(_M_AMD64)
+#if defined(_M_AMD64) || defined(_M_ARM) || defined(_M_ARM64)
 #define UNALIGNED __unaligned
 #else
 #define UNALIGNED
--- a/public/tier0/platform.h
+++ b/public/tier0/platform.h
@ -276,6 +276,13 @@ typedef signed char int8;
 #else
 	#define IsAndroid()	false
 #endif
+
+// IsMobile(): compile-time constant, true only when the build defines PLATFORM_MOBILE.
+#ifdef PLATFORM_MOBILE
+	#define IsMobile() true
+#else
+	#define IsMobile() false
+#endif
+
 // From steam/steamtypes.h
 // RTime32
 // We use this 32 bit time representing real world time.
@ -405,6 +412,18 @@ FIXME: Enable this when we no longer fear change =)
 #define __i386__	1
 #endif

+// Treat ARM64EC builds as ARM64 for the _M_ARM64 checks used in this codebase.
+#ifdef _M_ARM64EC
+#define _M_ARM64	1
+#endif
+
+// Map MSVC's 32-bit ARM target macro onto the GCC/Clang-style __arm__ name.
+#ifdef _M_ARM
+#define __arm__		1
+#endif
+
+// Map MSVC's 64-bit ARM target macro onto the GCC/Clang-style __aarch64__ name.
+#ifdef _M_ARM64
+#define __aarch64__	1
+#endif
+
 #elif POSIX
 #if defined( OSX ) && defined( CARBON_WORKAROUND )
 #define DWORD unsigned int
@ -852,7 +871,9 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)
 //-----------------------------------------------------------------------------
 //#define CHECK_FLOAT_EXCEPTIONS		1

-#if !defined( _X360 )
+#if defined (__arm__) || defined (__aarch64__)
+	inline void SetupFPUControlWord() {}
+#elif !defined( _X360 )
 #if defined( _MSC_VER )

 	#if defined( PLATFORM_WINDOWS_PC64 )
@ -898,8 +919,6 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)

 		#endif
 	#endif
-#elif defined (__arm__) || defined (__aarch64__)
-	inline void SetupFPUControlWord() {}
 #else
 	inline void SetupFPUControlWord()
 	{
@ -1025,7 +1044,7 @@ inline T QWordSwapC( T dw )
 		return output;
 	}

-#elif defined( _MSC_VER ) && !defined( PLATFORM_WINDOWS_PC64 )
+#elif defined( _MSC_VER ) && defined( _M_IX86 ) // MSVC's 32-bit x86 macro is _M_IX86; the *Asm swap helpers are selected only there

 	#define WordSwap  WordSwapAsm
 	#define DWordSwap DWordSwapAsm
@ -1229,8 +1248,18 @@ PLATFORM_INTERFACE time_t			Plat_timegm( struct tm *timeptr );
 PLATFORM_INTERFACE struct tm *		Plat_localtime( const time_t *timep, struct tm *result );

 #if defined( _WIN32 ) && defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
+// Native x64 only needs the __rdtsc intrinsic declared. ARM64EC also predefines _M_X64, but
+// Plat_Rdtsc() uses MSVC_ARM_SYSREG for _M_ARM64EC, so ARM64EC must take the <intrin.h> branch.
+#if defined( _M_X64 ) && !defined( _M_ARM64EC )
 	extern "C" unsigned __int64 __rdtsc();
 	#pragma intrinsic(__rdtsc)
+#else
+#include <intrin.h>
+// Packs a system-register encoding (op0, op1, CRn, CRm, op2) into the single integer
+// that Plat_Rdtsc() passes to _ReadStatusReg. Arguments are parenthesized so that
+// expression arguments are masked as a whole.
+#define MSVC_ARM_SYSREG(op0, op1, crn, crm, op2) \
+        ( (((op0) & 1) << 14) | \
+          (((op1) & 7) << 11) | \
+          (((crn) & 15) << 7) | \
+          (((crm) & 15) << 3) | \
+          (((op2) & 7) << 0) )
+#endif
 #endif

 inline uint64 Plat_Rdtsc()
@ -1241,15 +1270,16 @@ inline uint64 Plat_Rdtsc()
 	return t.tv_sec * 1000000000ULL + t.tv_nsec;
 #elif defined( _X360 )
 	return ( uint64 )__mftb32();
-#elif defined( _WIN64 )
+#elif defined( _M_IX86 )
+	_asm rdtsc
+#elif defined( _M_ARM )
+	return __rdpmccntr64();
+	//uint32 val = _MoveFromCoprocessor(15,0, 9,13,0);
+	//return ((uint64)val) << 6;
+#elif defined( _M_ARM64 ) || defined( _M_ARM64EC )
+	// Read the cycle counter PMCCNTR_EL0, encoded as (op0=3, op1=3, CRn=9, CRm=13, op2=0).
+	// The encoding (3,3, 9,12,5) is PMSELR_EL0, the event-counter selector, not a counter.
+	return _ReadStatusReg(MSVC_ARM_SYSREG(3,3, 9,13,0));
+#elif defined( COMPILER_MSVC )
 	return ( uint64 )__rdtsc();
-#elif defined( _WIN32 )
-  #if defined( _MSC_VER ) && ( _MSC_VER >= 1400 )
-	return ( uint64 )__rdtsc();
-  #else
-    __asm rdtsc;
-	__asm ret;
-  #endif
 #elif defined( __i386__ )
 	uint64 val;
 	__asm__ __volatile__ ( "rdtsc" : "=A" (val) );
--- a/public/tier0/threadtools.h
+++ b/public/tier0/threadtools.h
@ -241,6 +241,8 @@ inline void ThreadPause()
 	_mm_pause();
 #elif defined( COMPILER_MSVC32 )
 	__asm pause;
+#elif defined(_M_ARM) || defined(_M_ARM64)
+	__yield();
 #elif defined( COMPILER_MSVCX360 )
 	YieldProcessor(); 
 	__asm { or r0,r0,r0 } 
@ -445,7 +447,7 @@ PLATFORM_INTERFACE bool ThreadInterlockedAssignIf64( volatile int64 *pDest, int6

 PLATFORM_INTERFACE int64 ThreadInterlockedExchange64( int64 volatile *, int64 value ) NOINLINE;

-#ifdef COMPILER_MSVC32
+#if COMPILER_MSVC32 || _M_ARM
 PLATFORM_INTERFACE int64 ThreadInterlockedIncrement64( int64 volatile * ) NOINLINE;
 PLATFORM_INTERFACE int64 ThreadInterlockedDecrement64( int64 volatile * ) NOINLINE;
 PLATFORM_INTERFACE int64 ThreadInterlockedExchangeAdd64( int64 volatile *, int64 value ) NOINLINE;
@ -465,7 +467,7 @@ inline int64 ThreadInterlockedDecrement64( int64 volatile *p )

 #endif

-#ifdef COMPILER_MSVC64
+#if COMPILER_MSVC64 || _M_ARM64
 // 64 bit windows can use intrinsics for these, 32-bit can't
 #pragma intrinsic( _InterlockedCompareExchange64 )
 #pragma intrinsic( _InterlockedExchange64 )
@ -492,7 +494,7 @@ inline bool ThreadInterlockedAssignIf( uint32 volatile *p, uint32 value, uint32
 //inline bool ThreadInterlockedAssignIf( int volatile *p, int value, int comperand )	{ return ThreadInterlockedAssignIf( (int32 volatile *)p, value, comperand ); }


-#if defined( _WIN64 )
+#if defined( _WIN64 ) && !defined(_M_ARM64)
 typedef __m128i int128;
 inline int128 int128_zero()	{ return _mm_setzero_si128(); }
 PLATFORM_INTERFACE bool ThreadInterlockedAssignIf128( volatile int128 *pDest, const int128 &value, const int128 &comperand ) NOINLINE;
--- a/public/tier0/tslist.h
+++ b/public/tier0/tslist.h
@ -34,7 +34,7 @@

 //-----------------------------------------------------------------------------

-#if defined( PLATFORM_64BITS )
+#if defined( PLATFORM_64BITS ) && !defined(_M_ARM64)

 #if defined (PLATFORM_WINDOWS) 
 //typedef __m128i int128;
@ -140,7 +140,7 @@ union TSLIST_HEAD_ALIGN TSLHead_t
 		uint32   DepthAndSequence;
 	} value32;

-#ifdef PLATFORM_64BITS
+#if defined(PLATFORM_64BITS) && !defined(_M_ARM64)
 	int128 value64x128;
 #else
 	int64 value64x128;
@ -194,7 +194,7 @@ public:

 #ifdef USE_NATIVE_SLIST
 		InitializeSListHead( &m_Head );
-#elif defined(PLATFORM_64BITS)
+#elif defined(PLATFORM_64BITS) && !defined(_M_ARM64)
 		m_Head.value64x128 = int128_zero();
 #else
 		m_Head.value64x128 = (int64)0;
@ -720,7 +720,7 @@ public:
 			intp	sequence;
 		} value;

-#ifdef PLATFORM_64BITS
+#if defined(PLATFORM_64BITS) && !defined(_M_ARM64)
 		int128 value64x128;
 #else
 		int64 value64x128;
--- a/serverbrowser/BaseGamesPage.cpp
+++ b/serverbrowser/BaseGamesPage.cpp
@ -582,7 +582,7 @@ void CBaseGamesPage::LoadFilterSettings()
 	m_bFilterNoEmptyServers = filter->GetInt("NoEmpty");
 	m_bFilterNoPasswordedServers = filter->GetInt("NoPassword");
 	m_bFilterReplayServers = filter->GetInt("Replay");
-	m_pQuickListCheckButton->SetSelected( filter->GetInt( "QuickList", IsAndroid() ) );
+	m_pQuickListCheckButton->SetSelected( filter->GetInt( "QuickList", IsMobile() ) );

 	int secureFilter = filter->GetInt("Secure");
 	m_pSecureFilter->ActivateItem(secureFilter);
--- a/studiorender/r_studiodraw.cpp
+++ b/studiorender/r_studiodraw.cpp
@ -657,7 +657,7 @@ static matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3
 static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result )
 {
 	// NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
-#if defined( _WIN32 ) && !defined( _X360 ) && !defined( PLATFORM_64BITS )
+#if defined( _WIN32 ) && defined(_M_IX86)
 	switch( boneweights.numbones )
 	{
 	default:
@ -866,11 +866,9 @@ static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matr
 		return &result;
 #endif
 	}
-#elif POSIX || PLATFORM_WINDOWS_PC64
+#elif POSIX || _WIN32
 // #warning "ComputeSkinMatrixSSE C implementation only"
 	return ComputeSkinMatrix( boneweights, pPoseToWorld, result );
-#elif defined( _X360 )
-	return ComputeSkinMatrix( boneweights, pPoseToWorld, result );
 #else
 	#error
 #endif
--- a/studiorender/r_studiolight.cpp
+++ b/studiorender/r_studiolight.cpp
@ -72,7 +72,7 @@ void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, Fou
 //	VPROF( "R_LightAmbient" );

 	// !!speed!! compute ambient color cube in sse format
-	static fltx4 FourZeros={0.,0.,0.,.0};
+	static fltx4 FourZeros=FLTX4(0.,0.,0.,.0);

 	// find the contributions from each axis
 	fltx4 NegMask=CmpLtSIMD(normal.x,FourZeros);
--- a/studiorender/r_studiolight.h
+++ b/studiorender/r_studiolight.h
@ -12,8 +12,9 @@


 #include "tier0/platform.h"
+#include "mathlib/ssemath.h"

-#if defined( _WIN32 ) && !defined( _X360 )
+#if defined( _WIN32 ) && !defined( _X360 ) && (defined(_M_IX86) || defined(_M_AMD64))
 #include <xmmintrin.h>
 #endif

--- a/studiorender/studiorender.h
+++ b/studiorender/studiorender.h
@ -22,9 +22,6 @@
 #include "flexrenderdata.h"
 #include "mathlib/compressed_vector.h"
 #include "r_studiolight.h"
-#if defined( _WIN32 ) && !defined( _X360 )
-#include <xmmintrin.h>
-#endif
 #include "tier0/dbg.h"


--- a/tier0/PMELib.cpp
+++ b/tier0/PMELib.cpp
@ -6,7 +6,7 @@
 //
 //===========================================================================//

-#ifdef _WIN32
+#if defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
 #include <windows.h>

 #pragma warning( disable : 4530 )   // warning: exception handler -GX option
--- a/tier0/cpu.cpp
+++ b/tier0/cpu.cpp
@ -22,7 +22,7 @@ const tchar* GetProcessorVendorId();

 static bool cpuid(uint32 function, uint32& out_eax, uint32& out_ebx, uint32& out_ecx, uint32& out_edx)
 {
-#if defined (__arm__) || defined (__aarch64__) || defined( _X360 )
+#if !defined(__i386__) && !defined(__x86_64__) // not an x86/x86-64 target: report CPUID as unavailable
 	return false;
 #elif defined(GNUC)

--- a/tier0/cpumonitoring.cpp
+++ b/tier0/cpumonitoring.cpp
@ -23,7 +23,7 @@
 #include "pch_tier0.h"
 #include "tier0/cpumonitoring.h"

-#ifdef PLATFORM_WINDOWS_PC32
+#ifdef _M_IX86
 #include "tier0/threadtools.h"
 #define NOMINMAX
 #undef min
--- a/tier0/stacktools.cpp
+++ b/tier0/stacktools.cpp
@ -175,7 +175,7 @@ int GetCallStack_Fast( void **pReturnAddressesOut, int iArrayCount, int iSkipCou
 {
 	//Only tested in windows. This function won't work with frame pointer omission enabled. "vpc /nofpo" all projects
 #if (defined( TIER0_FPO_DISABLED ) || defined( _DEBUG )) &&\
-	(defined( WIN32 ) && !defined( _X360 ) && !defined(_M_X64))
+	(defined( WIN32 ) && defined(_M_IX86)) // MSVC's 32-bit x86 macro is _M_IX86 (_M_X86 is never defined); the __asm crawl below is x86-only
 	void *pStackCrawlEBP;
 	__asm
 	{
@ -1467,7 +1467,7 @@ CStackTop_CopyParentStack::CStackTop_CopyParentStack( void * const *pParentStack
 #if defined( ENABLE_RUNTIME_STACK_TRANSLATION )
 	//miniature version of GetCallStack_Fast()
 #if (defined( TIER0_FPO_DISABLED ) || defined( _DEBUG )) &&\
-	(defined( WIN32 ) && !defined( _X360 ) && !defined(_M_X64))
+	(defined( WIN32 ) && defined(_M_IX86)) // MSVC's 32-bit x86 macro is _M_IX86 (_M_X86 is never defined); the __asm crawl below is x86-only
 	void *pStackCrawlEBP;
 	__asm
 	{
@ -1525,7 +1525,7 @@ CStackTop_ReferenceParentStack::CStackTop_ReferenceParentStack( void * const *pP
 #if defined( ENABLE_RUNTIME_STACK_TRANSLATION )
 	//miniature version of GetCallStack_Fast()
 #if (defined( TIER0_FPO_DISABLED ) || defined( _DEBUG )) &&\
-	(defined( WIN32 ) && !defined( _X360 ) && !defined(_M_X64))
+	(defined( WIN32 ) && defined(_M_IX86)) // MSVC's 32-bit x86 macro is _M_IX86 (_M_X86 is never defined); the __asm crawl below is x86-only
 	void *pStackCrawlEBP;
 	__asm
 	{
--- a/tier0/threadtools.cpp
+++ b/tier0/threadtools.cpp
@ -1740,7 +1740,7 @@ bool ThreadInterlockedAssignIf( int32 volatile *pDest, int32 value, int32 comper
 {
 	Assert( (size_t)pDest % 4 == 0 );

-#if !(defined(_WIN64) || defined (_X360))
+#ifdef _M_IX86
 	__asm 
 	{
 		mov	eax,comperand
@ -1773,7 +1773,7 @@ void *ThreadInterlockedCompareExchangePointer( void * volatile *pDest, void *val
 bool ThreadInterlockedAssignPointerIf( void * volatile *pDest, void *value, void *comperand )
 {
 	Assert( (size_t)pDest % 4 == 0 );
-#if !(defined(_WIN64) || defined (_X360))
+#ifdef _M_IX86
 	__asm 
 	{
 		mov	eax,comperand
@ -1807,13 +1807,19 @@ int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, in
 		lock CMPXCHG8B [esi];			
 	}
 }
+#elif defined(_M_ARM)
+int64 ThreadInterlockedCompareExchange64( int64 volatile *pDest, int64 value, int64 comperand )
+{
+	Assert( (size_t)pDest % 8 == 0 );
+	return InterlockedCompareExchange64( pDest, value, comperand );
+}
 #endif

 bool ThreadInterlockedAssignIf64(volatile int64 *pDest, int64 value, int64 comperand ) 
 {
 	Assert( (size_t)pDest % 8 == 0 );

-#if defined(_X360) || defined(_WIN64)
+#if defined(_X360) || defined(_WIN64) || defined(_M_ARM) || defined(_M_ARM64)
 	return ( ThreadInterlockedCompareExchange64( pDest, value, comperand ) == comperand ); 
 #else
 	__asm
@ -1833,7 +1839,7 @@ bool ThreadInterlockedAssignIf64(volatile int64 *pDest, int64 value, int64 compe
 #endif
 }

-#ifdef _WIN64
+#if defined(_WIN64) && !defined(_M_ARM64)
 bool ThreadInterlockedAssignIf128( volatile int128 *pDest, const int128 &value, const int128 &comperand )
 {
 	DbgAssert( ( (size_t)pDest % 16 ) == 0 );
--- a/tier0/wscript
+++ b/tier0/wscript
@ -54,10 +54,13 @@ def build(bld):
 			'assert_dialog.rc',
 			#'etwprof.cpp',			[$WINDOWS]
 			'platform.cpp',
-			'pme.cpp',
 			'vcrmode.cpp',
 			'win32consoleio.cpp'
 		]
+		if bld.env.DEST_CPU in ['arm', 'arm64']:
+			source += ['pme_posix.cpp']
+		else:
+			source += ['pme.cpp']
 		if bld.env.DEST_CPU == 'amd64':
 			source += [
 				'InterlockedCompareExchange128.masm'
--- a/tier1/processor_detect.cpp
+++ b/tier1/processor_detect.cpp
@ -6,7 +6,7 @@
 // $NoKeywords: $
 //=============================================================================//

-#if defined( _X360 ) || defined( WIN64 )
+#if defined( _X360 ) || defined( WIN64 ) || defined(_M_ARM) || defined(_M_ARM64)

 bool CheckMMXTechnology(void) { return false; }
 bool CheckSSETechnology(void) { return false; }
--- a/vphysics/trace.cpp
+++ b/vphysics/trace.cpp
@ -453,7 +453,7 @@ private:
 #ifdef WIN32
 static const 
 #endif
-fltx4 g_IVPToHLDir = { 1.0f, -1.0f, 1.0f, 1.0f };
+fltx4 g_IVPToHLDir = FLTX4( 1.0f, -1.0f, 1.0f, 1.0f );

 //static const fltx4 g_IVPToHLPosition = { IVP2HL(1.0f), -IVP2HL(1.0f), IVP2HL(1.0f), IVP2HL(1.0f) };

@ -680,7 +680,7 @@ bool CTraceIVP::BuildLeafmapCache( const leafmap_t * RESTRICT pLeafmap )
 #endif
 }

-static const fltx4 g_IndexBase = {0,1,2,3};
+static const fltx4 g_IndexBase =FLTX4(0,1,2,3);
 int CTraceIVP::SupportMapCached( const Vector &dir, Vector *pOut ) const
 {
 	VPROF("SupportMapCached");
--- a/vstdlib/coroutine.cpp
+++ b/vstdlib/coroutine.cpp
@ -218,7 +218,11 @@ extern "C" byte *GetStackPtr64();
 #define GetStackPtr( pStackPtr)		byte *pStackPtr = GetStackPtr64();
 #else
 #ifdef WIN32
-#define GetStackPtr( pStackPtr )	byte *pStackPtr;	__asm mov pStackPtr, esp	
+# if defined(_M_ARM) || defined(_M_ARM64)
+#  define GetStackPtr( pStackPtr )	byte pStackPtr##_anchor; byte *pStackPtr = &pStackPtr##_anchor // no inline asm on ARM: use the address of a local; its name is derived from the argument so it cannot clash with caller locals
+# else
+#  define GetStackPtr( pStackPtr )	byte *pStackPtr;	__asm mov pStackPtr, esp	
+# endif
 #elif defined(GNUC)
 // Apple's version of gcc/g++ doesn't return the expected value using the intrinsic, so 
 // do it the old fashioned way - this will also use asm on linux (since we don't compile
@ -649,7 +653,7 @@ bool Internal_Coroutine_Continue( HCoroutine hCoroutine, const char *pchDebugMsg
 	bool bInCoroutineAlready = GCoroutineMgr().IsAnyCoroutineActive();

 #ifdef _WIN32
-#ifndef _WIN64
+#ifdef _M_IX86
 	// make sure nobody has a try/catch block and then yielded
 	// because we hate that and we will crash
 	uint32 topofexceptionchain;
@ -897,7 +901,7 @@ void Coroutine_YieldToMain()
 	CoroutineDbgMsg( g_fmtstr.sprintf( "Coroutine_YieldToMain() %s#%x -> %s#%x\n", coroutine.m_pchName, coroutine.m_hCoroutine, coroutinePrev.m_pchName, coroutinePrev.m_hCoroutine ) );

 #ifdef _WIN32
-#ifndef _WIN64
+#ifdef _M_IX86
 	// make sure nobody has a try/catch block and then yielded
 	// because we hate that and we will crash
 	uint32 topofexceptionchain;
--- a/49
+++ b/49
@ -1,5 +1,6 @@
 #! /usr/bin/env python
 # encoding: utf-8
+# vim: noexpandtab
 # nillerusr

 from __future__ import print_function
@ -189,6 +190,11 @@ def define_platform(conf):
 	if conf.options.ALLOW64:
 		conf.define('PLATFORM_64BITS', 1)

+	if (conf.env.DEST_OS == 'android' or
+	 (conf.env.DEST_OS == 'win32' and conf.env.DEST_CPU in ['arm', 'arm64'])):
+		conf.env.MOBILE = True
+		conf.env.append_unique('DEFINES', ['PLATFORM_MOBILE=1'])
+
 	if conf.env.DEST_OS == 'linux':
 		conf.define('_GLIBCXX_USE_CXX11_ABI',0)
 		conf.env.append_unique('DEFINES', [
@ -388,14 +394,18 @@ def check_deps(conf):
 		conf.check(lib='opus', uselib_store='OPUS')

 	if conf.env.DEST_OS == 'win32':
-		conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB')
-		# conf.check(lib='nvtc', uselib_store='NVTC')
-		# conf.check(lib='ati_compress_mt_vc10', uselib_store='ATI_COMPRESS_MT_VC10')
-		conf.check(lib='SDL2', uselib_store='SDL2')
-		conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG')
-		conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG')
-		conf.check(lib='d3dx9', uselib_store='D3DX9')
-		conf.check(lib='d3d9', uselib_store='D3D9')
+		if conf.env.DEST_CPU in ['arm', 'arm64']:
+			conf.check(lib='d3d9', uselib_store='D3D9')
+			conf.check(lib='d3dcompiler', uselib_store='D3DCOMPILER')
+			conf.check(lib='d3dx9', uselib_store='D3DX9')
+			conf.check(lib='SDL2', uselib_store='SDL2')
+		else:
+			conf.check(lib='libz', uselib_store='ZLIB', define_name='USE_ZLIB')
+			conf.check(lib='SDL2', uselib_store='SDL2')
+			conf.check(lib='libjpeg', uselib_store='JPEG', define_name='HAVE_JPEG')
+			conf.check(lib='libpng', uselib_store='PNG', define_name='HAVE_PNG')
+			conf.check(lib='d3dx9', uselib_store='D3DX9')
+			conf.check(lib='d3d9', uselib_store='D3D9')
 		conf.check(lib='dsound', uselib_store='DSOUND')
 		conf.check(lib='dxguid', uselib_store='DXGUID')
 		if conf.options.OPUS:
@ -409,10 +419,12 @@ def configure(conf):
 	# Force XP compability, all build targets should add
 	# subsystem=bld.env.MSVC_SUBSYSTEM
 	# TODO: wrapper around bld.stlib, bld.shlib and so on?
-	conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01'
-	conf.env.MSVC_TARGETS = ['x86'] # explicitly request x86 target for MSVC
-	if conf.options.ALLOW64:
-		conf.env.MSVC_TARGETS = ['x64']
+	conf.env.MSVC_TARGETS = ['x86' if not conf.options.ALLOW64 else 'x64']
+	if conf.env.MSVC_TARGETS[0] == 'x86':
+		conf.env.MSVC_SUBSYSTEM = 'WINDOWS,5.01'
+	else:
+		conf.env.MSVC_SUBSYSTEM = 'WINDOWS'
+
 	if sys.platform == 'win32':
 		conf.load('msvc_pdb_ext msdev msvs')
 	conf.load('subproject xcompile compiler_c compiler_cxx gitversion clang_compilation_database strip_on_install_v2 waf_unit_test enforce_pic')
@ -508,7 +520,6 @@ def configure(conf):
 	else:
 		cflags += [
 			'/I'+os.path.abspath('.')+'/thirdparty/SDL',
-			'/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX',
 			'/GF',
 			'/Gy',
 			'/fp:fast',
@ -518,6 +529,8 @@ def configure(conf):
 			'/TP',
 			'/EHsc'
 		]
+		if conf.env.DEST_CPU in ['x86', 'x86_64', 'amd64']:
+			cflags += ['/arch:SSE' if conf.env.DEST_CPU == 'x86' else '/arch:AVX']

 		if conf.options.BUILD_TYPE == 'debug':
 			linkflags += [
@ -597,11 +610,19 @@ def configure(conf):
 def build(bld):
 	os.environ["CCACHE_DIR"] = os.path.abspath('.ccache/'+bld.env.COMPILER_CC+'/'+bld.env.DEST_OS+'/'+bld.env.DEST_CPU)

+	base_lib_path = os.path.join('lib', bld.env.DEST_OS, bld.env.DEST_CPU)
 	if bld.env.DEST_OS in ['win32', 'android']:
 		sdl_name = 'SDL2.dll' if bld.env.DEST_OS == 'win32' else 'libSDL2.so'
-		sdl_path = os.path.join('lib', bld.env.DEST_OS, bld.env.DEST_CPU, sdl_name)
+		sdl_path = os.path.join(base_lib_path, sdl_name)
 		bld.install_files(bld.env.LIBDIR, [sdl_path])

+	if bld.env.DEST_OS == 'win32' and bld.env.DEST_CPU in ['arm', 'arm64']:
+		# because Windows ARM doesn't have D3DX and it is deprecated since Windows 8
+		bld.install_files(bld.env.LIBDIR, [
+			os.path.join(base_lib_path, 'd3dx9.dll'),
+			os.path.join(base_lib_path, 'd3dxof.dll'),
+		])
+
 	if bld.env.DEST_OS == 'win32':
 		projects['game'] += ['utils/bzip2']
 		projects['dedicated'] += ['utils/bzip2']