diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 222b081e..66098613 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -10,7 +10,7 @@ jobs:
     - uses: actions/checkout@v2
     - name: Build linux-i386
       run: |
-        scripts/build-ubuntu-i386.sh
+        scripts/build-ubuntu-i386.sh -u
 
   build-linux-amd64:
     runs-on: ubuntu-20.04
@@ -19,7 +19,7 @@ jobs:
     - uses: actions/checkout@v2
     - name: Build linux-amd64
       run: |
-        scripts/build-ubuntu-amd64.sh
+        scripts/build-ubuntu-amd64.sh -u
 
   build-android-armv7a:
     runs-on: ubuntu-20.04
@@ -38,7 +38,7 @@ jobs:
     - name: Build windows-i386
       run: |
         git submodule init && git submodule update
-        ./waf.bat configure -T debug
+        ./waf.bat configure -T debug -u
         ./waf.bat build
 
   build-windows-amd64:
@@ -49,7 +49,7 @@ jobs:
     - name: Build windows-amd64
       run: |
         git submodule init && git submodule update
-        ./waf.bat configure -T debug -8
+        ./waf.bat configure -T debug -8 -u
         ./waf.bat build
 
   build-dedicated-windows-i386:
@@ -99,7 +99,7 @@ jobs:
     - uses: actions/checkout@v2
     - name: Build macos-amd64
       run: |
-        scripts/build-macos-amd64.sh
+        scripts/build-macos-amd64.sh -u
 
   build-dedicated-macos-amd64:
     runs-on: macos-latest
diff --git a/materialsystem/cmaterialsystem.cpp b/materialsystem/cmaterialsystem.cpp
index e7870712..75676b3c 100644
--- a/materialsystem/cmaterialsystem.cpp
+++ b/materialsystem/cmaterialsystem.cpp
@@ -672,7 +672,7 @@ bool CMaterialSystem::Connect( CreateInterfaceFn factory )
 	g_pLauncherMgr = (ILauncherMgr *)factory( "SDLMgrInterface001" /*SDL_MGR_INTERFACE_VERSION*/, NULL );		
 	if ( !g_pLauncherMgr )
 	{
-		return false;
+		Warning("Cannot connect SDL!\n");
 	}
 #endif // USE_SDL
 #endif // !DEDICATED
diff --git a/utils/common/cmdlib.cpp b/utils/common/cmdlib.cpp
index 5ab2267a..103f170e 100644
--- a/utils/common/cmdlib.cpp
+++ b/utils/common/cmdlib.cpp
@@ -803,7 +803,7 @@ FileHandle_t SafeOpenRead( const char *filename )
 void SafeRead( FileHandle_t f, void *buffer, int count)
 {
 	if ( g_pFileSystem->Read (buffer, count, f) != (size_t)count)
-		Error ("File read failure");
+		Error ("File read failure\n");
 }
 
 
diff --git a/utils/common/filesystem_tools.cpp b/utils/common/filesystem_tools.cpp
index 9401c4ad..a06dc33c 100644
--- a/utils/common/filesystem_tools.cpp
+++ b/utils/common/filesystem_tools.cpp
@@ -62,7 +62,9 @@ void FileSystem_SetupStandardDirectories( const char *pFilename, const char *pGa
 
 	Q_MakeAbsolutePath( qdir, sizeof( qdir ), pFilename, NULL );
 	Q_StripFilename( qdir );
+#ifdef _WIN32 // lowercase qdir on Windows only; presumably paths are case-sensitive on other platforms - TODO confirm
 	Q_strlower( qdir );
+#endif
 	if ( qdir[0] != 0 )
 	{
 		Q_AppendSlash( qdir, sizeof( qdir ) );
diff --git a/utils/vbsp/writebsp.cpp b/utils/vbsp/writebsp.cpp
index 005990d6..bb7d0216 100644
--- a/utils/vbsp/writebsp.cpp
+++ b/utils/vbsp/writebsp.cpp
@@ -966,7 +966,7 @@ void SetModelNumbers (void)
 		}
 		else
 		{
-			sprintf (value, "");
+			value[0] = '\0';
 		}
 		SetKeyValue (&entities[i], "model", value);
 	}
diff --git a/utils/vrad/leaf_ambient_lighting.cpp b/utils/vrad/leaf_ambient_lighting.cpp
index 507e0a51..502014f9 100644
--- a/utils/vrad/leaf_ambient_lighting.cpp
+++ b/utils/vrad/leaf_ambient_lighting.cpp
@@ -193,8 +193,8 @@ bool IsLeafAmbientSurfaceLight( dworldlight_t *wl )
 	if ( wl->style != 0 )
 		return false;
 
-	float intensity = MAX( wl->intensity[0], wl->intensity[1] );
-	intensity = MAX( intensity, wl->intensity[2] );
+	float intensity = max( wl->intensity[0], wl->intensity[1] );
+	intensity = max( intensity, wl->intensity[2] );
 	
 	return (intensity * g_flWorldLightMinEmitSurfaceDistanceRatio) < g_flWorldLightMinEmitSurface;
 }
@@ -650,7 +650,7 @@ void ComputePerLeafAmbientLighting()
 	{
 		// Distribute the work among the workers.
 		VMPI_SetCurrentStage( "ComputeLeafAmbientLighting" );
-		DistributeWork( numleafs, VMPI_DISTRIBUTEWORK_PACKETID, VMPI_ProcessLeafAmbient, VMPI_ReceiveLeafAmbientResults );
+		DistributeWork( numleafs, VMPI_ProcessLeafAmbient, VMPI_ReceiveLeafAmbientResults );
 	}
 	else
 #endif
diff --git a/utils/vrad/lightmap.cpp b/utils/vrad/lightmap.cpp
index 66101262..ebb87345 100644
--- a/utils/vrad/lightmap.cpp
+++ b/utils/vrad/lightmap.cpp
@@ -33,6 +33,120 @@ enum
 
 #define SMOOTHING_GROUP_HARD_EDGE	0xff000000
 
+//==========================================================================//
+// Ambient occlusion
+//==========================================================================//
+bool g_bNoSoften = false;
+bool g_bNoAO = false;
+
+float CalculateAmbientOcclusion( Vector *pPosition, Vector *pNormal )
+{
+	// Scalar wrapper: duplicate the point and its normal into FourVectors and call the SIMD version
+	FourVectors position4;
+	position4.DuplicateVector( *pPosition );
+
+	FourVectors normal4;
+	normal4.DuplicateVector( *pNormal );	// must fill normal4 (not position4) so the position is preserved
+
+	fltx4 ao = CalculateAmbientOcclusion4( position4, normal4, -1 );	// -1 is passed as static_prop_index_to_ignore
+
+	return SubFloat( ao, 0 );	// every lane was given the same input, so lane 0 is returned
+}
+
+inline FourVectors Mul( const FourVectors &a, const fltx4 &b )	
+{
+	FourVectors ret;
+	ret.x = MulSIMD( a.x, b );
+	ret.y = MulSIMD( a.y, b );
+	ret.z = MulSIMD( a.z, b );
+	return ret;
+}
+inline FourVectors operator+( const FourVectors &a, const FourVectors &b )
+{
+	FourVectors ret;
+	ret.x = AddSIMD( a.x, b.x );
+	ret.y = AddSIMD( a.y, b.y );
+	ret.z = AddSIMD( a.z, b.z );
+	return ret;
+}
+inline FourVectors operator-(const FourVectors &a, const FourVectors &b)
+{
+	FourVectors ret;
+	ret.x=SubSIMD(a.x,b.x);
+	ret.y=SubSIMD(a.y,b.y);
+	ret.z=SubSIMD(a.z,b.z);
+	return ret;
+}
+
+fltx4 CalculateAmbientOcclusion4( const FourVectors &position4, const FourVectors &normal4, int static_prop_index_to_ignore )
+{
+	if ( g_bNoAO )
+	{
+		return Four_Ones;
+	}
+
+	DirectionalSampler_t sampler;
+	int nSamples = 32;
+	if ( do_fast )
+	{
+		nSamples /= 2;
+	}
+
+	fltx4 totalVisible = Four_Zeros;
+	fltx4 totalPossibleVisible = Four_Zeros;
+	for ( int i = 0; i < nSamples; i++ )
+	{
+		FourVectors rayStart = position4;
+		rayStart += normal4;
+
+		// Ray direction on the sphere
+		FourVectors rayDirection;
+		rayDirection.DuplicateVector( sampler.NextValue() );
+
+		// Mirror ray along normal so all rays are on the hemisphere defined by the normal
+		fltx4 rayDotN = rayDirection * normal4; // dot product
+		fltx4 absRayDotN = fabs( rayDotN );
+		rayDirection = rayDirection - Mul( normal4, rayDotN ) + Mul( normal4, absRayDotN );
+
+		// Set length of ray
+		FourVectors rayEnd = rayDirection;
+		rayEnd *= 36.0f;
+		rayEnd += rayStart;
+
+		// Raytrace for visibility function
+		fltx4 fractionVisible = Four_Ones;
+		TestLine_IgnoreSky( rayStart, rayEnd, &fractionVisible, static_prop_index_to_ignore );
+		totalVisible = AddSIMD( totalVisible, MulSIMD( fractionVisible, absRayDotN ) );
+		totalPossibleVisible = AddSIMD( totalPossibleVisible, absRayDotN );
+	}
+
+	fltx4 ao = DivSIMD( totalVisible, totalPossibleVisible );
+	ao = MulSIMD( ao, ao ); // Square ao term - This is an artistic choice by the CS:GO team
+	return ao;
+}
+
+//==========================================================================//
+// Give surfaces a softer look instead of the harsher linear N.L look
+//==========================================================================//
+float SoftenCosineTerm( float flDot )
+{
+	if ( g_bNoSoften )
+		return flDot;
+
+	flDot = MAX( flDot, 0.0f );
+	return ( flDot + ( flDot * flDot ) ) * 0.5f; // This is cheaper than an exponent in shader code
+}
+
+fltx4 SoftenCosineTerm( fltx4 dots )
+{
+	if ( g_bNoSoften )
+		return dots;
+
+	dots = MaxSIMD( dots, Four_Zeros );
+	fltx4 dotsSquared = MulSIMD( dots, dots );
+	return MulSIMD( AddSIMD( dots, dotsSquared ), Four_PointFives );
+}
+
 //==========================================================================//
 // CNormalList.
 //==========================================================================//
@@ -98,7 +212,7 @@ int CNormalList::FindOrAddNormal( Vector const &vNormal )
 
 	// Look for a matching vector in there.
 	CUtlVector<int> *pGridElement = &m_NormalGrid[gi[0]][gi[1]][gi[2]];
-	for( int i=0; i < pGridElement->Size(); i++ )
+	for( int i=0; i < pGridElement->Count(); i++ )
 	{
 		int iNormal = pGridElement->Element(i);
 
@@ -109,7 +223,7 @@ int CNormalList::FindOrAddNormal( Vector const &vNormal )
 	}
 
 	// Ok, add a new one.
-	pGridElement->AddToTail( m_Normals.Size() );
+	pGridElement->AddToTail( m_Normals.Count() );
 	return m_Normals.AddToTail( vNormal );
 }
 
@@ -364,14 +478,14 @@ void SaveVertexNormals( void )
 		}
 	}
 
-	if( normalList.m_Normals.Size() > MAX_MAP_VERTNORMALS )
+	if( normalList.m_Normals.Count() > MAX_MAP_VERTNORMALS )
 	{
 		Error( "g_numvertnormals > MAX_MAP_VERTNORMALS" );
 	}
 
 	// Copy the list of unique vert normals into g_vertnormals.
-	g_numvertnormals = normalList.m_Normals.Size();
-	memcpy( g_vertnormals, normalList.m_Normals.Base(), sizeof(g_vertnormals[0]) * normalList.m_Normals.Size() );
+	g_numvertnormals = normalList.m_Normals.Count();
+	memcpy( g_vertnormals, normalList.m_Normals.Base(), sizeof(g_vertnormals[0]) * normalList.m_Normals.Count() );
 }
 
 /*
@@ -410,7 +524,7 @@ void ErrorLightInfo(const char *s, lightinfo_t *l)
 	//
 	else
 	{
-		Warning("%s at (degenerate face)\n\tmaterial=%s\n", s, TexDataStringTable_GetString( dtexdata[tex->texdata].nameStringTableID ));
+		Warning("%s at (degenerate face)\n\tmaterial=%s\n", s, TexDataStringTable_GetString( dtexdata[tex->texdata].nameStringTableID ));
 	}
 }
 
@@ -1130,8 +1244,22 @@ static void ParseLightGeneric( entity_t *e, directlight_t *dl )
 	Vector	        dest;
 
 	dl->light.style = (int)FloatForKey (e, "style");
-	
-	// get intenfsity
+	dl->m_bSkyLightIsDirectionalLight = false;
+
+	if( (int)FloatForKeyWithDefault(e, "_castentityshadow", 1.0f ) != 0 )
+	{
+		dl->light.flags |= DWL_FLAGS_CASTENTITYSHADOWS;
+	}
+	else
+	{
+		dl->light.flags &= ~DWL_FLAGS_CASTENTITYSHADOWS;
+	}
+
+	Vector shadowOffset( 0.0f, 0.0f, 0.0f );
+	GetVectorForKey (e, "_shadoworiginoffset", shadowOffset );
+	dl->light.shadow_cast_offset = shadowOffset;
+
+	// get intensity
 	if( g_bHDR && LightForKey( e, "_lightHDR", dl->light.intensity ) ) 
 	{
 	}
@@ -1172,90 +1300,102 @@ static void ParseLightGeneric( entity_t *e, directlight_t *dl )
 
 static void SetLightFalloffParams( entity_t * e, directlight_t * dl )
 {
-	float d50=FloatForKey( e, "_fifty_percent_distance" );
 	dl->m_flStartFadeDistance = 0;
 	dl->m_flEndFadeDistance = - 1;
 	dl->m_flCapDist = 1.0e22;
-	if ( d50 )
+	if ( g_bFiniteFalloffModel )
 	{
 		float d0 = FloatForKey( e, "_zero_percent_distance" );
-		if ( d0 < d50 )
-		{
-			Warning( "light has _fifty_percent_distance of %f but _zero_percent_distance of %f\n", d50, d0);
-			d0 = 2.0 * d50;
-		}
-		float a = 0, b = 1, c = 0;
-		if ( ! SolveInverseQuadraticMonotonic( 0, 1.0, d50, 2.0, d0, 256.0, a, b, c ))
-		{
-			Warning( "can't solve quadratic for light %f %f\n", d50, d0 );
-		}
-		// it it possible that the parameters couldn't be used because of enforing monoticity. If so, rescale so at
-		// least the 50 percent value is right
-//		printf("50 percent=%f 0 percent=%f\n",d50,d0);
-// 		printf("a=%f b=%f c=%f\n",a,b,c);
-		float v50 = c + d50 * ( b + d50 * a );
-		float scale = 2.0 / v50;
-		a *= scale;
-		b *= scale;
-		c *= scale;
-// 		printf("scaled=%f a=%f b=%f c=%f\n",scale,a,b,c);
-// 		for(float d=0;d<1000;d+=20)
-// 			printf("at %f, %f\n",d,1.0/(c+d*(b+d*a)));
-		dl->light.quadratic_attn = a;
-		dl->light.linear_attn = b;
-		dl->light.constant_attn = c;
-
-
-
-		if ( IntForKey(e, "_hardfalloff" ) )
-		{
-			dl->m_flEndFadeDistance = d0;
-			dl->m_flStartFadeDistance = 0.75 * d0 + 0.25 * d50;		// start fading 3/4 way between 50 and 0. could allow adjust
-		}
-		else
-		{
-			// now, we will find the point at which the 1/x term reaches its maximum value, and
-			// prevent the light from going past there. If a user specifes an extreme falloff, the
-			// quadratic will start making the light brighter at some distance. We handle this by
-			// fading it from the minimum brightess point down to zero at 10x the minimum distance
-			if ( fabs( a ) > 0. )
-			{
-				float flMax = b / ( - 2.0 * a );				// where f' = 0
-				if ( flMax > 0.0 )
-				{
-					dl->m_flCapDist = flMax;
-					dl->m_flStartFadeDistance = flMax;
-					dl->m_flEndFadeDistance = 10.0 * flMax;
-				}
-			}
-		}
+		dl->light.constant_attn = 1.0;
+		dl->light.linear_attn = 0; //-2.0 * ( 1.0 / d0 );
+		dl->light.quadratic_attn = -1.0 / ( d0 * d0 );
+// 		for(float d=0;d<200;d+=20)
+// 			printf("at %f, %f\n",d,1.0+d*d*dl->light.quadratic_attn );
 	}
 	else
 	{
-		dl->light.constant_attn = FloatForKey (e, "_constant_attn" );
-		dl->light.linear_attn = FloatForKey (e, "_linear_attn" );
-		dl->light.quadratic_attn = FloatForKey (e, "_quadratic_attn" );
-
-		dl->light.radius = FloatForKey (e, "_distance");
-
-		// clamp values to >= 0
-		if ( dl->light.constant_attn < EQUAL_EPSILON )
-			dl->light.constant_attn = 0;
-
-		if ( dl->light.linear_attn < EQUAL_EPSILON )
-			dl->light.linear_attn = 0;
-
-		if ( dl->light.quadratic_attn < EQUAL_EPSILON )
-			dl->light.quadratic_attn = 0;
-
-		if ( dl->light.constant_attn < EQUAL_EPSILON && dl->light.linear_attn < EQUAL_EPSILON && dl->light.quadratic_attn < EQUAL_EPSILON )
-			dl->light.constant_attn = 1;
-
-		// scale intensity for unit 100 distance
-		float ratio = ( dl->light.constant_attn + 100 * dl->light.linear_attn + 100 * 100 * dl->light.quadratic_attn );
-		if ( ratio > 0 )
+		float d50=FloatForKey( e, "_fifty_percent_distance" );
+		if ( d50 )
 		{
-			VectorScale( dl->light.intensity, ratio, dl->light.intensity );
+			float d0 = FloatForKey( e, "_zero_percent_distance" );
+			if ( d0 < d50 )
+			{
+				Warning( "light has _fifty_percent_distance of %f but _zero_percent_distance of %f\n", d50, d0);
+				d0 = 2.0 * d50;
+			}
+			float a = 0, b = 1, c = 0;
+			if ( ! SolveInverseQuadraticMonotonic( 0, 1.0, d50, 2.0, d0, 256.0, a, b, c ))
+			{
+				Warning( "can't solve quadratic for light %f %f\n", d50, d0 );
+			}
+			// it is possible that the parameters couldn't be used because of enforcing monotonicity. If so, rescale so at
+			// least the 50 percent value is right
+//		printf("50 percent=%f 0 percent=%f\n",d50,d0);
+// 		printf("a=%f b=%f c=%f\n",a,b,c);
+			float v50 = c + d50 * ( b + d50 * a );
+			float scale = 2.0 / v50;
+			a *= scale;
+			b *= scale;
+			c *= scale;
+// 		printf("scaled=%f a=%f b=%f c=%f\n",scale,a,b,c);
+// 		for(float d=0;d<200;d+=20)
+// 			printf("at %f, %f\n",d,1.0/(c+d*(b+d*a)));
+			dl->light.quadratic_attn = a;
+			dl->light.linear_attn = b;
+			dl->light.constant_attn = c;
+
+
+
+			if ( IntForKey(e, "_hardfalloff" ) )
+			{
+				dl->m_flEndFadeDistance = d0;
+				dl->m_flStartFadeDistance = 0.75 * d0 + 0.25 * d50;		// start fading 3/4 way between 50 and 0. could allow adjust
+			}
+			else
+			{
+				// now, we will find the point at which the 1/x term reaches its maximum value, and
+				// prevent the light from going past there. If a user specifies an extreme falloff, the
+				// quadratic will start making the light brighter at some distance. We handle this by
+				// fading it from the minimum brightness point down to zero at 10x the minimum distance
+				if ( fabs( a ) > 0. )
+				{
+					float flMax = b / ( - 2.0 * a );				// where f' = 0
+					if ( flMax > 0.0 )
+					{
+						dl->m_flCapDist = flMax;
+						dl->m_flStartFadeDistance = flMax;
+						dl->m_flEndFadeDistance = 10.0 * flMax;
+					}
+				}
+			}
+		}
+		else
+		{
+			dl->light.constant_attn = FloatForKey (e, "_constant_attn" );
+			dl->light.linear_attn = FloatForKey (e, "_linear_attn" );
+			dl->light.quadratic_attn = FloatForKey (e, "_quadratic_attn" );
+
+			dl->light.radius = FloatForKey (e, "_distance");
+
+			// clamp values to >= 0
+			if ( dl->light.constant_attn < EQUAL_EPSILON )
+				dl->light.constant_attn = 0;
+
+			if ( dl->light.linear_attn < EQUAL_EPSILON )
+				dl->light.linear_attn = 0;
+
+			if ( dl->light.quadratic_attn < EQUAL_EPSILON )
+				dl->light.quadratic_attn = 0;
+
+			if ( dl->light.constant_attn < EQUAL_EPSILON && dl->light.linear_attn < EQUAL_EPSILON && dl->light.quadratic_attn < EQUAL_EPSILON )
+				dl->light.constant_attn = 1;
+
+			// scale intensity for unit 100 distance
+			float ratio = ( dl->light.constant_attn + 100 * dl->light.linear_attn + 100 * 100 * dl->light.quadratic_attn );
+			if ( ratio > 0 )
+			{
+				VectorScale( dl->light.intensity, ratio, dl->light.intensity );
+			}
 		}
 	}
 }
@@ -1329,8 +1469,8 @@ bool CanLeafTraceToSky( int iLeaf )
 	for ( int j = 0; j < NUMVERTEXNORMALS; j+=4 )
 	{
 		// search back to see if we can hit a sky brush
-		delta.LoadAndSwizzle( g_anorms[j], g_anorms[MIN( j+1, NUMVERTEXNORMALS-1 )],
-			g_anorms[MIN( j+2, NUMVERTEXNORMALS-1 )], g_anorms[MIN( j+3, NUMVERTEXNORMALS-1 )] );
+		delta.LoadAndSwizzle( g_anorms[j], g_anorms[min( j+1, NUMVERTEXNORMALS-1 )],
+			g_anorms[min( j+2, NUMVERTEXNORMALS-1 )], g_anorms[min( j+3, NUMVERTEXNORMALS-1 )] );
 		delta *= -MAX_TRACE_LENGTH;
 		delta += center4;
 
@@ -1343,8 +1483,10 @@ bool CanLeafTraceToSky( int iLeaf )
 	return false;
 }
 
-void BuildVisForLightEnvironment( void )
+void BuildVisForLightEnvironment( int nNumLights, directlight_t** pLights )
 {
+	// FIXME: The work in this function is executed redundantly for multiple emit_skylight lights.
+
 	// Create the vis.
 	for ( int iLeaf = 0; iLeaf < numleafs; ++iLeaf )
 	{
@@ -1365,8 +1507,11 @@ void BuildVisForLightEnvironment( void )
 				{
 					dleafs[iLeaf].flags |= LEAF_FLAGS_SKY;
 				}
-				MergeDLightVis( gSkyLight, dleafs[iLeaf].cluster );
-				MergeDLightVis( gAmbient, dleafs[iLeaf].cluster );
+
+				for ( int iLight = 0; iLight < nNumLights; ++iLight )
+				{
+					MergeDLightVis( pLights[iLight], dleafs[iLeaf].cluster );
+				}
 				break;
 			}
 		}
@@ -1482,15 +1627,17 @@ static void ParseLightEnvironment( entity_t* e, directlight_t* dl )
 
 	ParseLightGeneric( e, dl );
 
-	char *angle_str=ValueForKeyWithDefault( e, "SunSpreadAngle" );
-	if (angle_str)
-	{
-		g_SunAngularExtent=atof(angle_str);
-		g_SunAngularExtent=sin((M_PI/180.0)*g_SunAngularExtent);
-		printf("sun extent from map=%f\n",g_SunAngularExtent);
-	}
 	if ( !gSkyLight )
 	{
+		char *angle_str=ValueForKeyWithDefault( e, "SunSpreadAngle" );
+		if (angle_str)
+		{
+			g_SunAngularExtent=atof(angle_str);
+			g_SunAngularExtent=sin((M_PI/180.0)*g_SunAngularExtent);
+			dl->m_flSkyLightSunAngularExtent = g_SunAngularExtent;
+			printf("sun extent from map=%f\n",g_SunAngularExtent);
+		}
+
 		// Sky light.
 		gSkyLight = dl;
 		dl->light.type = emit_skylight;
@@ -1513,7 +1660,12 @@ static void ParseLightEnvironment( entity_t* e, directlight_t* dl )
 						 gAmbient->light.intensity );
 		}
 		
-		BuildVisForLightEnvironment();
+		// skylight and ambient light never cast entity shadows
+		gSkyLight->light.flags &= ~DWL_FLAGS_CASTENTITYSHADOWS;
+		gAmbient->light.flags &= ~DWL_FLAGS_CASTENTITYSHADOWS;
+
+		directlight_t* lights[] = { gSkyLight, gAmbient };
+		BuildVisForLightEnvironment( 2, lights );
  
 		// Add sky and sky ambient lights to the list.
 		AddDLightToActiveList( gSkyLight );
@@ -1521,6 +1673,33 @@ static void ParseLightEnvironment( entity_t* e, directlight_t* dl )
 	}
 }
 
+static void ParseLightDirectional( entity_t* e, directlight_t* dl )
+{
+	Vector dest;
+	GetVectorForKey (e, "origin", dest );
+	dl = AllocDLight( dest, true );
+
+	ParseLightGeneric( e, dl );
+
+	char *angle_str=ValueForKeyWithDefault( e, "SunSpreadAngle" );
+	if (angle_str)
+	{
+		dl->m_flSkyLightSunAngularExtent = atof(angle_str);
+		dl->m_flSkyLightSunAngularExtent = sin((M_PI/180.0)*dl->m_flSkyLightSunAngularExtent);
+	}
+
+	dl->light.type = emit_skylight;
+	// For the engine, emit_skylight is the type we want.
+	// Set an additional flag identifying this as "not the global skylight" for vrad. This will cause it to use the angular extent associated with this light
+	// instead of the global one.
+	dl->m_bSkyLightIsDirectionalLight = true;
+
+	// directional lights never cast entity shadows
+	dl->light.flags &= ~DWL_FLAGS_CASTENTITYSHADOWS;
+
+	BuildVisForLightEnvironment( 1, &dl );
+}
+
 static void ParseLightPoint( entity_t* e, directlight_t* dl )
 {
 	Vector dest;
@@ -1573,6 +1752,8 @@ void CreateDirectLights (void)
 			dl = AllocDLight( p->origin, true );
 
 			dl->light.type = emit_surface;
+			dl->light.flags &= ~DWL_FLAGS_CASTENTITYSHADOWS;
+
 			VectorCopy (p->normal, dl->light.normal);
 			Assert( VectorLength( p->normal ) > 1.0e-20 );
 			// scale intensity by number of texture instances
@@ -1605,6 +1786,10 @@ void CreateDirectLights (void)
 		{
 			ParseLightEnvironment( e, dl );
 		}
+		else if (!strcmp(name, "light_directional")) 
+		{
+			ParseLightDirectional( e, dl );
+		}
 		else if (!strcmp(name, "light")) 
 		{
 			ParseLightPoint( e, dl );
@@ -1648,6 +1833,7 @@ void ExportDirectLightsToWorldLights()
 		// FIXME: why does vrad want 0 to 255 and not 0 to 1??
 		VectorScale( dl->light.intensity, (1.0 / 255.0), wl->intensity );
 		VectorCopy( dl->light.normal, wl->normal );
+		VectorCopy( dl->light.shadow_cast_offset, wl->shadow_cast_offset );
 		wl->stopdot	= dl->light.stopdot;
 		wl->stopdot2 = dl->light.stopdot2;
 		wl->exponent = dl->light.exponent;
@@ -1655,7 +1841,7 @@ void ExportDirectLightsToWorldLights()
 		wl->constant_attn = dl->light.constant_attn;
 		wl->linear_attn = dl->light.linear_attn;
 		wl->quadratic_attn = dl->light.quadratic_attn;
-		wl->flags = 0;
+		wl->flags = dl->light.flags;
 	}
 }
 
@@ -1668,7 +1854,7 @@ void ExportDirectLightsToWorldLights()
 
 #define CONSTANT_DOT (.7/2)
 
-#define NSAMPLES_SUN_AREA_LIGHT 30							// number of samples to take for an
+#define NSAMPLES_SUN_AREA_LIGHT 300							// number of samples to take for an
                                                             // non-point sun light
 
 // Helper function - gathers light from sun (emit_skylight)
@@ -1679,21 +1865,29 @@ void GatherSampleSkyLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, i
 {
 	bool bIgnoreNormals = ( nLFlags & GATHERLFLAGS_IGNORE_NORMALS ) != 0;
 	bool force_fast = ( nLFlags & GATHERLFLAGS_FORCE_FAST ) != 0;
-
 	fltx4 dot;
 
+	float fSunAngularExtent = g_SunAngularExtent;
+	if ( dl->m_bSkyLightIsDirectionalLight )
+	{
+		fSunAngularExtent = dl->m_flSkyLightSunAngularExtent;
+	}
+
 	if ( bIgnoreNormals )
 		dot = ReplicateX4( CONSTANT_DOT );
 	else
 		dot = NegSIMD( pNormals[0] * dl->light.normal );
 
 	dot = MaxSIMD( dot, Four_Zeros );
+
+	dot = SoftenCosineTerm( dot );
+
 	int zeroMask = TestSignSIMD ( CmpEqSIMD( dot, Four_Zeros ) );
 	if (zeroMask == 0xF)
 		return;
 
 	int nsamples = 1;
-	if ( g_SunAngularExtent > 0.0f )
+	if ( fSunAngularExtent > 0.0f )
 	{
 		nsamples = NSAMPLES_SUN_AREA_LIGHT;
 		if ( do_fast || force_fast )
@@ -1715,7 +1909,7 @@ void GatherSampleSkyLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, i
 		{
 			// jitter light source location
 			Vector ofs = sampler.NextValue();
-			ofs *= MAX_TRACE_LENGTH * g_SunAngularExtent;
+			ofs *= MAX_TRACE_LENGTH * fSunAngularExtent;
 			delta += ofs;
 		}
 		FourVectors delta4;
@@ -1728,17 +1922,25 @@ void GatherSampleSkyLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, i
 	}
 
 	fltx4 seeAmount = MulSIMD ( totalFractionVisible, ReplicateX4 ( 1.0f / nsamples ) );
+
 	out.m_flDot[0] = MulSIMD ( dot, seeAmount );
 	out.m_flFalloff = Four_Ones;
-	out.m_flSunAmount = MulSIMD ( seeAmount, ReplicateX4( 10000.0f ) );
+	out.m_flSunAmount[0] = MulSIMD( out.m_flDot[0], out.m_flFalloff );
+
 	for ( int i = 1; i < normalCount; i++ )
 	{
 		if ( bIgnoreNormals )
-			out.m_flDot[i] = ReplicateX4 ( CONSTANT_DOT );
+		{
+			out.m_flDot[i] = ReplicateX4( CONSTANT_DOT );
+			out.m_flSunAmount[i] = Four_Zeros;
+		}
 		else
 		{
 			out.m_flDot[i] = NegSIMD( pNormals[i] * dl->light.normal );
+			out.m_flDot[i] = MaxSIMD( out.m_flDot[i], Four_Zeros );
+			out.m_flDot[i] = SoftenCosineTerm( out.m_flDot[i] );
 			out.m_flDot[i] = MulSIMD( out.m_flDot[i], seeAmount );
+			out.m_flSunAmount[i] = MulSIMD( out.m_flDot[i], out.m_flFalloff );
 		}
 	}
 }
@@ -1781,6 +1983,8 @@ void GatherSampleAmbientSkySSE( SSE_sampleLightOutput_t &out, directlight_t *dl,
 		else
 			dots[0] = NegSIMD( pNormals[0] * anorm );
 
+		dots[0] = SoftenCosineTerm( dots[0] );
+
 		fltx4 validity = CmpGtSIMD( dots[0], ReplicateX4( EQUAL_EPSILON ) );
 
 		// No possibility of anybody getting lit
@@ -1797,6 +2001,9 @@ void GatherSampleAmbientSkySSE( SSE_sampleLightOutput_t &out, directlight_t *dl,
 				dots[i] = ReplicateX4( CONSTANT_DOT );
 			else
 				dots[i] = NegSIMD( pNormals[i] * anorm );
+
+			dots[i] = SoftenCosineTerm( dots[i] );
+
 			fltx4 validity2 = CmpGtSIMD( dots[i], ReplicateX4 ( EQUAL_EPSILON ) );
 			dots[i] = AndSIMD( validity2, dots[i] );
 			possibleHitCount[i] = AddSIMD( AndSIMD( AndSIMD( validity, validity2 ), Four_Ones ), possibleHitCount[i] );
@@ -1830,8 +2037,8 @@ void GatherSampleAmbientSkySSE( SSE_sampleLightOutput_t &out, directlight_t *dl,
 		out.m_flDot[i] = MulSIMD( factor, sumdot );
 		out.m_flDot[i] = ReciprocalSIMD( out.m_flDot[i] );
 		out.m_flDot[i] = MulSIMD( ambient_intensity[i], out.m_flDot[i] );
+		out.m_flSunAmount[i] = Four_Zeros;
 	}
-
 }
 
 // Helper function - gathers light from area lights, spot lights, and point lights
@@ -1865,6 +2072,8 @@ void GatherSampleStandardLightSSE( SSE_sampleLightOutput_t &out, directlight_t *
 		dot = delta * pNormals[0];
 	dot = MaxSIMD( Four_Zeros, dot );
 
+	dot = SoftenCosineTerm( dot );
+
 	// Affix dot to zero if past fade distz
 	bool bHasHardFalloff = ( dl->m_flEndFadeDistance > dl->m_flStartFadeDistance );
 	if ( bHasHardFalloff )
@@ -1893,7 +2102,14 @@ void GatherSampleStandardLightSSE( SSE_sampleLightOutput_t &out, directlight_t *
 		out.m_flFalloff = MulSIMD( out.m_flFalloff, quadratic );
 		out.m_flFalloff = AddSIMD( out.m_flFalloff, MulSIMD( linear, falloffEvalDist ) );
 		out.m_flFalloff = AddSIMD( out.m_flFalloff, constant );
-		out.m_flFalloff = ReciprocalSIMD( out.m_flFalloff );
+		if ( g_bFiniteFalloffModel )
+		{
+			out.m_flFalloff = MaxSIMD( Four_Zeros, out.m_flFalloff );
+		}
+		else
+		{
+			out.m_flFalloff = ReciprocalSIMD( out.m_flFalloff );
+		}
 		break;
 
 	case emit_surface:
@@ -1932,7 +2148,14 @@ void GatherSampleStandardLightSSE( SSE_sampleLightOutput_t &out, directlight_t *
 		out.m_flFalloff = MulSIMD( out.m_flFalloff, quadratic );
 		out.m_flFalloff = AddSIMD( out.m_flFalloff, MulSIMD( linear, falloffEvalDist ) );
 		out.m_flFalloff = AddSIMD( out.m_flFalloff, constant );
-		out.m_flFalloff = ReciprocalSIMD( out.m_flFalloff );
+		if ( g_bFiniteFalloffModel )
+		{
+			out.m_flFalloff = MaxSIMD( Four_Zeros, out.m_flFalloff );
+		}
+		else
+		{
+			out.m_flFalloff = ReciprocalSIMD( out.m_flFalloff );
+		}
 		out.m_flFalloff = MulSIMD( out.m_flFalloff, dot2 );
 
 		// outside the inner cone
@@ -1978,20 +2201,27 @@ void GatherSampleStandardLightSSE( SSE_sampleLightOutput_t &out, directlight_t *
 		out.m_flFalloff = MulSIMD( mult, out.m_flFalloff );
 	}
 
-	// Raytrace for visibility function
-	fltx4 fractionVisible = Four_Ones;
-	TestLine( pos, src, &fractionVisible, static_prop_index_to_ignore);
-	dot = MulSIMD( fractionVisible, dot );
-	out.m_flDot[0] = dot;
+	if ( !( nLFlags & GATHERLFLAGS_NO_OCCLUSION ) )
+	{
+		// Raytrace for visibility function
+		fltx4 fractionVisible = Four_Ones;
+		TestLine( pos, src, &fractionVisible, static_prop_index_to_ignore);
+		dot = MulSIMD( fractionVisible, dot );
+	}
 
+	out.m_flDot[0] = dot;
 	for ( int i = 1; i < normalCount; i++ )
 	{
 		if ( bIgnoreNormals )
-			out.m_flDot[i] = ReplicateX4( (float) CONSTANT_DOT );
+		{
+			out.m_flDot[i] = ReplicateX4( (float)CONSTANT_DOT );
+			out.m_flSunAmount[i] = Four_Zeros; 
+		}
 		else
 		{
 			out.m_flDot[i] = pNormals[i] * delta;
 			out.m_flDot[i] = MaxSIMD( Four_Zeros, out.m_flDot[i] );
+			out.m_flSunAmount[i] = Four_Zeros; 
 		}
 	}
 }
@@ -2009,9 +2239,11 @@ void GatherSampleLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, int
 					   float flEpsilon )
 {
 	for ( int b = 0; b < normalCount; b++ )
+	{
 		out.m_flDot[b] = Four_Zeros;
+		out.m_flSunAmount[b] = Four_Zeros;
+	}
 	out.m_flFalloff = Four_Zeros;
-	out.m_flSunAmount = Four_Zeros;
 	Assert( normalCount <= (NUM_BUMP_VECTS+1) );
 
 	// skylights work fundamentally differently than normal lights
@@ -2036,18 +2268,40 @@ void GatherSampleLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, int
 		return;
 	}
 
+	// Ambient occlusion for the 4 sample positions & normals
+	fltx4 ao = Four_Ones;
+	bool bIgnoreNormals = (nLFlags & GATHERLFLAGS_IGNORE_NORMALS) != 0;
+	if ( !bIgnoreNormals ) // Don't calculate ambient occlusion for objects that ignore normals for gathering light
+	{
+		if ( nLFlags & GATHERLFLAGS_STATICPROP )
+		{
+			// for static props we want the sun amount for the basis normals to be multiplied by the ao of the main vertex normal only.
+			// lightmaps using this path do not send basis normals here, we do so for static props to take advantage of the SIMD optimisation this path provides.
+			ao = CalculateAmbientOcclusion4( pos, *pNormals, static_prop_index_to_ignore );
+			fltx4 ao0 = SplatXSIMD( ao );
+			ao = ao0;
+		}
+		else
+		{
+			ao = CalculateAmbientOcclusion4( pos, *pNormals, static_prop_index_to_ignore );
+		}
+	}
+
 	// NOTE: Notice here that if the light is on the back side of the face
 	// (tested by checking the dot product of the face normal and the light position)
 	// we don't want it to contribute to *any* of the bumped lightmaps. It glows
 	// in disturbing ways if we don't do this.
 	out.m_flDot[0] = MaxSIMD ( out.m_flDot[0], Four_Zeros );
 	fltx4 notZero = CmpGtSIMD( out.m_flDot[0], Four_Zeros );
+	out.m_flDot[0] = MulSIMD( out.m_flDot[0], ao );
+	out.m_flSunAmount[0] = MulSIMD( out.m_flSunAmount[0], ao );
 	for ( int n = 1; n < normalCount; n++ )
 	{
 		out.m_flDot[n] = MaxSIMD( out.m_flDot[n], Four_Zeros );
 		out.m_flDot[n] = AndSIMD( out.m_flDot[n], notZero );
+		out.m_flDot[n] = MulSIMD( out.m_flDot[n], ao );
+		out.m_flSunAmount[n] = MulSIMD( out.m_flSunAmount[n], ao );
 	}
-
 }
 
 /*
@@ -2438,7 +2692,6 @@ static int FindOrAllocateLightstyleSamples( dface_t* f, facelight_t	*fl, int lig
 //-----------------------------------------------------------------------------
 static void ComputeIlluminationPointAndNormalsSSE( lightinfo_t const& l, FourVectors const &pos, FourVectors const &norm, SSE_SampleInfo_t* pInfo, int numSamples )
 {
-
 	Vector v[4];
 
 	pInfo->m_Points = pos;
@@ -2487,6 +2740,44 @@ static void ComputeIlluminationPointAndNormalsSSE( lightinfo_t const& l, FourVec
 		pInfo->m_Clusters[i] = ClusterFromPoint( pos.Vec( i ) );
 }
 
+//-----------------------------------------------------------------------------
+// Compute the illumination point + normal for the sample on a displacement
+// (see ComputeIlluminationPointAndNormalsSSE above)
+//-----------------------------------------------------------------------------
+static void ComputeIlluminationPointAndNormalsForDisp( lightinfo_t const& l, FourVectors &pos, FourVectors &norm, SSE_SampleInfo_t* pInfo )
+{
+	pInfo->m_PointNormals[ 0 ] = norm;
+
+	if ( pInfo->m_NormalCount > 1 )
+	{
+		Vector bv[ 4 ][ NUM_BUMP_VECTS ];
+		for ( int j = 0; j < 4; j++ )
+		{
+			// TODO: using Vec may slow things down a bit
+			GetBumpNormals( pInfo->m_pTexInfo->textureVecsTexelsPerWorldUnits[ 0 ],
+							pInfo->m_pTexInfo->textureVecsTexelsPerWorldUnits[ 1 ],
+							l.facenormal, norm.Vec( j ), bv[ j ] );
+		}
+		for ( int b = 0; b < NUM_BUMP_VECTS; b++ )
+		{
+			pInfo->m_PointNormals[ b + 1 ].LoadAndSwizzle( bv[ 0 ][ b ], bv[ 1 ][ b ], bv[ 2 ][ b ], bv[ 3 ][ b ] );
+		}
+	}
+
+	pInfo->m_Points = pos;
+
+	// FIXME: move sample point off the surface a bit, this is done so that
+	// light sampling will not be affected by a bug	where raycasts will
+	// intersect with the face being lit. We really should just have that
+	// logic in GatherSampleLight
+	FourVectors faceNormal = norm;
+	pInfo->m_Points += faceNormal;
+
+	// TODO: this may slow things down a bit ( using Vec )
+	for ( int j = 0; j < 4; j++ )
+		pInfo->m_Clusters[ j ] = ClusterFromPoint( pos.Vec( j ) );
+}
+
 //-----------------------------------------------------------------------------
 // Iterates over all lights and computes lighting at up to 4 sample points
 //-----------------------------------------------------------------------------
@@ -2536,11 +2827,7 @@ static void GatherSampleLightAt4Points( SSE_SampleInfo_t& info, int sampleIdx, i
 			if (info.m_WarnFace != info.m_FaceNum)
 			{
 				Warning ("\nWARNING: Too many light styles on a face at (%f, %f, %f)\n",
-#ifdef VRAD_SSE
-					info.m_Points.x.m128_f32[0], info.m_Points.y.m128_f32[0], info.m_Points.z.m128_f32[0] );
-#else
 					info.m_Points.x[0], info.m_Points.y[0], info.m_Points.z[0] );
-#endif
 				info.m_WarnFace = info.m_FaceNum;
 			}
 			continue;
@@ -2564,7 +2851,7 @@ static void GatherSampleLightAt4Points( SSE_SampleInfo_t& info, int sampleIdx, i
 		{
 			for ( int i = 0; i < numSamples; i++ )
 			{
-				pLightmaps[n][sampleIdx + i].AddLight( SubFloat( fxdot[n], i ), dl->light.intensity, SubFloat( out.m_flSunAmount, i ) );
+				pLightmaps[n][sampleIdx + i].AddLight( SubFloat( fxdot[n], i ), dl->light.intensity, SubFloat( out.m_flSunAmount[n], i ) );
 			}
 		}
 	}
@@ -2638,16 +2925,16 @@ static void ResampleLightAt4Points( SSE_SampleInfo_t& info, int lightStyleIndex,
 		{
 			for( int n = 0; n < info.m_NormalCount; ++n )
 			{
-				pLightmap[i][n].AddLight( SubFloat( fxdot[n], i ), dl->light.intensity, SubFloat( out.m_flSunAmount, i ) );
+				pLightmap[i][n].AddLight( SubFloat( fxdot[n], i ), dl->light.intensity, SubFloat( out.m_flSunAmount[n], i ) );
 			}
 		}
 	}
 }
 
-bool PointsInWinding ( FourVectors const & point, winding_t *w, int &invalidBits )
+bool PointsInWinding( FourVectors const & point, winding_t *w, int &invalidBits )
 {
 	FourVectors edge, toPt, cross, testCross, p0, p1;
-	fltx4 invalidMask;
+	fltx4 invalidMask = Four_Zeros;
 
 	//
 	// get the first normal to test
@@ -2659,9 +2946,11 @@ bool PointsInWinding ( FourVectors const & point, winding_t *w, int &invalidBits
 	edge = p1;
 	edge -= p0;
 	testCross = edge ^ toPt;
-	testCross.VectorNormalizeFast();
+	// safer against /0 - testCross.VectorNormalizeFast();
+	fltx4 mag_sq = testCross * testCross;
+	testCross *= ReciprocalSqrtEstSaturateSIMD( mag_sq );
 
-	for( int ndxPt = 1; ndxPt < w->numpoints; ndxPt++ )
+	for ( int ndxPt = 1; ndxPt < w->numpoints; ndxPt++ )
 	{
 		p0.DuplicateVector( w->p[ndxPt] );
 		p1.DuplicateVector( w->p[(ndxPt+1)%w->numpoints] );
@@ -2670,7 +2959,9 @@ bool PointsInWinding ( FourVectors const & point, winding_t *w, int &invalidBits
 		edge = p1;
 		edge -= p0;
 		cross = edge ^ toPt;
-		cross.VectorNormalizeFast();
+		// safer against /0 - cross.VectorNormalizeFast();
+		mag_sq = cross * cross;
+		cross *= ReciprocalSqrtEstSaturateSIMD( mag_sq );
 
 		fltx4 dot = cross * testCross;
 		invalidMask = OrSIMD( invalidMask, CmpLtSIMD( dot, Four_Zeros ) );
@@ -2698,8 +2989,9 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 
 	// Some parameters related to supersampling
 	float sampleWidth = ( flags & NON_AMBIENT_ONLY ) ? 4 : 2;
+
 	float cscale = 1.0f / sampleWidth;
-	float csshift = -((sampleWidth - 1) * cscale) / 2.0;
+	float csshift = -( ( sampleWidth - 1 ) * cscale ) / 2.0;
 
 	// Clear out the light values
 	for (int i = 0; i < info.m_NormalCount; ++i )
@@ -2712,12 +3004,35 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 
 	FourVectors superSampleLightCoord;
 	FourVectors superSamplePosition;
+	superSamplePosition.DuplicateVector( sample.pos );
+
+	Vector wsError;
+	FourVectors superSampleWorldSpaceError;
+
+	float stepU = 0.0f;
+	float stepV = 0.0f;
+	if ( info.m_IsDispFace )
+	{
+		// compensate for error when transforming back to worldspace (only enabled for displacements)
+		Vector toWorld;
+		LuxelSpaceToWorld( &l, temp.x, temp.y, toWorld );
+		VectorSubtract( sample.pos, toWorld, wsError );
+		superSampleWorldSpaceError.DuplicateVector( wsError );
+
+		// lightmap size
+		int width = l.face->m_LightmapTextureSizeInLuxels[ 0 ] + 1;
+		int height = l.face->m_LightmapTextureSizeInLuxels[ 1 ] + 1;
+
+		// calculate the steps in uv space
+		stepU = 1.0f / (float)width;
+		stepV = 1.0f / (float)height;
+	}
 
 	if ( flags & NON_AMBIENT_ONLY )
 	{
 		float aRow[4];
 		for ( int coord = 0; coord < 4; ++coord )
-			aRow[coord] = csshift + coord * cscale;
+			aRow[ coord ] = csshift + coord * cscale;
 		fltx4 sseRow = LoadUnalignedSIMD( aRow );
 
 		for (int s = 0; s < 4; ++s)
@@ -2732,6 +3047,29 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 			// Figure out where the supersample exists in the world, and make sure
 			// it lies within the sample winding
 			LuxelSpaceToWorld( &l, superSampleLightCoord[0], superSampleLightCoord[1], superSamplePosition );
+ 
+			if ( info.m_IsDispFace )
+			{
+				// Fix up error from world to luxel and back again
+				superSamplePosition += superSampleWorldSpaceError;
+
+				// Find pos and norm for disp from uv supersample offsets
+ 				Vector vDispP[4], vDispN[4];
+  				for ( int i = 0; i < 4; i++ )
+ 				{
+ 					vDispP[ i ] = superSamplePosition.Vec( i );
+					Vector2D uv;
+
+					uv.x = sample.coord[0] + ( aRow[ s ] * stepU );
+					uv.y = sample.coord[1] + ( aRow[ i ] * stepV );
+
+					StaticDispMgr()->GetDispSurfPointAndNormalFromUV( info.m_FaceNum, vDispP[ i ], vDispN[ i ], uv, false );
+				}
+				superSamplePosition = FourVectors( vDispP[ 0 ], vDispP[ 1 ], vDispP[ 2 ], vDispP[ 3 ] );
+				superSampleNormal = FourVectors( vDispN[ 0 ], vDispN[ 1 ], vDispN[ 2 ], vDispN[ 3 ] );
+
+				ComputeIlluminationPointAndNormalsForDisp( l, superSamplePosition, superSampleNormal, &info );
+			}
 
 			// A winding should exist only if the sample wasn't a uniform luxel, or if g_bDumpPatches is true.
 			int invalidBits = 0;
@@ -2740,7 +3078,8 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 
 			// Compute the super-sample illumination point and normal
 			// We're assuming the flat normal is the same for all supersamples
-			ComputeIlluminationPointAndNormalsSSE( l, superSamplePosition, superSampleNormal, &info, 4 );
+			if ( !info.m_IsDispFace )
+				ComputeIlluminationPointAndNormalsSSE( l, superSamplePosition, superSampleNormal, &info, 4 );
 
 			// Resample the non-ambient light at this point...
 			LightingValue_t result[4][NUM_BUMP_VECTS+1];
@@ -2753,7 +3092,7 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 				{
 					for ( int n = 0; n < info.m_NormalCount; ++n )
 					{
-						pLight[n].AddLight( result[i][n] );
+						pLight[ n ].AddLight( result[ i ][ n ] );
 					}
 					++subsampleCount;
 				}
@@ -2770,11 +3109,36 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 
 		LuxelSpaceToWorld( &l, superSampleLightCoord[0], superSampleLightCoord[1], superSamplePosition );
 
+		if ( info.m_IsDispFace )
+		{
+			// Fix up error from world to luxel and back again
+			superSamplePosition += superSampleWorldSpaceError;
+
+			// Find pos and norm for disp from uv supersample offsets
+			Vector vDispP[ 4 ], vDispN[ 4 ];
+			for ( int i = 0; i < 4; i++ )
+			{
+				vDispP[ i ] = superSamplePosition.Vec( i );
+				Vector uvOffsets = superSampleOffsets.Vec( i );
+				Vector2D uv;
+
+				uv.x = sample.coord[ 0 ] + ( uvOffsets.x * stepU );
+				uv.y = sample.coord[ 1 ] + ( uvOffsets.y * stepV );
+
+				StaticDispMgr()->GetDispSurfPointAndNormalFromUV( info.m_FaceNum, vDispP[ i ], vDispN[ i ], uv, false );
+			}
+			superSamplePosition = FourVectors( vDispP[ 0 ], vDispP[ 1 ], vDispP[ 2 ], vDispP[ 3 ] );
+			superSampleNormal = FourVectors( vDispN[ 0 ], vDispN[ 1 ], vDispN[ 2 ], vDispN[ 3 ] );
+
+			ComputeIlluminationPointAndNormalsForDisp( l, superSamplePosition, superSampleNormal, &info );
+		}
+
 		int invalidBits = 0;
 		if ( sample.w && !PointsInWinding( superSamplePosition, sample.w, invalidBits ) )
 			return 0;
 
-		ComputeIlluminationPointAndNormalsSSE( l, superSamplePosition, superSampleNormal, &info, 4 );
+		if ( !info.m_IsDispFace )
+			ComputeIlluminationPointAndNormalsSSE( l, superSamplePosition, superSampleNormal, &info, 4 );
 
 		LightingValue_t result[4][NUM_BUMP_VECTS+1];
 		ResampleLightAt4Points( info, lightStyleIndex, AMBIENT_ONLY, result );
@@ -2786,7 +3150,7 @@ static int SupersampleLightAtPoint( lightinfo_t& l, SSE_SampleInfo_t& info,
 			{
 				for ( int n = 0; n < info.m_NormalCount; ++n )
 				{
-					pLight[n].AddLight( result[i][n] );
+					pLight[ n ].AddLight( result[ i ][ n ] );
 				}
 				++subsampleCount;
 			}
@@ -2951,17 +3315,16 @@ static void BuildSupersampleFaceLights( lightinfo_t& l, SSE_SampleInfo_t& info,
 			if ( ambientSupersampleCount > 0 && directSupersampleCount > 0 )
 			{
 				// Add the ambient + directional terms together, stick it back into the lightmap
-				for (int n = 0; n < info.m_NormalCount; ++n)
+				for ( int n = 0; n < info.m_NormalCount; ++n )
 				{
-					ppLightSamples[n][i].Zero();
-					ppLightSamples[n][i].AddWeighted( pDirectLight[n],1.0f / directSupersampleCount );
-					ppLightSamples[n][i].AddWeighted( pAmbientLight[n], 1.0f / ambientSupersampleCount );
+					ppLightSamples[ n ][ i ].Zero();
+					ppLightSamples[ n ][ i ].AddWeighted( pDirectLight[ n ], 1.0f / directSupersampleCount );
+					ppLightSamples[ n ][ i ].AddWeighted( pAmbientLight[ n ], 1.0f / ambientSupersampleCount );
 				}
 
 				// Recompute the luxel intensity based on the supersampling
 				ComputeLuxelIntensity( info, i, ppLightSamples, pSampleIntensity );
 			}
-
 		}
 
 		// We've finished another pass
@@ -3125,12 +3488,12 @@ void BuildFacelights (int iThread, int facenum)
 		int nSample = 4 * grp;
 
 		sample_t *sample = sampleInfo.m_pFaceLight->sample + nSample;
-		int numSamples = MIN ( 4, sampleInfo.m_pFaceLight->numsamples - nSample );
+		int numSamples = min ( 4, sampleInfo.m_pFaceLight->numsamples - nSample );
 
 		FourVectors positions;
 		FourVectors normals;
 
-		for ( int i = 0; i < 4; i++ )
+		for ( i = 0; i < 4; i++ )
 		{
 			v[i] = ( i < numSamples ) ? sample[i].pos : sample[numSamples - 1].pos;
 			n[i] = ( i < numSamples ) ? sample[i].normal : sample[numSamples - 1].normal;
@@ -3143,7 +3506,7 @@ void BuildFacelights (int iThread, int facenum)
 		// Fixup sample normals in case of smooth faces
 		if ( !l.isflat )
 		{
-			for ( int i = 0; i < numSamples; i++ )
+			for ( i = 0; i < numSamples; i++ )
 				sample[i].normal = sampleInfo.m_PointNormals[0].Vec( i );
 		}
 
@@ -3166,8 +3529,9 @@ void BuildFacelights (int iThread, int facenum)
 		return;
 	}
 
-	// get rid of the -extra functionality on displacement surfaces
-	if (do_extra && !sampleInfo.m_IsDispFace)
+	// Enabling supersampling for displacements (previous revision always disabled do_extra for disp)
+	// improves continuity significantly between disp and brush surfaces, especially when using high frequency alpha shadow materials
+	if ( do_extra ) 
 	{
 		// For each lightstyle, perform a supersampling pass
 		for ( i = 0; i < MAXLIGHTMAPS; ++i )
@@ -3363,6 +3727,59 @@ void BuildPatchLights( int facenum )
 }
 
 
+void BuildStaticPropPatchlights( int iThread, int nPatch )	// accumulates averaged direct light into one static prop patch
+{
+	if ( g_Patches[ nPatch ].faceNumber >= 0 )
+	{
+		// Not a static prop patch (those are identified here by a negative faceNumber)
+		return;
+	}
+	CPatch &patch = g_Patches[ nPatch ];
+
+	// Random sample locations on the triangle spanned by winding points 0, 1 and 2
+	Vector vecOrigin = patch.winding->p[ 0 ];
+	Vector vecU = patch.winding->p[ 2 ] - patch.winding->p[ 0 ];
+	Vector vecV = patch.winding->p[ 1 ] - patch.winding->p[ 0 ];
+	int nSampleCount = Max( 1, int( patch.area / 16.0f ) );	// one sample per 16 units of patch area, but never fewer than one
+	float flSampleArea = patch.area / float( nSampleCount );
+	float flSampleFrac = 1.0f / float( nSampleCount );	// weight of a single sample in the average
+	sample_t *pSamples = new sample_t[ nSampleCount ];
+	memset( pSamples, 0, sizeof( sample_t )*nSampleCount );
+	for ( int i = 0; i < nSampleCount; i++ )
+	{
+		// Plain uniform random sampling. Should be jittered instead or some other better distribution over the triangle.
+		float flU = RandomFloat();
+		float flV = RandomFloat();
+		if ( flU + flV > 1.0f )	// remap the pair so that flU + flV <= 1 and the point stays inside the triangle
+		{
+			flU = 1.0f - flU;
+			flV = 1.0f - flV;
+			std::swap( flU, flV );
+		}
+		pSamples[ i ].pos = vecOrigin + flU * vecU + flV * vecV;
+		pSamples[ i ].normal = patch.normal;
+		pSamples[ i ].area = flSampleArea;
+	}
+
+	Vector directColor( 0.0f, 0.0f, 0.0f );
+	float flSunAmount = 0.0f;	// passed to ComputeDirectLightingAtPoint by address; its value is not read in this function
+
+	// sample the lights at each sample location
+	for ( int i = 0; i < nSampleCount; i++ )
+	{
+		sample_t *sample = pSamples + i;
+
+		directColor.Init( 0.0f, 0.0f, 0.0f );
+		flSunAmount = 0.0f;
+		ComputeDirectLightingAtPoint( sample->pos, &sample->normal, &directColor, &flSunAmount, 1, false, iThread, -1, 0 );
+		directColor *= g_flStaticPropBounceBoost;
+		patch.totallight.light[ 0 ] += directColor * flSampleFrac;	// every sample contributes an equal share
+		patch.directlight += directColor * flSampleFrac;
+	}
+
+	delete[] pSamples;
+}
+
 /*
   =============
   PrecompLightmapOffsets
@@ -3417,7 +3834,10 @@ void PrecompLightmapOffsets()
 		{
 	        lightdatasize += nLuxels * 4 * lightstyles;
 		}
-    }
+
+		// Add room for additional light data here that will be packed into lightmap alpha
+		lightdatasize += nLuxels * 4 * lightstyles;
+	}
 
 	// The incremental lighting code needs us to preserve the contents of dlightdata
 	// since it only recomposites lighting for faces that have lights that touch them.
@@ -3556,51 +3976,30 @@ static void LinearToBumpedLightmap(
 // Convert a RGBExp32 to a RGBA8888
 // This matches the engine's conversion, so the lighting result is consistent.
 //-----------------------------------------------------------------------------
-void ConvertRGBExp32ToRGBA8888( const ColorRGBExp32 *pSrc, unsigned char *pDst, Vector* _optOutLinear )
+void ConvertRGBExp32ToRGBA8888( const ColorRGBExp32 *pSrc, unsigned char *pDst )
 {
 	Vector		linearColor;
+	Vector		vertexColor;
 
 	// convert from ColorRGBExp32 to linear space
 	linearColor[0] = TexLightToLinear( ((ColorRGBExp32 *)pSrc)->r, ((ColorRGBExp32 *)pSrc)->exponent );
 	linearColor[1] = TexLightToLinear( ((ColorRGBExp32 *)pSrc)->g, ((ColorRGBExp32 *)pSrc)->exponent );
 	linearColor[2] = TexLightToLinear( ((ColorRGBExp32 *)pSrc)->b, ((ColorRGBExp32 *)pSrc)->exponent );
 
-	ConvertLinearToRGBA8888( &linearColor, pDst );
-	if ( _optOutLinear )
-		*_optOutLinear = linearColor;
-}
-
-//-----------------------------------------------------------------------------
-// Converts a RGBExp32 to a linear color value.
-//-----------------------------------------------------------------------------
-void ConvertRGBExp32ToLinear(const ColorRGBExp32 *pSrc, Vector* pDst)
-{
-
-	(*pDst)[0] = TexLightToLinear(((ColorRGBExp32 *)pSrc)->r, ((ColorRGBExp32 *)pSrc)->exponent);
-	(*pDst)[1] = TexLightToLinear(((ColorRGBExp32 *)pSrc)->g, ((ColorRGBExp32 *)pSrc)->exponent);
-	(*pDst)[2] = TexLightToLinear(((ColorRGBExp32 *)pSrc)->b, ((ColorRGBExp32 *)pSrc)->exponent);
-}
-
-//-----------------------------------------------------------------------------
-// Converts a linear color value (suitable for combining linearly) to an RBGA8888 value expected by the engine.
-//-----------------------------------------------------------------------------
-void ConvertLinearToRGBA8888(const Vector *pSrcLinear, unsigned char *pDst)
-{
-	Vector		vertexColor;
-
 	// convert from linear space to lightmap space
 	// cannot use mathlib routine directly because it doesn't match
 	// the colorspace version found in the engine, which *is* the same sequence here
-	vertexColor[0] = LinearToVertexLight((*pSrcLinear)[0]);
-	vertexColor[1] = LinearToVertexLight((*pSrcLinear)[1]);
-	vertexColor[2] = LinearToVertexLight((*pSrcLinear)[2]);
+	vertexColor[0] = LinearToVertexLight( linearColor[0] );
+	vertexColor[1] = LinearToVertexLight( linearColor[1] );
+	vertexColor[2] = LinearToVertexLight( linearColor[2] );
 
 	// this is really a color normalization with a floor
-	ColorClamp(vertexColor);
+	ColorClamp( vertexColor );
 
 	// final [0..255] scale
-	pDst[0] = RoundFloatToByte(vertexColor[0] * 255.0f);
-	pDst[1] = RoundFloatToByte(vertexColor[1] * 255.0f);
-	pDst[2] = RoundFloatToByte(vertexColor[2] * 255.0f);
+	pDst[0] = RoundFloatToByte( vertexColor[0] * 255.0f );
+	pDst[1] = RoundFloatToByte( vertexColor[1] * 255.0f );
+	pDst[2] = RoundFloatToByte( vertexColor[2] * 255.0f );
 	pDst[3] = 255;
 }
+
diff --git a/utils/vrad/lightmap.h b/utils/vrad/lightmap.h
index a4c698da..42e732e1 100644
--- a/utils/vrad/lightmap.h
+++ b/utils/vrad/lightmap.h
@@ -137,5 +137,10 @@ void FreeDLights();
 
 void ExportDirectLightsToWorldLights();
 
+float CalculateAmbientOcclusion( Vector *pPosition, Vector *pNormal );
+fltx4 CalculateAmbientOcclusion4( const FourVectors &position4, const FourVectors &normal4, int static_prop_index_to_ignore );
+
+float SoftenCosineTerm( float flDot );
+fltx4 SoftenCosineTerm( fltx4 dots );
 
 #endif // LIGHTMAP_H
diff --git a/utils/vrad/mpivrad.cpp b/utils/vrad/mpivrad.cpp
index 5b7bfc03..1a081cda 100644
--- a/utils/vrad/mpivrad.cpp
+++ b/utils/vrad/mpivrad.cpp
@@ -29,7 +29,7 @@
 #include "mpi_stats.h"
 #include "vmpi_distribute_work.h"
 #include "vmpi_tools_shared.h"
-
+#include "tier0/fasttimer.h"
 
 
 
@@ -60,9 +60,13 @@ bool VRAD_DispatchFn( MessageBuffer *pBuf, int iSource, int iPacketID )
 	}
 }
 CDispatchReg g_VRADDispatchReg( VMPI_VRAD_PACKET_ID, VRAD_DispatchFn ); // register to handle the messages we want
-CDispatchReg g_DistributeWorkReg( VMPI_DISTRIBUTEWORK_PACKETID, DistributeWorkDispatch );
 
 
+VMPI_REGISTER_PACKET_ID( VMPI_VRAD_PACKET_ID )
+VMPI_REGISTER_SUBPACKET_ID( VMPI_VRAD_PACKET_ID, VMPI_SUBPACKETID_VIS_LEAFS	)
+VMPI_REGISTER_SUBPACKET_ID( VMPI_VRAD_PACKET_ID, VMPI_SUBPACKETID_BUILDFACELIGHTS )
+VMPI_REGISTER_SUBPACKET_ID( VMPI_VRAD_PACKET_ID, VMPI_SUBPACKETID_PLIGHTDATA_RESULTS )
+	
 
 void VRAD_SetupMPI( int &argc, char **&argv )
 {
@@ -238,7 +242,6 @@ void RunMPIBuildFacelights()
 	VMPI_SetCurrentStage( "RunMPIBuildFaceLights" );
 	double elapsed = DistributeWork( 
 		numfaces, 
-		VMPI_DISTRIBUTEWORK_PACKETID,
 		MPI_ProcessFaces, 
 		MPI_ReceiveFaceResults );
 
@@ -265,7 +268,7 @@ void RunMPIBuildFacelights()
 	else
 	{
 		if ( g_iVMPIVerboseLevel >= 1 )
-			Msg( "\n\n%.1f%% CPU utilization during BuildFaceLights\n\n", ( g_CPUTime.GetSeconds() * 100 / elapsed ) );
+			Msg( "\n\n%.1f%% CPU utilization during BuildFaceLights\n\n", ( g_CPUTime.GetSeconds() * 100 / elapsed ) ); // %.1f needs a floating-point argument; do not cast to int
 	}
 }
 
@@ -396,7 +399,6 @@ void RunMPIBuildVisLeafs()
 	
 	double elapsed = DistributeWork( 
 		dvis->numclusters, 
-		VMPI_DISTRIBUTEWORK_PACKETID,
 		MPI_ProcessVisLeafs, 
 		MPI_ReceiveVisLeafsResults );
 
diff --git a/utils/vrad/mpivrad.h b/utils/vrad/mpivrad.h
index 01c841b3..078d85bc 100644
--- a/utils/vrad/mpivrad.h
+++ b/utils/vrad/mpivrad.h
@@ -18,9 +18,6 @@
 	#define VMPI_SUBPACKETID_BUILDFACELIGHTS	1
 	#define VMPI_SUBPACKETID_PLIGHTDATA_RESULTS	2
 
-// DistributeWork owns this packet ID.
-#define VMPI_DISTRIBUTEWORK_PACKETID			2
-
 
 // Called first thing in the exe.
 void		VRAD_SetupMPI( int &argc, char **&argv );
diff --git a/utils/vrad/radial.cpp b/utils/vrad/radial.cpp
index 26dd1a72..2f196414 100644
--- a/utils/vrad/radial.cpp
+++ b/utils/vrad/radial.cpp
@@ -647,7 +647,7 @@ void FinalLightFace( int iThread, int facenum )
 	float		    minlight;
 	int			    lightstyles;
 	LightingValue_t lb[NUM_BUMP_VECTS + 1], v[NUM_BUMP_VECTS + 1];
-	unsigned char   *pdata[NUM_BUMP_VECTS + 1];
+	unsigned char   *pdata[NUM_BUMP_VECTS + 2]; // +2 is for flat and additional lightmap alpha data
 	int				bumpSample;
 	radial_t	    *rad = NULL;
 	radial_t	    *prad = NULL;
@@ -734,9 +734,9 @@ void FinalLightFace( int iThread, int facenum )
 		// it isn't going to use those positions (see loop over bumpSample below)
 		// The file offset is correctly computed to only store space for 1 set
 		// of light data if we don't have bumped lighting.
-		for( bumpSample = 0; bumpSample < bumpSampleCount; ++bumpSample )
+		for( bumpSample = 0; bumpSample < bumpSampleCount + 1; ++bumpSample ) // The +1 is for the additional lightmap alpha data
 		{
-			pdata[bumpSample] = &(*pdlightdata)[f->lightofs + (k * bumpSampleCount + bumpSample) * fl->numluxels*4]; 
+			pdata[bumpSample] = &(*pdlightdata)[f->lightofs + ( ( k * ( bumpSampleCount + 1 ) ) + bumpSample) * fl->numluxels*4]; 
 		}
 
 		// Compute the average luxel color, but not for the bump samples
@@ -773,11 +773,11 @@ void FinalLightFace( int iThread, int facenum )
 				// v is indirect light that is received on the luxel.
 				if( !bDisp )
 				{
-					SampleRadial( prad, fl->luxel[j], v, bumpSampleCount );
+					SampleRadial( prad, fl->luxel[j], v, bumpSampleCount ); // indirect on brushes
 				}
 				else
 				{
-					StaticDispMgr()->SampleRadial( facenum, prad, fl->luxel[j], j, v, bumpSampleCount, true );
+					StaticDispMgr()->SampleRadial( facenum, prad, fl->luxel[j], j, v, bumpSampleCount, true ); // indirect on displacements
 				}
 
 				for( bumpSample = 0; bumpSample < bumpSampleCount; ++bumpSample )
@@ -840,6 +840,16 @@ void FinalLightFace( int iThread, int facenum )
 				// convert to a 4 byte r,g,b,signed exponent format
 				VectorToColorRGBExp32( Vector( lb[bumpSample].m_vecLighting.x, lb[bumpSample].m_vecLighting.y,
 											   lb[bumpSample].m_vecLighting.z ), *( ColorRGBExp32 *)pdata[bumpSample] );
+
+				// Generate additional lightmap alpha data
+				if ( bumpSample == 0 )
+				{
+					pdata[bumpSampleCount][0] = uint8( clamp( lb[0].m_flDirectSunAmount, 0.0f, 1.0f ) * 255.0f + 0.5f );
+					pdata[bumpSampleCount][1] = uint8( clamp( lb[1].m_flDirectSunAmount, 0.0f, 1.0f ) * 255.0f + 0.5f );
+					pdata[bumpSampleCount][2] = uint8( clamp( lb[2].m_flDirectSunAmount, 0.0f, 1.0f ) * 255.0f + 0.5f );
+					pdata[bumpSampleCount][3] = uint8( clamp( lb[3].m_flDirectSunAmount, 0.0f, 1.0f ) * 255.0f + 0.5f );
+					pdata[bumpSampleCount]+=4;
+				}
 #endif
 
 				pdata[bumpSample] += 4;
diff --git a/utils/vrad/trace.cpp b/utils/vrad/trace.cpp
index 3bca8350..e8b65d39 100644
--- a/utils/vrad/trace.cpp
+++ b/utils/vrad/trace.cpp
@@ -133,11 +133,7 @@ public:
 			addedCoverage[s] = 0.0f;
 			if ( ( sign >> s) & 0x1 )
 			{
-#ifdef VRAD_SSE
-				addedCoverage[s] = ComputeCoverageFromTexture( b0->m128_f32[s], b1->m128_f32[s], b2->m128_f32[s], hitID );
-#else
-				addedCoverage[s] = ComputeCoverageFromTexture( b0[0][s], b1[0][s], b2[0][s], hitID );
-#endif
+				addedCoverage[s] = ComputeCoverageFromTexture( (*b0)[s], (*b1)[s], (*b2)[s], hitID );
 			}
 		}
 		m_coverage = AddSIMD( m_coverage, LoadUnalignedSIMD( addedCoverage ) );
@@ -173,11 +169,7 @@ void TestLine( const FourVectors& start, const FourVectors& stop,
 	{
 		visibility[i] = 1.0f;
 		if ( ( rt_result.HitIds[i] != -1 ) &&
-#ifdef VRAD_SSE
-		     ( rt_result.HitDistance.m128_f32[i] < len.m128_f32[i] ) )
-#else
 		     ( rt_result.HitDistance[i] < len[i] ) )
-#endif
 		{
 			visibility[i] = 0.0f;
 		}
@@ -187,7 +179,68 @@ void TestLine( const FourVectors& start, const FourVectors& stop,
 		*pFractionVisible = MinSIMD( *pFractionVisible, coverageCallback.GetFractionVisible() );
 }
 
+void TestLine_IgnoreSky( const FourVectors& start, const FourVectors& stop,
+						 fltx4 *pFractionVisible, int static_prop_index_to_ignore )
+{
+	FourRays myrays;
+	myrays.origin = start;
+	myrays.direction = stop;
+	myrays.direction -= myrays.origin;
+	fltx4 len = myrays.direction.length();	// per-lane distance from start to stop
+	myrays.direction *= ReciprocalSIMD( len );	// scale each direction by 1/len
 
+	RayTracingResult rt_result;
+	CCoverageCountTexture coverageCallback;
+
+	g_RtEnv.Trace4Rays(myrays, Four_Zeros, len, &rt_result, TRACE_ID_STATICPROP | static_prop_index_to_ignore, g_bTextureShadows ? &coverageCallback : 0 );
+
+	// Assume we can see the targets unless a ray hits something other than sky before reaching its stop point
+	float visibility[4];
+	for ( int i = 0; i < 4; i++ )
+	{
+		visibility[i] = 1.0f;
+		if ( ( rt_result.HitIds[i] != -1 ) &&
+			 ( rt_result.HitDistance[i] < len[i] ) )
+		{
+			int id = g_RtEnv.OptimizedTriangleList[rt_result.HitIds[i]].m_Data.m_IntersectData.m_nTriangleID;
+			if ( !( id & TRACE_ID_SKY ) )	// hits on triangles flagged TRACE_ID_SKY leave the lane visible
+			{
+				visibility[i] = 0.0f;
+			}
+		}
+	}
+	*pFractionVisible = LoadUnalignedSIMD( visibility );
+	if ( g_bTextureShadows )	// the coverage callback can only lower the result (MinSIMD)
+		*pFractionVisible = MinSIMD( *pFractionVisible, coverageCallback.GetFractionVisible() );
+}
+
+void TestLine_LightBlockers( const FourVectors& start, const FourVectors& stop,
+							 fltx4 *pFractionVisible )
+{
+	FourRays myrays;
+	myrays.origin = start;
+	myrays.direction = stop;
+	myrays.direction -= myrays.origin;
+	fltx4 len = myrays.direction.length();	// per-lane distance from start to stop
+	myrays.direction *= ReciprocalSIMD( len );	// scale each direction by 1/len
+
+	RayTracingResult rt_result;
+
+	g_RtEnv_LightBlockers.Trace4Rays( myrays, Four_Zeros, len, &rt_result, -1, NULL );
+
+	// Assume we can see the targets unless we get hits. NOTE(review): only lane 0 is examined below, lanes 1-3 always report 1.0 - confirm callers rely on lane 0 only
+	float visibility[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
+	if ( (rt_result.HitIds[0] != -1) &&
+		 (rt_result.HitDistance[0] < len[0]) )
+	{
+		fltx4 dotRaySurfaceN = rt_result.surface_normal * myrays.direction;
+		if ( dotRaySurfaceN[0] > 0.0f ) // blocks only when ray direction and hit normal point the same way (presumably a back-face hit - verify)
+		{
+			visibility[0] = 0.0f;
+		}
+	}
+	*pFractionVisible = LoadUnalignedSIMD( visibility );
+}
 
 /*
 ================
@@ -381,11 +434,7 @@ void TestLine_DoesHitSky( FourVectors const& start, FourVectors const& stop,
 	{
 		aOcclusion[i] = 0.0f;
 		if ( ( rt_result.HitIds[i] != -1 ) &&
-#ifdef VRAD_SSE
-		     ( rt_result.HitDistance.m128_f32[i] < len.m128_f32[i] ) )
-#else
 		     ( rt_result.HitDistance[i] < len[i] ) )
-#endif
 		{
 			int id = g_RtEnv.OptimizedTriangleList[rt_result.HitIds[i]].m_Data.m_IntersectData.m_nTriangleID;
 			if ( !( id & TRACE_ID_SKY ) )
@@ -501,22 +550,92 @@ dmodel_t *BrushmodelForEntity( entity_t *pEntity )
 	return NULL;
 }
 
+// Adds a brush winding to g_RtEnv as a triangle fan that can cast texture shadows
+void AddTexturedBrushWinding( winding_t *w, const VMatrix &xform, texinfo_t *tx, int shadowMaterialIndex )
+{
+	// Shadow texture dimensions; the defaults of 32 remain if the lookup leaves them untouched
+	int mappingWidth = 32;
+	int mappingHeight = 32;
+	GetShadowTextureMapping( shadowMaterialIndex, &mappingWidth, &mappingHeight );
+
+	// Texture coordinate of every winding point, divided by the mapping size
+	Vector2D uv[MAX_POINTS_ON_WINDING];
+	for ( int nPoint = 0; nPoint < w->numpoints; nPoint++ )
+	{
+		Vector2D &texCoord = uv[nPoint];
+
+		texCoord.x = DotProduct( w->p[nPoint].Base(), tx->textureVecsTexelsPerWorldUnits[0] ) + 
+			tx->textureVecsTexelsPerWorldUnits[0][3];
+		texCoord.x /= float(mappingWidth);
+
+		texCoord.y = DotProduct( w->p[nPoint].Base(), tx->textureVecsTexelsPerWorldUnits[1] ) + 
+			tx->textureVecsTexelsPerWorldUnits[1][3];
+		texCoord.y /= float(mappingHeight);
+	}
+
+	// Triangulate the winding as a fan around point 0
+	for ( int nPoint = 2; nPoint < w->numpoints; nPoint++ )
+	{
+		const int nPrev = nPoint - 1;
+		Vector v0 = xform.VMul4x3(w->p[0]);
+		Vector v1 = xform.VMul4x3(w->p[nPrev]);
+		Vector v2 = xform.VMul4x3(w->p[nPoint]);
+		float coverage = ComputeCoverageForTriangle(shadowMaterialIndex, uv[0], uv[nPrev], uv[nPoint] );
+
+		// Partially covered triangles are flagged transparent and get a shadow texture triangle
+		const bool bPartial = ( coverage < 1.0 );
+		int index = bPartial ? AddShadowTextureTriangle( shadowMaterialIndex, uv[0], uv[nPrev], uv[nPoint] ) : -1;
+		unsigned short flags = bPartial ? FCACHETRI_TRANSPARENT : 0;
+		Vector fullCoverage( bPartial ? coverage : 0.0f, 0, 1 );
+		g_RtEnv.AddTriangle(TRACE_ID_OPAQUE, v0, v1, v2, fullCoverage, flags, index);
+	}
+}
+
 void AddBrushToRaytraceEnvironment( dbrush_t *pBrush, const VMatrix &xform )
 {
-	if ( !( pBrush->contents & MASK_OPAQUE ) )
+	int materialIndexList[256];
+	bool bTextureShadows = false;
+	
+	if ( !( pBrush->contents & (MASK_OPAQUE) ) && !(g_bTextureShadows && (pBrush->contents & CONTENTS_GRATE)) )
 		return;
 
+	if ( pBrush->contents & CONTENTS_LADDER )
+		return;
+
+	// load any transparent textures for shadows
+	if ( g_bTextureShadows && (pBrush->contents & CONTENTS_GRATE) && pBrush->numsides < ARRAYSIZE(materialIndexList) )
+	{
+		for (int i = 0; i < pBrush->numsides; i++ )
+		{
+			dbrushside_t *side = &dbrushsides[pBrush->firstside + i];
+			texinfo_t *tx = &texinfo[side->texinfo];
+			dtexdata_t *pTexData = &dtexdata[tx->texdata];
+			const char *pMaterialName = TexDataStringTable_GetString( pTexData->nameStringTableID );
+			materialIndexList[i] = LoadShadowTexture( pMaterialName );
+			if ( materialIndexList[i] >= 0 )
+			{
+				bTextureShadows = true;
+			}
+		}
+	}
 	Vector v0, v1, v2;
+
 	for (int i = 0; i < pBrush->numsides; i++ )
 	{
 		dbrushside_t *side = &dbrushsides[pBrush->firstside + i];
 		dplane_t *plane = &dplanes[side->planenum];
 		texinfo_t *tx = &texinfo[side->texinfo];
 		winding_t *w = BaseWindingForPlane (plane->normal, plane->dist);
+		bool bIsLightBlocker = false;
 
 		if ( tx->flags & SURF_SKY || side->dispinfo )
 			continue;
 
+		if ( ( pBrush->contents & ( CONTENTS_OPAQUE | CONTENTS_SOLID ) ) && ( tx->flags & SURF_NODRAW ) )
+		{
+			bIsLightBlocker = true;
+		}
+
 		for (int j=0 ; j<pBrush->numsides && w; j++)
 		{
 			if (i == j)
@@ -529,14 +648,28 @@ void AddBrushToRaytraceEnvironment( dbrush_t *pBrush, const VMatrix &xform )
 		}
 		if ( w )
 		{
-			for ( int j = 2; j < w->numpoints; j++ )
+			if ( bTextureShadows && materialIndexList[i] >= 0 )
 			{
-				v0 = xform.VMul4x3(w->p[0]);
-				v1 = xform.VMul4x3(w->p[j-1]);
-				v2 = xform.VMul4x3(w->p[j]);
-				Vector fullCoverage;
-				fullCoverage.x = 1.0f;
-				g_RtEnv.AddTriangle(TRACE_ID_OPAQUE, v0, v1, v2, fullCoverage);
+				AddTexturedBrushWinding( w, xform, tx, materialIndexList[i] );
+			}
+			else
+			{
+				// opaque
+				Vector fullCoverage(1,1,1);
+				for ( int j = 2; j < w->numpoints; j++ )
+				{
+					v0 = xform.VMul4x3(w->p[0]);
+					v1 = xform.VMul4x3(w->p[j-1]);
+					v2 = xform.VMul4x3(w->p[j]);
+					g_RtEnv.AddTriangle( TRACE_ID_OPAQUE, v0, v1, v2, fullCoverage );
+
+					// light blockers
+					if ( bIsLightBlocker )
+					{
+						g_RtEnv_LightBlockers.AddTriangle( TRACE_ID_OPAQUE, v0, v1, v2, fullCoverage );
+						g_RtEnv_RadiosityPatches.AddTriangle( TRACE_ID_OPAQUE, v0, v1, v2, fullCoverage );
+					}
+				}
 			}
 			FreeWinding( w );
 		}
@@ -615,7 +748,7 @@ void AddBrushesForRayTrace( void )
 	CUtlVector<int> brushList;
 	GetBrushes_r ( dmodels[0].headnode, brushList );
 
-	for ( int i = 0; i < brushList.Size(); i++ )
+	for ( int i = 0; i < brushList.Count(); i++ )
 	{
 		dbrush_t *brush = &dbrushes[brushList[i]];
 		AddBrushToRaytraceEnvironment ( brush, identity );
diff --git a/utils/vrad/vismat.cpp b/utils/vrad/vismat.cpp
index d33cd017..93d6e252 100644
--- a/utils/vrad/vismat.cpp
+++ b/utils/vrad/vismat.cpp
@@ -296,6 +296,29 @@ void AddDispsToClusterTable( void )
 }
 
 
+struct ClusterPatchList_t
+{
+	CUtlVector<int>	patches;	// indices into g_Patches
+};
+
+static CUtlVector<ClusterPatchList_t> g_ClusterStaticPropPatches;	// indexed by cluster number; sized and filled by AddStaticPropPatchesToClusterTable
+
+void AddStaticPropPatchesToClusterTable()
+{
+	// One patch list per entry of g_ClusterLeaves, indexed by cluster number
+	g_ClusterStaticPropPatches.SetCount( g_ClusterLeaves.Count() );
+
+	const int nPatchCount = g_Patches.Count();
+	for ( int nPatch = 0; nPatch < nPatchCount; nPatch++ )
+	{
+		const CPatch &candidate = g_Patches[ nPatch ];
+
+		// Only patches without a face (negative faceNumber) that sit in a valid cluster are recorded
+		if ( candidate.faceNumber < 0 && candidate.clusterNumber >= 0 )
+			g_ClusterStaticPropPatches[ candidate.clusterNumber ].patches.AddToTail( nPatch );
+	}
+}
+
 /*
 ==============
 BuildVisRow
@@ -345,7 +368,7 @@ void BuildVisRow (int patchnum, byte *pvs, int head, transfer_t *transfers, CTra
 			}
 		}
 
-		int dispCount = g_ClusterDispFaces[j].dispFaces.Size();
+		int dispCount = g_ClusterDispFaces[j].dispFaces.Count();
 		for( int ndxDisp = 0; ndxDisp < dispCount; ndxDisp++ )
 		{
 			int ndxFace = g_ClusterDispFaces[j].dispFaces[ndxDisp];
@@ -360,6 +383,20 @@ void BuildVisRow (int patchnum, byte *pvs, int head, transfer_t *transfers, CTra
 
 			TestPatchToFace( patchnum, ndxFace, head, transfers, transferMaker, iThread );
 		}
+
+		if ( g_bStaticPropBounce )
+		{
+			// Test static prop patches
+			int staticPropPatchCount = g_ClusterStaticPropPatches[ j ].patches.Count();
+			for ( int i = 0; i < staticPropPatchCount; i++ )
+			{
+				int nPatchIdx = g_ClusterStaticPropPatches[ j ].patches[ i ];
+				if ( nPatchIdx != patchnum )
+				{
+					TestPatchToPatch( patchnum, nPatchIdx, head, transfers, transferMaker, iThread );
+				}
+			}
+		}
 	}
 
 
diff --git a/utils/vrad/vrad.cpp b/utils/vrad/vrad.cpp
index 1eb47467..d13cf873 100644
--- a/utils/vrad/vrad.cpp
+++ b/utils/vrad/vrad.cpp
@@ -40,7 +40,7 @@ every surface must be divided into at least two patches each axis
 */
 
 CUtlVector<CPatch>		g_Patches;			
-CUtlVector<int>			g_FacePatches;		// contains all patches, children first
+CUtlVector<int>			g_FacePatches;		// contains all patches, children first
 CUtlVector<int>			faceParents;		// contains only root patches, use next parent to iterate
 CUtlVector<int>			clusterChildren;
 CUtlVector<Vector>		emitlight;
@@ -66,8 +66,7 @@ bool		g_bDumpRtEnv = false;
 bool		bRed2Black = true;
 bool		g_bFastAmbient = false;
 bool        g_bNoSkyRecurse = false;
-bool		g_bDumpPropLightmaps = false;
-
+bool        g_bFiniteFalloffModel = false;					// whether to use 1/xxx or not
 
 int			junk;
 
@@ -77,6 +76,7 @@ float		lightscale = 1.0;
 float		dlight_threshold = 0.1;  // was DIRECT_LIGHT constant
 
 char		source[MAX_PATH] = "";
+char		platformPath[MAX_PATH] = "";
 
 char		level_name[MAX_PATH] = "";	// map filename, without extension or path info
 
@@ -93,12 +93,14 @@ bool		g_bInterrupt = false;	// Wsed with background lighting in WC. Tells VRAD
 float g_SunAngularExtent=0.0;
 
 float g_flSkySampleScale = 1.0;
+float g_flStaticPropSampleScale = 4.0;
 
 bool g_bLargeDispSampleRadius = false;
 
 bool g_bOnlyStaticProps = false;
 bool g_bShowStaticPropNormals = false;
-
+bool g_bStaticPropBounce = false;
+float g_flStaticPropBounceBoost = 1.0f;
 
 float		qgamma = 0.5;
 float		indirect_sun = 1.0;
@@ -125,11 +127,15 @@ bool		g_bStaticPropLighting = false;
 bool        g_bStaticPropPolys = false;
 bool        g_bTextureShadows = false;
 bool        g_bDisablePropSelfShadowing = false;
-
+bool		g_bFastStaticProps = false;
+bool		g_bDumpBumpStaticProps = false;
+bool		g_bDisableStaticPropVertexInSolidTest = false;
 
 CUtlVector<byte> g_FacesVisibleToLights;
 
 RayTracingEnvironment g_RtEnv;
+RayTracingEnvironment g_RtEnv_LightBlockers; // ray tracing environment consisting solely of light blockers - used in conjunction with bsp to solve indirect lighting for static props (as opposed to using the full RTE).
+RayTracingEnvironment g_RtEnv_RadiosityPatches;
 
 dface_t *g_pFaces=0;
 
@@ -290,7 +296,7 @@ void ReadLightFile (char *filename)
 			texlights[j].filename = filename;
 			file_texlights ++;
 			
-			num_texlights = MAX( num_texlights, j + 1 );
+			num_texlights = max( num_texlights, j + 1 );
 		}
 	}
 	qprintf ( "[%i texlights parsed from '%s']\n\n", file_texlights, filename);
@@ -305,8 +311,6 @@ LightForTexture
 */
 void LightForTexture( const char *name, Vector& result )
 {
-	int		i;
-
 	result[ 0 ] = result[ 1 ] = result[ 2 ] = 0;
 
 	char baseFilename[ MAX_PATH ];
@@ -346,7 +350,7 @@ void LightForTexture( const char *name, Vector& result )
 		}
 	}
 
-	for (i=0 ; i<num_texlights ; i++)
+	for (int i=0 ; i<num_texlights ; i++)
 	{
 		if (!Q_strcasecmp (name, texlights[i].name))
 		{
@@ -548,6 +552,7 @@ void MakePatchForFace (int fn, winding_t *w)
 	patch->child2 = g_Patches.InvalidIndex();
 	patch->parent = g_Patches.InvalidIndex();
 	patch->needsBumpmap = tx->flags & SURF_BUMPLIGHT ? true : false;
+	patch->staticPropIdx = -1;
 
 	// link and save patch data
 	patch->ndxNext = g_FacePatches.Element( fn );
@@ -739,6 +744,11 @@ void MakePatches (void)
 
 	// make the displacement surface patches
 	StaticDispMgr()->MakePatches();
+
+	if ( g_bStaticPropBounce )
+	{
+		StaticPropMgr()->MakePatches();
+	}
 }
 
 /*
@@ -755,6 +765,12 @@ SUBDIVIDE
 //-----------------------------------------------------------------------------
 bool PreventSubdivision( CPatch *patch )
 {
+	if ( patch->faceNumber < 0 )
+	{
+		// static prop patch
+		return true;
+	}
+
 	dface_t *f = g_pFaces + patch->faceNumber;
 	texinfo_t *tx = &texinfo[f->texinfo];
 
@@ -825,7 +841,7 @@ int CreateChildPatch( int nParentIndex, winding_t *pWinding, float flArea, const
 			if ( (child->face_maxs[i] == child->maxs[i] || child->face_mins[i] == child->mins[i] )
 			  && total[i] > minchop )
 			{
-				child->chop = MAX( minchop, child->chop / 2 );
+				child->chop = max( minchop, child->chop / 2 );
 				break;
 			}
 		}
@@ -885,7 +901,7 @@ void SubdividePatch( int ndxPatch )
 			if (patch->chop > minchop)
 			{
 				bSubdivide = true;
-				patch->chop = MAX( minchop, patch->chop / 2 );
+				patch->chop = max( minchop, patch->chop / 2 );
 			}
 		}
 	}
@@ -936,7 +952,7 @@ void SubdividePatches (void)
 	if (numbounce == 0)
 		return;
 
-	unsigned int uiPatchCount = g_Patches.Size();
+	unsigned int uiPatchCount = g_Patches.Count();
 	qprintf ("%i patches before subdivision\n", uiPatchCount);
 
 	for (i = 0; i < uiPatchCount; i++)
@@ -944,6 +960,12 @@ void SubdividePatches (void)
 		CPatch *pCur = &g_Patches.Element( i );
 		pCur->planeDist = pCur->plane->dist;
 
+		if ( pCur->faceNumber < 0 )
+		{
+			// This and all following patches are "fake" staticprop patches. Set up parent data structure for them.
+			break;
+		}
+
 		pCur->ndxNextParent = faceParents.Element( pCur->faceNumber );
 		faceParents[pCur->faceNumber] = pCur - g_Patches.Base();
 	}
@@ -974,10 +996,16 @@ void SubdividePatches (void)
 		g_FacePatches[i] = g_FacePatches.InvalidIndex();
 	}
 
-	uiPatchCount = g_Patches.Size();
+	uiPatchCount = g_Patches.Count();
 	for (i = 0; i < uiPatchCount; i++)
 	{
 		CPatch *pCur = &g_Patches.Element( i );
+		if ( pCur->faceNumber < 0)
+		{
+			// Static prop patches don't have an associated face
+			continue;
+		}
+
 		pCur->ndxNext = g_FacePatches.Element( pCur->faceNumber );
 		g_FacePatches[pCur->faceNumber] = pCur - g_Patches.Base();
 
@@ -1282,7 +1310,7 @@ void WriteWorld (char *name, int iBump)
 	if (!out)
 		Error ("Couldn't open %s", name);
 
-	unsigned int uiPatchCount = g_Patches.Size();
+	unsigned int uiPatchCount = g_Patches.Count();
 	for (j=0; j<uiPatchCount; j++)
 	{
 		patch = &g_Patches.Element( j );
@@ -1323,7 +1351,7 @@ void WriteRTEnv (char *name)
 	winding_t *triw = AllocWinding( 3 );
 	triw->numpoints = 3;
 
-	for( int i = 0; i < g_RtEnv.OptimizedTriangleList.Size(); i++ )
+	for( int i = 0; i < g_RtEnv.OptimizedTriangleList.Count(); i++ )
 	{
 		triw->p[0] = g_RtEnv.OptimizedTriangleList[i].Vertex( 0);
 		triw->p[1] = g_RtEnv.OptimizedTriangleList[i].Vertex( 1);
@@ -1424,7 +1452,7 @@ void CollectLight( Vector& total )
 	VectorFill( total, 0 );
 
 	// process patches in reverse order so that children are processed before their parents
-	unsigned int uiPatchCount = g_Patches.Size();
+	unsigned int uiPatchCount = g_Patches.Count();
 	for( i = uiPatchCount - 1; i >= 0; i-- )
 	{
 		patch = &g_Patches.Element( i );
@@ -1563,7 +1591,7 @@ void GatherLight (int threadnum, void *pUserData)
 			Vector normals[NUM_BUMP_VECTS+1];
 
 			// Disps
-			bool bDisp = ( g_pFaces[patch->faceNumber].dispinfo != -1 ); 
+			bool bDisp = ( patch->faceNumber >= 0 ) && ( g_pFaces[ patch->faceNumber ].dispinfo != -1 );
 			if ( bDisp )
 			{
 				normals[0] = patch->normal;
@@ -1663,7 +1691,7 @@ void BounceLight (void)
 	char		name[64];
 	qboolean	bouncing = numbounce > 0;
 
-	unsigned int uiPatchCount = g_Patches.Size();
+	unsigned int uiPatchCount = g_Patches.Count();
 	for (i=0 ; i<uiPatchCount; i++)
 	{
 		// totallight has a copy of the direct lighting.  Move it to the emitted light and zero it out (to integrate bounces only)
@@ -1711,7 +1739,7 @@ void BounceLight (void)
 	{
 		// transfer light from to the leaf patches from other patches via transfers
 		// this moves shooter->emitlight to receiver->addlight
-		unsigned int uiPatchCount = g_Patches.Size();
+		uiPatchCount = g_Patches.Count();
 		RunThreadsOn (uiPatchCount, true, GatherLight);
 		// move newly received light (addlight) to light to be sent out (emitlight)
 		// start at children and pull light up to parents
@@ -1828,6 +1856,11 @@ void RadWorld_Start()
 	// add displacement faces to cluster table
 	AddDispsToClusterTable();
 
+	if ( g_bStaticPropBounce )
+	{
+		AddStaticPropPatchesToClusterTable();
+	}
+
 	// create directlights out of patches and lights
 	CreateDirectLights ();
 
@@ -1940,6 +1973,21 @@ void MakeAllScales (void)
 
 	qprintf ("transfer lists: %5.1f megs\n"
 		, (float)total_transfer * sizeof(transfer_t) / (1024*1024));
+
+	if ( g_bStaticPropBounce )
+	{
+		int nTransfers = 0;
+		for ( int i = 0; i < g_Patches.Count(); i++ )
+		{
+			CPatch *pCur = &g_Patches.Element( i );
+			if ( pCur->faceNumber >= 0 )
+			{
+				continue;
+			}
+			nTransfers += pCur->numtransfers;
+		}
+		Msg( "static prop patch transfers %d\n", nTransfers );
+	}
 }
 
 
@@ -2025,17 +2073,51 @@ bool RadWorld_Go()
 		BuildFacesVisibleToLights( true );
 	}
 
-#ifdef MPI
 	// build initial facelights
+#ifdef MPI
 	if (g_bUseMPI) 
 	{
 		// RunThreadsOnIndividual (numfaces, true, BuildFacelights);
 		RunMPIBuildFacelights();
+		if ( g_bStaticPropBounce )
+		{
+			RunThreadsOnIndividual( g_Patches.Count(), true, BuildStaticPropPatchlights );
+		}
 	}
 	else 
 #endif
 	{
-		RunThreadsOnIndividual (numfaces, true, BuildFacelights);
+		RunThreadsOnIndividual( numfaces, true, BuildFacelights );
+		if ( g_bStaticPropBounce )
+		{
+			RunThreadsOnIndividual( g_Patches.Count(), true, BuildStaticPropPatchlights );
+		}
+#if 0
+		IScratchPad3D *pPad = ScratchPad3D_Create();
+		pPad->SetAutoFlush( false );
+		float flMax = 0.0f;
+		for ( int i = 0; i < g_Patches.Count(); i++ )
+		{
+			if ( g_Patches[ i ].child1 != g_Patches.InvalidIndex() || g_Patches[ i ].child2 != g_Patches.InvalidIndex() )
+				continue;
+			Vector vLight = g_Patches[ i ].directlight;
+			flMax = Max( flMax, vLight.x );
+			flMax = Max( flMax, vLight.y );
+			flMax = Max( flMax, vLight.z );
+		}
+		for ( int i = 0; i < g_Patches.Count(); i++ )
+		{
+			if ( g_Patches[ i ].child1 != g_Patches.InvalidIndex() || g_Patches[ i ].child2 != g_Patches.InvalidIndex() )
+				continue;
+			Vector vLight = g_Patches[ i ].directlight * g_Patches[i].reflectivity;
+			vLight /= flMax;
+			vLight.x = SrgbLinearToGamma( vLight.x );
+			vLight.y = SrgbLinearToGamma( vLight.y );
+			vLight.z = SrgbLinearToGamma( vLight.z );
+			pPad->DrawPolygon( CSPVertList( g_Patches[ i ].winding->p, g_Patches[ i ].winding->numpoints, CSPColor( vLight ) ) );
+		}
+		pPad->Release();
+#endif
 	}
 
 	// Was the process interrupted?
@@ -2068,10 +2150,10 @@ bool RadWorld_Go()
 		if (numbounce > 0)
 		{
 			// allocate memory for emitlight/addlight
-			emitlight.SetSize( g_Patches.Size() );
-			memset( emitlight.Base(), 0, g_Patches.Size() * sizeof( Vector ) );
-			addlight.SetSize( g_Patches.Size() );
-			memset( addlight.Base(), 0, g_Patches.Size() * sizeof( bumplights_t ) );
+			emitlight.SetSize( g_Patches.Count() );
+			memset( emitlight.Base(), 0, g_Patches.Count() * sizeof( Vector ) );
+			addlight.SetSize( g_Patches.Count() );
+			memset( addlight.Base(), 0, g_Patches.Count() * sizeof( bumplights_t ) );
 
 			MakeAllScales ();
 
@@ -2087,12 +2169,13 @@ bool RadWorld_Go()
 		StaticDispMgr()->InsertPatchSampleDataIntoHashTable();
 		StaticDispMgr()->EndTimer();
 
-#ifdef MPI
 		// blend bounced light into direct light and save
+#ifdef MPI
 		VMPI_SetCurrentStage( "FinalLightFace" );
 		if ( !g_bUseMPI || g_bMPIMaster )
 #endif
 			RunThreadsOnIndividual (numfaces, true, FinalLightFace);
+		
 #ifdef MPI
 		// Distribute the lighting data to workers.
 		VMPI_DistributeLightData();
@@ -2131,7 +2214,6 @@ void InitDumpPatchesFiles()
 	}
 }
 
-extern IFileSystem *g_pOriginalPassThruFileSystem;
 
 void VRAD_LoadBSP( char const *pFilename )
 {
@@ -2204,23 +2286,7 @@ void VRAD_LoadBSP( char const *pFilename )
 	VMPI_SetCurrentStage( "LoadBSPFile" );
 #endif
 	LoadBSPFile (source);
-
-#ifdef MPI
-	// Add this bsp to our search path so embedded resources can be found
-	if ( g_bUseMPI && g_bMPIMaster )
-	{
-		// MPI Master, MPI workers don't need to do anything
-		g_pOriginalPassThruFileSystem->AddSearchPath(source, "GAME", PATH_ADD_TO_HEAD);
-		g_pOriginalPassThruFileSystem->AddSearchPath(source, "MOD", PATH_ADD_TO_HEAD);
-	}
-	else if ( !g_bUseMPI )
-#endif
-	{
-		// Non-MPI
-		g_pFullFileSystem->AddSearchPath(source, "GAME", PATH_ADD_TO_HEAD);
-		g_pFullFileSystem->AddSearchPath(source, "MOD", PATH_ADD_TO_HEAD);
-	}
-
+	
 	// now, set whether or not static prop lighting is present
 	if (g_bStaticPropLighting)
 		g_LevelFlags |= g_bHDR? LVLFLAGS_BAKED_STATIC_PROP_LIGHTING_HDR : LVLFLAGS_BAKED_STATIC_PROP_LIGHTING_NONHDR;
@@ -2295,6 +2361,7 @@ void VRAD_LoadBSP( char const *pFilename )
 	printf ( "Setting up ray-trace acceleration structure... ");
 	float start = Plat_FloatTime();
 	g_RtEnv.SetupAccelerationStructure();
+	g_RtEnv_LightBlockers.SetupAccelerationStructure();
 	float end = Plat_FloatTime();
 	printf ( "Done (%.2f seconds)\n", end-start );
 
@@ -2390,16 +2457,29 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 {
 	*onlydetail = false;
 
-	int mapArg = -1;
-
 	// default to LDR
 	SetHDRMode( false );
 	int i;
 	for( i=1 ; i<argc ; i++ )
 	{
-		if ( !Q_stricmp( argv[i], "-StaticPropLighting" ) )
+		if ( !Q_stricmp( argv[i], "-StaticPropLighting" ) ) // use -final for higher quality
 		{
 			g_bStaticPropLighting = true;
+			extern int g_numVradStaticPropsLightingStreams;
+			g_numVradStaticPropsLightingStreams = 3;
+		}
+		else if ( !Q_stricmp( argv[i], "-StaticPropLightingFinal" ) ) // slower, higher quality - deprecated, remove soon
+		{
+			g_bStaticPropLighting = true;
+			extern int g_numVradStaticPropsLightingStreams;
+			g_numVradStaticPropsLightingStreams = 3;
+		}
+		else if ( !Q_stricmp( argv[i], "-StaticPropLighting3" ) ) // dump bump data - deprecated, remove soon
+		{
+			g_bStaticPropLighting = true;
+			extern int g_numVradStaticPropsLightingStreams;
+			g_numVradStaticPropsLightingStreams = 3;
+			g_bDumpBumpStaticProps = true;
 		}
 		else if ( !stricmp( argv[i], "-StaticPropNormals" ) )
 		{
@@ -2417,11 +2497,15 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 		{
 			g_bDisablePropSelfShadowing = true;
 		}
+		else if ( !stricmp( argv[i], "-StaticPropDisableInSolidTest" ) )
+		{
+			g_bDisableStaticPropVertexInSolidTest = true;
+		}
 		else if ( !Q_stricmp( argv[i], "-textureshadows" ) )
 		{
 			g_bTextureShadows = true;
 		}
-		else if ( !strcmp(argv[i], "-dump") )
+		else if ( !strcmp( argv[i], "-dump" ) )
 		{
 			g_bDumpPatches = true;
 		}
@@ -2445,26 +2529,21 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 		{
 			g_bLargeDispSampleRadius = true;
 		}
-		else if (!Q_stricmp( argv[i], "-dumppropmaps"))
-		{
-			g_bDumpPropLightmaps = true;
-		}
 		else if (!Q_stricmp(argv[i],"-bounce"))
 		{
 			if ( ++i < argc )
 			{
-				int bounceParam = atoi (argv[i]);
-				if ( bounceParam < 0 )
+				numbounce = atoi (argv[i]);
+				if ( numbounce < 0 )
 				{
 					Warning("Error: expected non-negative value after '-bounce'\n" );
-					return -1;
+					return 1;
 				}
-				numbounce = (unsigned)bounceParam;
 			}
 			else
 			{
 				Warning("Error: expected a value after '-bounce'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-verbose") || !Q_stricmp(argv[i],"-v"))
@@ -2479,13 +2558,13 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				if ( numthreads <= 0 )
 				{
 					Warning("Error: expected positive value after '-threads'\n" );
-					return -1;
+					return 1;
 				}
 			}
 			else
 			{
 				Warning("Error: expected a value after '-threads'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if ( !Q_stricmp(argv[i], "-lights" ) )
@@ -2497,7 +2576,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a filepath after '-lights'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-noextra"))
@@ -2515,6 +2594,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 		else if (!Q_stricmp(argv[i],"-fast"))
 		{
 			do_fast = true;
+			g_bFastStaticProps = true;
 		}
 		else if (!Q_stricmp(argv[i],"-noskyboxrecurse"))
 		{
@@ -2523,6 +2603,11 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 		else if (!Q_stricmp(argv[i],"-final"))
 		{
 			g_flSkySampleScale = 16.0;
+			g_flStaticPropSampleScale = 16.0;
+		}
+		else if (!Q_stricmp( argv[i], "-finitefalloff" ) )
+		{
+			g_bFiniteFalloffModel = true;
 		}
 		else if (!Q_stricmp(argv[i],"-extrasky"))
 		{
@@ -2533,7 +2618,19 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a scale factor after '-extrasky'\n" );
-				return -1;
+				return 1;
+			}
+		}
+		else if ( !Q_stricmp( argv[i], "-staticpropsamplescale" ) )	// overrides g_flStaticPropSampleScale (static prop sampling factor)
+		{
+			if ( ++i < argc && *argv[i] )	// require a following, non-empty argument
+			{
+				g_flStaticPropSampleScale = atof( argv[i] );
+			}
+			else
+			{
+				Warning( "Error: expected a scale factor after '-staticpropsamplescale'\n" );	// name the option actually being parsed
+				return 1;
+			}
+		}
 		else if (!Q_stricmp(argv[i],"-centersamples"))
@@ -2549,7 +2646,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected an angle after '-smooth'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-dlightmap"))
@@ -2567,7 +2664,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a value after '-luxeldensity'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if( !Q_stricmp( argv[i], "-low" ) )
@@ -2593,7 +2690,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected an angular extent value (0..180) '-softsun'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if ( !Q_stricmp( argv[i], "-maxdispsamplesize" ) )
@@ -2605,7 +2702,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning( "Error: expected a sample size after '-maxdispsamplesize'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if ( stricmp( argv[i], "-StopOnExit" ) == 0 )
@@ -2648,13 +2745,13 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				if ( maxchop < 1 )
 				{
 					Warning("Error: expected positive value after '-maxchop'\n" );
-					return -1;
+					return 1;
 				}
 			}
 			else
 			{
 				Warning("Error: expected a value after '-maxchop'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-chop"))
@@ -2665,14 +2762,14 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				if ( minchop < 1 )
 				{
 					Warning("Error: expected positive value after '-chop'\n" );
-					return -1;
+					return 1;
 				}
-				minchop = MIN( minchop, maxchop );
+				minchop = min( minchop, maxchop );
 			}
 			else
 			{
 				Warning("Error: expected a value after '-chop'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if ( !Q_stricmp( argv[i], "-dispchop" ) )
@@ -2683,13 +2780,13 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				if ( dispchop < 1.0f )
 				{
 					Warning( "Error: expected positive value after '-dipschop'\n" );
-					return -1;
+					return 1;
 				}
 			}
 			else
 			{
 				Warning( "Error: expected a value after '-dispchop'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if ( !Q_stricmp( argv[i], "-disppatchradius" ) )
@@ -2700,16 +2797,55 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				if ( g_MaxDispPatchRadius < 10.0f )
 				{
 					Warning( "Error: g_MaxDispPatchRadius < 10.0\n" );
-					return -1;
+					return 1;
 				}
 			}
 			else
 			{
 				Warning( "Error: expected a value after '-disppatchradius'\n" );
-				return -1;
+				return 1;
 			}
 		}
-
+		else if ( !Q_stricmp( argv[i], "-reflectivityscale" ) )
+		{
+			if ( ++i < argc )
+			{
+				reflectivityScale = (float)atof (argv[i]);
+			}
+			else
+			{
+				Warning("Error: expected a value after '-reflectivityscale'\n" );
+				return 1;
+			}
+		}
+		else if ( !Q_stricmp( argv[i],"-ambient" ) )
+		{
+			if ( i+3 < argc )
+			{
+				ambient[0] = (float)atof (argv[++i]) * 128;
+				ambient[1] = (float)atof (argv[++i]) * 128;
+				ambient[2] = (float)atof (argv[++i]) * 128;
+			}
+			else
+			{
+				Warning("Error: expected three color values after '-ambient'\n" );
+				return 1;
+			}
+		}
+		else if ( !Q_stricmp( argv[ i ], "-StaticPropBounce" ) )
+		{
+			if ( i + 1 < argc )
+			{
+				g_flStaticPropBounceBoost = (float)atof( argv[ ++i ] );
+			}
+			else
+			{
+				Warning("Error: expected bounce scale after '-StaticPropBounce'\n" );
+				return 1;
+			}
+			
+			g_bStaticPropBounce = true;
+		}
 #if ALLOWDEBUGOPTIONS
 		else if (!Q_stricmp(argv[i],"-scale"))
 		{
@@ -2720,21 +2856,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a value after '-scale'\n" );
-				return -1;
-			}
-		}
-		else if (!Q_stricmp(argv[i],"-ambient"))
-		{
-			if ( i+3 < argc )
-			{
- 				ambient[0] = (float)atof (argv[++i]) * 128;
- 				ambient[1] = (float)atof (argv[++i]) * 128;
- 				ambient[2] = (float)atof (argv[++i]) * 128;
-			}
-			else
-			{
-				Warning("Error: expected three color values after '-ambient'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-dlight"))
@@ -2746,7 +2868,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a value after '-dlight'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-sky"))
@@ -2758,7 +2880,7 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a value after '-sky'\n" );
-				return -1;
+				return 1;
 			}
 		}
 		else if (!Q_stricmp(argv[i],"-notexscale"))
@@ -2774,10 +2896,14 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 			else
 			{
 				Warning("Error: expected a light threshold after '-coring'\n" );
-				return -1;
+				return 1;
 			}
 		}
 #endif
+		else if ( !Q_stricmp( argv[i], "-tempcontent" ) )
+		{
+			// ... Do nothing, just let this pass to the filesystem
+		}
 #ifdef MPI
 		// NOTE: the -mpi checks must come last here because they allow the previous argument 
 		// to be -mpi as well. If it game before something else like -game, then if the previous
@@ -2792,17 +2918,17 @@ int ParseCommandLine( int argc, char **argv, bool *onlydetail )
 				break;
 		}
 #endif
-		else if ( mapArg == -1 )
+		else if ( !Q_stricmp( argv[i], "-processheap" ) )
 		{
-			mapArg = i;
+			// ... Do nothing, just let this pass to the mem system
 		}
 		else
 		{
-			return -1;
+			break;
 		}
 	}
 
-	return mapArg;
+	return i;
 }
 
 
@@ -2832,6 +2958,7 @@ void PrintUsage( int argc, char **argv )
 		"  -fast           : Quick and dirty lighting.\n"
 		"  -fastambient    : Per-leaf ambient sampling is lower quality to save compute time.\n"
 		"  -final          : High quality processing. equivalent to -extrasky 16.\n"
+		"  -finitefalloff  : use an alternative falloff model that falls off to exactly zero at the zero_percent_distance.\n"
 		"  -extrasky n     : trace N times as many rays for indirect light and sky ambient.\n"
 		"  -low            : Run as an idle-priority process.\n"
 #ifdef MPI
@@ -2853,9 +2980,11 @@ void PrintUsage( int argc, char **argv )
 		"                    level lights file.\n"
 		"  -noextra        : Disable supersampling.\n"
 		"  -debugextra     : Places debugging data in lightmaps to visualize\n"
-		"                    supersampling.\n"
+		"                    supersampling.\n" 
 		"  -smooth #       : Set the threshold for smoothing groups, in degrees\n"
 		"                    (default 45).\n"
+		);
+	Warning(	
 		"  -dlightmap      : Force direct lighting into different lightmap than\n"
 		"                    radiosity.\n"
 		"  -stoponexit	   : Wait for a keypress on exit.\n"
@@ -2873,19 +3002,18 @@ void PrintUsage( int argc, char **argv )
 		"  -softsun <n>    : Treat the sun as an area light source of size <n> degrees."
 		"                    Produces soft shadows.\n"
 		"                    Recommended values are between 0 and 5. Default is 0.\n"
-#ifdef _WIN32
 		"  -FullMinidumps  : Write large minidumps on crash.\n"
-#endif
 		"  -chop           : Smallest number of luxel widths for a bounce patch, used on edges\n"
-		"  -maxchop		   : Coarsest allowed number of luxel widths for a patch, used in face interiors\n"
-		"\n"
-		"  -LargeDispSampleRadius: This can be used if there are splotches of bounced light\n"
-		"                          on terrain. The compile will take longer, but it will gather\n"
-		"                          light across a wider area.\n"
-        "  -StaticPropLighting   : generate backed static prop vertex lighting\n"
+		"  -maxchop	: Coarsest allowed number of luxel widths for a patch, used in face interiors\n"
+		"  -LargeDispSampleRadius: This can be used if there are splotches of bounced\n"
+		"                          light on terrain. The compile will take longer, but\n"
+		"                          it will gather light across a wider area.\n"
+        "  -StaticPropLighting   : generate baked static prop vertex lighting\n"
+		"  -StaticPropLightingFinal   : generate baked static prop vertex lighting (uses higher/final quality processing)\n"
         "  -StaticPropPolys   : Perform shadow tests of static props at polygon precision\n"
         "  -OnlyStaticProps   : Only perform direct static prop lighting (vrad debug option)\n"
 		"  -StaticPropNormals : when lighting static props, just show their normal vector\n"
+		"  -StaticPropBounce <scale> : Enable static props to bounce light, using <scale> as the bounce scale. Experimental option, doesn't work with VMPI right now.\n"
 		"  -textureshadows : Allows texture alpha channels to block light - rays intersecting alpha surfaces will sample the texture\n"
 		"  -noskyboxrecurse : Turn off recursion into 3d skybox (skybox shadows on world)\n"
 		"  -nossprops      : Globally disable self-shadowing on static props\n"
@@ -2937,23 +3065,17 @@ int RunVRAD( int argc, char **argv )
 	Msg("\n      Valve Radiosity Simulator     \n");
 
 	Q_strncpy(g_FileName, argv[0], MAX_PATH);
-
 	verbose = true;  // Originally FALSE
 
 	bool onlydetail;
 	int i = ParseCommandLine( argc, argv, &onlydetail );
-	if (i == -1)
+	if (i != argc - 1)
 	{
 		PrintUsage( argc, argv );
 		DeleteCmdLine( argc, argv );
-		CmdLib_Exit( 1 );
+		Plat_ExitProcess( 0 );
 	}
 
-	// Initialize the filesystem, so additional commandline options can be loaded
-	Q_StripExtension( argv[ i ], source, sizeof( source ) );
-	CmdLib_InitFileSystem( argv[ i ] );
-	Q_FileBase( source, source, sizeof( source ) );
-
 	VRAD_LoadBSP( argv[i] );
 
 	if ( (! onlydetail) && (! g_bOnlyStaticProps ) )
@@ -2984,7 +3106,14 @@ int VRAD_Main(int argc, char **argv)
 #ifdef MPI
 	// This must come first.
 	VRAD_SetupMPI( argc, argv );
+#endif
 
+	// Initialize the filesystem, so additional commandline options can be loaded
+	Q_StripExtension( argv[ argc - 1 ], source, sizeof( source ) );
+	CmdLib_InitFileSystem( argv[ argc - 1 ] );
+	Q_FileBase( source, source, sizeof( source ) );
+
+#ifdef MPI
 #if !defined( _DEBUG )
 	if ( g_bUseMPI && !g_bMPIMaster )
 	{
diff --git a/utils/vrad/vrad.h b/utils/vrad/vrad.h
index 37218762..a4ee65fd 100644
--- a/utils/vrad/vrad.h
+++ b/utils/vrad/vrad.h
@@ -34,12 +34,15 @@
 
 #ifdef _WIN32
 #include <windows.h>
+#pragma warning(disable: 4142 4028)
 #include <io.h>
+#pragma warning(default: 4142 4028)
 #include <direct.h>
 #endif
 
 #include <sys/types.h>
 #include <sys/stat.h>
+
 #include <fcntl.h>
 #include <ctype.h>
 
@@ -77,6 +80,11 @@ struct directlight_t
 	float	soffset;
 	float	toffset;
 
+	// Flag indicating that even though light.type is emit_skylight, treat this light as a
+	// directional light source in vrad
+	bool	m_bSkyLightIsDirectionalLight;
+	float	m_flSkyLightSunAngularExtent;
+
 	int		dorecalc; // position, vector, spot angle, etc.
 	IncrementalLightID	m_IncrementalID;
 
@@ -89,6 +97,8 @@ struct directlight_t
 
 	directlight_t(void)
 	{
+		m_bSkyLightIsDirectionalLight = false;
+		m_flSkyLightSunAngularExtent = 0.0f;
 		m_flEndFadeDistance = -1.0;							// end<start indicates not set
 		m_flStartFadeDistance= 0.0;
 		m_flCapDist = 1.0e22;
@@ -232,6 +242,7 @@ struct CPatch
 //	struct		patch_s		*nextparent;		    // next in face
 //	struct		patch_s		*nextclusterchild;		// next terminal child in cluster
 
+	int			staticPropIdx;				// Static prop this patch is from.
 	int			numtransfers;
 	transfer_t	*transfers;
 
@@ -277,26 +288,39 @@ extern qboolean		g_bLowPriority;
 extern qboolean		do_fast;
 extern bool			g_bInterrupt;		// Was used with background lighting in WC. Tells VRAD to stop lighting.
 extern IIncremental *g_pIncremental;	// null if not doing incremental lighting
-extern bool			g_bDumpPropLightmaps;
 
 extern float g_flSkySampleScale;								// extra sampling factor for indirect light
+extern float g_flStaticPropSampleScale;							// extra sampling factor for indirect light (for static props)
 
 extern bool g_bLargeDispSampleRadius;
 extern bool g_bStaticPropPolys;
 extern bool g_bTextureShadows;
 extern bool g_bShowStaticPropNormals;
 extern bool g_bDisablePropSelfShadowing;
+extern bool g_bFiniteFalloffModel;							// whether to use 1/xxx or not
+extern bool g_bFastStaticProps;
+extern bool	g_bDumpBumpStaticProps;
+extern bool	g_bDisableStaticPropVertexInSolidTest;
+extern bool g_bStaticPropBounce;
+extern float g_flStaticPropBounceBoost;
 
 extern CUtlVector<char const *> g_NonShadowCastingMaterialStrings;
 extern void ForceTextureShadowsOnModel( const char *pModelName );
 extern bool IsModelTextureShadowsForced( const char *pModelName );
+extern int LoadShadowTexture( const char *pMaterialName );
+extern int AddShadowTextureTriangle( int shadowTextureIndex, const Vector2D &t0, const Vector2D &t1, const Vector2D &t2 );
+extern float ComputeCoverageForTriangle( int shadowTextureIndex, const Vector2D &t0, const Vector2D &t1, const Vector2D &t2 );
+extern void GetShadowTextureMapping( int shadowTextureIndex, int *pWidth, int *pHeight );
 
 // Raytracing
 
 #define TRACE_ID_SKY           0x01000000  // sky face ray blocker
 #define TRACE_ID_OPAQUE        0x02000000  // everyday light blocking face
 #define TRACE_ID_STATICPROP    0x04000000  // static prop - lower bits are prop ID
+#define TRACE_ID_PATCH		   0x08000000  // patch - lower bits are patch ID
 extern RayTracingEnvironment g_RtEnv;
+extern RayTracingEnvironment g_RtEnv_LightBlockers;
+extern RayTracingEnvironment g_RtEnv_RadiosityPatches;	// Contains patches for final gather of indirect light for static prop lighting.
 
 #include "mpivrad.h"
 
@@ -307,6 +331,7 @@ void MakeShadowSplits (void);
 void BuildVisMatrix (void);
 void BuildClusterTable( void );
 void AddDispsToClusterTable( void );
+void AddStaticPropPatchesToClusterTable();
 void FreeVisMatrix (void);
 // qboolean CheckVisBit (unsigned int p1, unsigned int p2);
 void TouchVMFFile (void);
@@ -357,13 +382,11 @@ qboolean IsIncremental(char *filename);
 int SaveIncremental(char *filename);
 int PartialHead (void);
 void BuildFacelights (int facenum, int threadnum);
+void BuildStaticPropPatchlights( int iThread, int nPatch );
 void PrecompLightmapOffsets();
 void FinalLightFace (int threadnum, int facenum);
 void PvsForOrigin (Vector& org, byte *pvs);
-void ConvertRGBExp32ToRGBA8888( const ColorRGBExp32 *pSrc, unsigned char *pDst, Vector* _optOutLinear = NULL );
-void ConvertRGBExp32ToLinear(const ColorRGBExp32 *pSrc, Vector* pDst);
-void ConvertLinearToRGBA8888( const Vector *pSrc, unsigned char *pDst );
-
+void ConvertRGBExp32ToRGBA8888( const ColorRGBExp32 *pSrc, unsigned char *pDst );
 
 inline byte PVSCheck( const byte *pvs, int iCluster )
 {
@@ -382,6 +405,9 @@ inline byte PVSCheck( const byte *pvs, int iCluster )
 // outputs 1 in fractionVisible if no occlusion, 0 if full occlusion, and in-between values
 void TestLine( FourVectors const& start, FourVectors const& stop, fltx4 *pFractionVisible, int static_prop_index_to_ignore=-1);
 
+void TestLine_IgnoreSky( FourVectors const& start, FourVectors const& stop, fltx4 *pFractionVisible, int static_prop_index_to_ignore=-1);
+void TestLine_LightBlockers( const FourVectors& start, const FourVectors& stop, fltx4 *pFractionVisible );
+
 // returns 1 if the ray sees the sky, 0 if it doesn't, and in-between values for partial coverage
 void TestLine_DoesHitSky( FourVectors const& start, FourVectors const& stop,
                           fltx4 *pFractionVisible, bool canRecurse = true, int static_prop_to_skip=-1, bool bDoDebug = false );
@@ -442,12 +468,14 @@ float TraceLeafBrushes( int leafIndex, const Vector &start, const Vector &end, C
 struct SSE_sampleLightOutput_t
 {
 	fltx4 m_flDot[NUM_BUMP_VECTS+1];
+	fltx4 m_flSunAmount[NUM_BUMP_VECTS + 1];
 	fltx4 m_flFalloff;
-	fltx4 m_flSunAmount;
 };
 
-#define GATHERLFLAGS_FORCE_FAST 1
-#define GATHERLFLAGS_IGNORE_NORMALS 2
+#define GATHERLFLAGS_FORCE_FAST     1	/* Use 4x fewer rays when sampling area lights */
+#define GATHERLFLAGS_IGNORE_NORMALS 2	/* Ignore surface normals in lighting calculations */
+#define GATHERLFLAGS_NO_OCCLUSION   4	/* Ignore occlusion for local lights (but not sun, sky or bounce lighting) */
+#define GATHERLFLAGS_STATICPROP		8	/* Paths for static props */
 
 // SSE Gather light stuff
 void GatherSampleLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, int facenum, 
@@ -471,6 +499,10 @@ void GatherSampleLightSSE( SSE_sampleLightOutput_t &out, directlight_t *dl, int
 //						  int static_prop_to_skip=-1,
 //						  float flEpsilon = 0.0 );
 
+void ComputeDirectLightingAtPoint( Vector &position, Vector *normals, Vector *outColors, float *outSunAmount, int numNormals, bool bSkipSkyLight, int iThread,
+								   int static_prop_id_to_skip = -1, int nLFlags = 0 );
+
+
 //-----------------------------------------------------------------------------
 // VRad Displacements
 //-----------------------------------------------------------------------------
@@ -514,6 +546,8 @@ public:
 
 	// utility
 	virtual	void GetDispSurfNormal( int ndxFace, Vector &pt, Vector &ptNormal, bool bInside ) = 0;
+	virtual void GetDispSurfPointAndNormalFromUV( int ndxFace, Vector &pt, Vector &ptNormal,
+												  Vector2D &uv, bool bInside ) = 0;
 	virtual void GetDispSurf( int ndxFace, CVRADDispColl **ppDispTree ) = 0;
 
 	// bsp tree functions
@@ -577,7 +611,9 @@ extern int patchSamplesAdded;
 
 void ComputeDetailPropLighting( int iThread );
 void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &outColor, 
-									 int iThread, bool force_fast = false, bool bIgnoreNormals = false );
+									 int iThread, bool force_fast = false, bool bIgnoreNormals = false, int nStaticPropToSkip = -1 );
+void ComputeIndirectLightingAtPoint( Vector &position, Vector *normals, Vector *outColors, int numNormals, 
+									 int iThread, bool force_fast = false, bool bIgnoreNormals = false, int nStaticPropToSkip = -1 );
 
 //-----------------------------------------------------------------------------
 // VRad static props
@@ -598,6 +634,7 @@ public:
 	virtual void Shutdown() = 0;
 	virtual void ComputeLighting( int iThread ) = 0;
 	virtual void AddPolysForRayTrace() = 0;
+	virtual void MakePatches() = 0;
 };
 
 //extern PropTested_t s_PropTested[MAX_TOOL_THREADS+1];
diff --git a/utils/vrad/vrad_dispcoll.cpp b/utils/vrad/vrad_dispcoll.cpp
index b04b2f98..74004f4e 100644
--- a/utils/vrad/vrad_dispcoll.cpp
+++ b/utils/vrad/vrad_dispcoll.cpp
@@ -101,7 +101,7 @@ void CVRADDispColl::CalcSampleRadius2AndBox( dface_t *pFace )
 	m_flSampleHeight = flHeight;
 
 	// Calculate the sample radius squared.
-	float flSampleRadius = sqrt( ( ( flWidth * flWidth ) + ( flHeight * flHeight ) ) ) * 2.2f;//RADIALDIST2; 
+	float flSampleRadius = sqrt( ( ( flWidth * flWidth ) + ( flHeight * flHeight ) ) ); // * 2.2f;//RADIALDIST2; // AV - Removing the 2.2 scalar since 1.0 works better with CS:GO
 	if ( flSampleRadius > g_flMaxDispSampleSize )
 	{
 		flSampleRadius = g_flMaxDispSampleSize;
@@ -110,8 +110,7 @@ void CVRADDispColl::CalcSampleRadius2AndBox( dface_t *pFace )
 
 	// Calculate the patch radius - the max sample edge length * the number of luxels per edge "chop."
 	float flSampleSize = max( m_flSampleWidth, m_flSampleHeight );
-	// Calculate the patch radius - the MAX sample edge length * the number of luxels per edge "chop."
-	float flPatchSampleRadius = flSampleSize * dispchop * 2.2f;
+	float flPatchSampleRadius = flSampleSize * dispchop * ( g_bLargeDispSampleRadius ? 2.2f : 1.0f ); // AV - Removing the 2.2 scalar since 1.0 works better with CS:GO. TS - It fixes lighting artefacts in maps with many small displacements.
 	if ( flPatchSampleRadius > g_MaxDispPatchRadius )
 	{
 		flPatchSampleRadius = g_MaxDispPatchRadius;
@@ -441,7 +440,7 @@ void CVRADDispColl::CreateChildPatchesFromRoot( int iParentPatch, int *pChildPat
 	vecEdges[3] = pParentPatch->winding->p[3] - pParentPatch->winding->p[0];
 
 	// Should the patch be subdivided - check the area.
-	float flMaxLength  = MAX( m_flSampleWidth, m_flSampleHeight );
+	float flMaxLength  = max( m_flSampleWidth, m_flSampleHeight );
 	float flMinEdgeLength = flMaxLength * dispchop;
 
 	// Find the longest edge.
@@ -552,7 +551,7 @@ void CVRADDispColl::CreateChildPatches( int iParentPatch, int nLevel )
 		return;
 
 	// Should the patch be subdivided - check the area.
-	float flMaxLength  = MAX( m_flSampleWidth, m_flSampleHeight );
+	float flMaxLength  = max( m_flSampleWidth, m_flSampleHeight );
 	float flMinEdgeLength = flMaxLength * dispchop;
 
 	// Split along the longest edge.
@@ -660,14 +659,14 @@ void CVRADDispColl::CreateChildPatchesSub( int iParentPatch )
 		return;
 
 	// Should the patch be subdivided - check the area.
-	float flMaxLength  = MAX( m_flSampleWidth, m_flSampleHeight );
+	float flMaxLength  = max( m_flSampleWidth, m_flSampleHeight );
 	float flMinEdgeLength = flMaxLength * dispchop;
 
 	// Split along the longest edge.
 	Vector vecEdges[3];
 	vecEdges[0] = pParentPatch->winding->p[1] - pParentPatch->winding->p[0];
-	vecEdges[1] = pParentPatch->winding->p[2] - pParentPatch->winding->p[1];
-	vecEdges[2] = pParentPatch->winding->p[0] - pParentPatch->winding->p[2];
+	vecEdges[1] = pParentPatch->winding->p[2] - pParentPatch->winding->p[0];
+	vecEdges[2] = pParentPatch->winding->p[2] - pParentPatch->winding->p[1];
 
 	// Find the longest edge.
 	float flEdgeLength = 0.0f;
@@ -818,6 +817,7 @@ bool CVRADDispColl::InitParentPatch( int iPatch, Vector *pPoints, float &flArea
 	pPatch->parent = g_Patches.InvalidIndex();
 	pPatch->ndxNextClusterChild = g_Patches.InvalidIndex();
 	pPatch->ndxNextParent = g_Patches.InvalidIndex();
+	pPatch->staticPropIdx = -1;
 
 	Vector vecEdges[2];
 	vecEdges[0] = pPoints[1] - pPoints[0];
@@ -911,6 +911,7 @@ bool CVRADDispColl::InitPatch( int iPatch, int iParentPatch, int iChild, Vector
 
 	// Clear the patch data.
 	memset( pPatch, 0, sizeof( CPatch ) );
+	pPatch->staticPropIdx = -1;
 
 	// Setup the parent if we are not the parent.
 	CPatch *pParentPatch = NULL;
@@ -1067,7 +1068,7 @@ void CVRADDispColl::AddPolysForRayTrace( void )
 	if ( !( m_nContents & MASK_OPAQUE ) )
 		return;
 
-	for ( int ndxTri = 0; ndxTri < m_aTris.Size(); ndxTri++ )
+	for ( int ndxTri = 0; ndxTri < m_aTris.Count(); ndxTri++ )
 	{
 		CDispCollTri *tri = m_aTris.Base() + ndxTri;
 		int v[3];
diff --git a/utils/vrad/vraddetailprops.cpp b/utils/vrad/vraddetailprops.cpp
index 8c40bb01..09f514f6 100644
--- a/utils/vrad/vraddetailprops.cpp
+++ b/utils/vrad/vraddetailprops.cpp
@@ -24,6 +24,8 @@
 #endif
 #include "byteswap.h"
 
+extern float SoftenCosineTerm( float flDot );
+extern float CalculateAmbientOcclusion( Vector *pPosition, Vector *pNormal );
 bool LoadStudioModel( char const* pModelName, CUtlBuffer& buf );
 
 
@@ -228,12 +230,8 @@ static void ComputeMaxDirectLighting( DetailObjectLump_t& prop, Vector* maxcolor
 		origin4.DuplicateVector( origin );
 		normal4.DuplicateVector( normal );
 
-		GatherSampleLightSSE ( out, dl, -1, origin4, &normal4, 1, iThread );
-#ifdef VRAD_SSE
-		VectorMA( maxcolor[dl->light.style], out.m_flFalloff.m128_f32[0] * out.m_flDot[0].m128_f32[0], dl->light.intensity, maxcolor[dl->light.style] );
-#else
+		GatherSampleLightSSE ( out, dl, -1, origin4, &normal4, 1, iThread, GATHERLFLAGS_STATICPROP );
 		VectorMA( maxcolor[dl->light.style], out.m_flFalloff[0] * out.m_flDot[0][0], dl->light.intensity, maxcolor[dl->light.style] );
-#endif
 	}
 }
 
@@ -659,23 +657,80 @@ static void ComputeAmbientLightingAtPoint( int iThread, const Vector &origin, Ve
 }
 
 //-----------------------------------------------------------------------------
-// Trace hemispherical rays from a vertex, accumulating indirect
-// sources at each ray termination.
+//
+// Trace a ray from position in the specified direction to determine a positive
+// hit for indirect lighting.
+//
+// Fire ray out from start, with end as start + direction*MAX_TRACE_LENGTH
+// If hit then fire ray back to start to see if it hits a back facing surface that would naturally block the incoming light ray
+// If still okay then test explicitly against light blockers, test only in the hit to start direction
+// Update surfEnum and return true if a valid intersection for indirect light.
+// force_fast: start the ray exactly at position (no back-off). iThread is not referenced in this body.
 //-----------------------------------------------------------------------------
-void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &outColor,
-									 int iThread, bool force_fast, bool bIgnoreNormals )
+bool TraceIndirectLightingSample( Vector &position, Vector &direction, CLightSurface &surfEnum, int iThread, bool force_fast )
 {
 	Ray_t			ray;
-	CLightSurface	surfEnum(iThread);
 
-	outColor.Init();
+	// trace to determine surface: ray end point is position + direction * MAX_TRACE_LENGTH
+	Vector vEnd, vStart;
+	VectorScale( direction, MAX_TRACE_LENGTH, vEnd );
+	VectorAdd( position, vEnd, vEnd );
 
+	if ( force_fast )
+	{
+		vStart = position;
+	}
+	else
+	{
+		// offset ray start position (backwards along direction by EQUAL_EPSILON) to compensate for ray leakage due to coincident surfaces (we are seeing some ray tests leak in some situations - e.g. prop vertex lies on ground plane)
+		VectorScale( direction, -EQUAL_EPSILON, vStart );
+		VectorAdd( position, vStart, vStart );
+	}
+	ray.Init( vStart, vEnd, vec3_origin, vec3_origin );
+	if ( !surfEnum.FindIntersection( ray ) )
+		return false;
+
+	// Now test explicitly against light blockers (surfaces don't exist in the bsp nodes we're checking here, and this feels a safer change than updating indirect lighting for static props to use the slower rte path for all rays)
+	// test from hitfrac back to start only: vEnd is recomputed as the hit point, measured from position (not from the offset vStart)
+	VectorScale( direction, MAX_TRACE_LENGTH * surfEnum.m_HitFrac, vEnd );
+	VectorAdd( position, vEnd, vEnd );
+	FourVectors rayStart, rayEnd, rayDirection;
+	fltx4 fractionVisible = Four_Ones;
+	rayStart.DuplicateVector( vStart );
+	rayEnd.DuplicateVector( vEnd );
+
+//	rayDirection.DuplicateVector( direction );
+//	TestLine_LightBlockers( rayStart, rayEnd, &fractionVisible );
+
+	rayDirection.DuplicateVector( -direction );	// NOTE(review): rayDirection is never read after this assignment in this function
+	TestLine_LightBlockers( rayEnd, rayStart, &fractionVisible );	// traced from the hit point back to the start
+
+
+	if ( fractionVisible[0] < 1.0f )	// only element 0 is inspected; start/end were duplicated from single vectors
+	{
+		// ray hit blocker
+		return false;
+	}
+
+	return true;
+}
+
+//-----------------------------------------------------------------------------
+// Trace hemispherical rays from a vertex, accumulating indirect
+// sources at each ray termination.
+//
+// force_fast = false currently implies 'new/improved' static prop lighting is to be used.
+//-----------------------------------------------------------------------------
+void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &outColor,
+									 int iThread, bool force_fast, bool bIgnoreNormals, int nStaticPropToSkip )
+{
+	outColor.Zero();
 	
 	int nSamples = NUMVERTEXNORMALS;
-	if ( do_fast || force_fast )
-		nSamples /= 4;
-	else
-		nSamples *= g_flSkySampleScale;
+ 	if ( do_fast || force_fast )
+ 		nSamples /= 4;
+ 	else
+ 		nSamples *= g_flStaticPropSampleScale;
 
 	float totalDot = 0;
 	DirectionalSampler_t sampler;
@@ -696,15 +751,44 @@ void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &o
 		}
 
 		totalDot += dot;
+	
+		// trace static prop indirect
+		Vector staticPropIndirectColor( 0.0f, 0.0f, 0.0f );
+		float flStaticPropHitDist = FLT_MAX;
+		if ( g_bStaticPropBounce )
+		{
+			FourRays myrays;
+			myrays.origin.DuplicateVector( position );
+			myrays.direction.DuplicateVector( samplingNormal );
+			RayTracingResult rt_result;
+			g_RtEnv_RadiosityPatches.Trace4Rays( myrays, ReplicateX4( 10.0f ), ReplicateX4( MAX_TRACE_LENGTH ), &rt_result );
+			if ( rt_result.HitIds[ 0 ] != -1 )
+			{
+				const TriIntersectData_t &intersectData = g_RtEnv_RadiosityPatches.OptimizedTriangleList[ rt_result.HitIds[ 0 ] ].m_Data.m_IntersectData;
+				int nId = intersectData.m_nTriangleID;
+				if ( nId & TRACE_ID_PATCH )
+				{
+					int nPatchId = nId & ~TRACE_ID_PATCH;
+					CPatch &patch = g_Patches[ nPatchId ];
+					if ( patch.staticPropIdx != nStaticPropToSkip )
+					{
+						staticPropIndirectColor = dot * ( patch.totallight.light[ 0 ] + patch.directlight ) * patch.reflectivity;
+						flStaticPropHitDist = SubFloat( rt_result.HitDistance, 0 );
+					}
+				}
+			}
+		}
+
+		// important to put the constructor here to init m_hitfrac, etc
+		CLightSurface	surfEnum( iThread );
 
 		// trace to determine surface
-		Vector vEnd;
-		VectorScale( samplingNormal, MAX_TRACE_LENGTH, vEnd );
-		VectorAdd( position, vEnd, vEnd );
-
-		ray.Init( position, vEnd, vec3_origin, vec3_origin );
-		if ( !surfEnum.FindIntersection( ray ) )
+		if ( !TraceIndirectLightingSample( position, samplingNormal, surfEnum, iThread, force_fast ) ||
+			 flStaticPropHitDist < surfEnum.m_HitFrac * MAX_TRACE_LENGTH )
+		{
+			VectorAdd( outColor, staticPropIndirectColor, outColor );	// we may have hit a static prop patch
 			continue;
+		}
 
 		// get color from surface lightmap
 		texinfo_t* pTex = &texinfo[surfEnum.m_pSurface->texinfo];
@@ -721,7 +805,6 @@ void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &o
 			continue;
 		}
 
-
 		Vector lightmapColor;
 		if ( !surfEnum.m_bHasLuxel )
 		{
@@ -744,18 +827,282 @@ void ComputeIndirectLightingAtPoint( Vector &position, Vector &normal, Vector &o
 			ColorRGBExp32ToVector( *pLightmap, lightmapColor );
 		}
 
-		float invLengthSqr = 1.0f / (1.0f + ((vEnd - position) * surfEnum.m_HitFrac / 128.0).LengthSqr());
-		// Include falloff using invsqrlaw.
-		VectorMultiply( lightmapColor, invLengthSqr * dtexdata[pTex->texdata].reflectivity, lightmapColor );
+		if ( force_fast )
+		{
+			VectorMultiply( lightmapColor, dtexdata[pTex->texdata].reflectivity, lightmapColor );
+		}
+		else
+		{
+			// Include dot falloff on accumulating irradiance here
+			// have tried using inv sqr falloff from TF2 changes to vrad (CL#2394791 & 2395471), but the result is very sensitive to the scale factor that is used (too dark or too bright otherwise)
+			// this seems to give the most natural looking result (static props matching brushes)
+			VectorMultiply( lightmapColor, dot * dtexdata[pTex->texdata].reflectivity, lightmapColor );
+		}
 		VectorAdd( outColor, lightmapColor, outColor );
 	}
 
 	if ( totalDot )
 	{
-		VectorScale( outColor, 1.0f/totalDot, outColor );
+		VectorScale( outColor, 1.0f / totalDot, outColor );
 	}
 }
 
+void ComputeIndirectLightingAtPoint( Vector &position, Vector *normals, Vector *outColors, int numNormals,
+											 int iThread, bool force_fast, bool bIgnoreNormals, int nStaticPropToSkip )
+{	// Multi-normal variant: each sample direction is traced once and its result is accumulated into every outColors[k].
+	const Vector vZero(0.0f, 0.0f, 0.0f);
+	// Any normal count other than NUM_BUMP_VECTS + 1 falls back to one single-normal call per entry.
+	if ( numNormals != ( NUM_BUMP_VECTS + 1 ) )
+	{
+		for ( int k = 0; k < numNormals; ++k )
+		{
+			ComputeIndirectLightingAtPoint( position, normals[k], outColors[k], iThread, force_fast, bIgnoreNormals, nStaticPropToSkip );
+		}
+		return;
+	}
+
+	// optimize/unroll for num_bump_vects = 3 (the code below is hard-coded for four entries, indices 0..3)
+	outColors[0].Zero();
+	outColors[1].Zero();
+	outColors[2].Zero();
+	outColors[3].Zero();
+
+	int nSamples = NUMVERTEXNORMALS;
+	if ( do_fast || force_fast )
+		nSamples /= 4;
+	else
+		nSamples *= g_flStaticPropSampleScale;
+
+	float totalDot[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+	DirectionalSampler_t sampler;
+	for ( int j = 0; j < nSamples; j++ )
+	{
+		Vector samplingNormal = sampler.NextValue();
+		float dot[4];
+
+		if ( bIgnoreNormals )
+		{
+			dot[0] = dot[1] = dot[2] = dot[3] = (0.7071 / 2);
+		}
+		else
+		{
+			samplingNormal.NormalizeInPlace();
+			dot[0] = DotProduct( normals[0], samplingNormal );
+			dot[1] = DotProduct( normals[1], samplingNormal );
+			dot[2] = DotProduct( normals[2], samplingNormal );
+			dot[3] = DotProduct( normals[3], samplingNormal );
+		}
+
+		bool bDoRayTrace = false;
+		bool bIncLighting[4] = {false, false, false, false};
+
+		if ( dot[0] > EQUAL_EPSILON )
+		{
+			dot[0] = SoftenCosineTerm( dot[0] );
+			totalDot[0] += dot[0];
+			bDoRayTrace = true;
+			bIncLighting[0] = true;
+		}
+		else
+		{
+			dot[0] = 0.0f;
+		}
+
+		if ( dot[1] > EQUAL_EPSILON )
+		{
+			dot[1] = SoftenCosineTerm( dot[1] );
+			totalDot[1] += dot[1];
+			bDoRayTrace = true;
+			bIncLighting[1] = true;
+		}
+		else
+		{
+			dot[1] = 0.0f;
+		}
+
+		if ( dot[2] > EQUAL_EPSILON )
+		{
+			dot[2] = SoftenCosineTerm( dot[2] );
+			totalDot[2] += dot[2];
+			bDoRayTrace = true;
+			bIncLighting[2] = true;
+		}
+		else
+		{
+			dot[2] = 0.0f;
+		}
+
+		if ( dot[3] > EQUAL_EPSILON )
+		{
+			dot[3] = SoftenCosineTerm( dot[3] );
+			totalDot[3] += dot[3];
+			bDoRayTrace = true;
+			bIncLighting[3] = true;
+		}
+		else
+		{
+			dot[3] = 0.0f;
+		}
+
+		// important to skip: a sample not facing normals[0] is dropped entirely, even if another normal faces it (its totalDot was already incremented above)
+		if ( dot[0] <= EQUAL_EPSILON )
+		{
+			continue;
+		}
+
+		if ( bDoRayTrace )
+ 		{
+			Vector staticPropIndirectColor( 0.0f, 0.0f, 0.0f );
+			float flStaticPropHitDist = FLT_MAX;
+			if ( g_bStaticPropBounce )
+			{
+				FourRays myrays;
+				myrays.origin.DuplicateVector( position );
+				myrays.direction.DuplicateVector( samplingNormal );
+				RayTracingResult rt_result;
+				g_RtEnv_RadiosityPatches.Trace4Rays( myrays, ReplicateX4( 10.0f ), ReplicateX4( MAX_TRACE_LENGTH ), &rt_result );
+				if ( rt_result.HitIds[ 0 ] != -1 )
+				{
+					const TriIntersectData_t &intersectData = g_RtEnv_RadiosityPatches.OptimizedTriangleList[ rt_result.HitIds[ 0 ] ].m_Data.m_IntersectData;
+					int nId = intersectData.m_nTriangleID;
+					if ( nId & TRACE_ID_PATCH )
+					{
+						int nPatchId = nId & ~TRACE_ID_PATCH;
+						CPatch &patch = g_Patches[ nPatchId ];
+						if ( patch.staticPropIdx != nStaticPropToSkip )
+						{
+							staticPropIndirectColor = ( patch.totallight.light[ 0 ] + patch.directlight ) * patch.reflectivity;
+							flStaticPropHitDist = SubFloat( rt_result.HitDistance, 0 );
+						}
+					}
+				}
+			}
+
+
+			// important to put the constructor here to init m_hitfrac, etc
+			CLightSurface	surfEnum( iThread );
+
+			// trace to determine surface; a miss, or a static prop patch hit closer than the surface hit, takes the patch color instead
+			if ( !TraceIndirectLightingSample( position, samplingNormal, surfEnum, iThread, force_fast ) ||
+				 flStaticPropHitDist < surfEnum.m_HitFrac * MAX_TRACE_LENGTH )
+			{
+				// The dot values are 0 if bIncLighting is false so we don't actually need to branch here.
+				VectorAdd( outColors[ 0 ], dot[ 0 ] * staticPropIndirectColor, outColors[ 0 ] );	// we may have hit a static prop patch
+				VectorAdd( outColors[ 1 ], dot[ 1 ] * staticPropIndirectColor, outColors[ 1 ] );
+				VectorAdd( outColors[ 2 ], dot[ 2 ] * staticPropIndirectColor, outColors[ 2 ] );
+				VectorAdd( outColors[ 3 ], dot[ 3 ] * staticPropIndirectColor, outColors[ 3 ] );
+				continue;
+			}
+
+			// get color from surface lightmap
+			texinfo_t* pTex = &texinfo[surfEnum.m_pSurface->texinfo];
+			if ( !pTex || pTex->flags & SURF_SKY )
+			{
+				// ignore contribution from sky
+				// sky ambient already accounted for during direct pass
+				continue;
+			}
+
+			if ( surfEnum.m_pSurface->styles[0] == 255 || surfEnum.m_pSurface->lightofs < 0 )
+			{
+				// no light affects this face
+				continue;
+			}
+
+			Vector lightmapColor;
+			Vector lightmapColors[4];
+			if ( !surfEnum.m_bHasLuxel )
+			{
+				ColorRGBExp32* pAvgLightmapColor = dface_AvgLightColor( surfEnum.m_pSurface, 0 );
+				ColorRGBExp32ToVector( *pAvgLightmapColor, lightmapColor );
+			}
+			else
+			{
+				// get color from displacement
+				int smax = (surfEnum.m_pSurface->m_LightmapTextureSizeInLuxels[0]) + 1;
+				int tmax = (surfEnum.m_pSurface->m_LightmapTextureSizeInLuxels[1]) + 1;
+
+				// luxelcoord is in the space of the accumulated lightmap page; we need to convert
+				// it to be in the space of the surface
+				int ds = clamp( (int)surfEnum.m_LuxelCoord.x, 0, smax - 1 );
+				int dt = clamp( (int)surfEnum.m_LuxelCoord.y, 0, tmax - 1 );
+
+				ColorRGBExp32* pLightmap = (ColorRGBExp32*)&(*pdlightdata)[surfEnum.m_pSurface->lightofs];
+				pLightmap += dt * smax + ds;
+				ColorRGBExp32ToVector( *pLightmap, lightmapColor );
+			}
+
+			lightmapColor.Max( vZero );	// NOTE(review): result is not assigned; if Vector::Max returns the clamped vector rather than modifying lightmapColor, this line has no effect - confirm
+
+			if ( force_fast )
+			{
+				VectorMultiply( lightmapColor, dtexdata[pTex->texdata].reflectivity, lightmapColors[0] );
+
+				if ( bIncLighting[0] )
+				{
+					VectorAdd( outColors[0], lightmapColors[0], outColors[0] );
+				}
+				if ( bIncLighting[1] )
+				{
+					VectorAdd( outColors[1], lightmapColors[0], outColors[1] );
+				}
+				if ( bIncLighting[2] )
+				{
+					VectorAdd( outColors[2], lightmapColors[0], outColors[2] );
+				}
+				if ( bIncLighting[3] )
+				{
+					VectorAdd( outColors[3], lightmapColors[0], outColors[3] );
+				}
+			}
+			else
+			{
+				// Include dot falloff on accumulating irradiance here
+				// have tried using inv sqr falloff from TF2 changes to vrad (CL#2394791 & 2395471), but the result is very sensitive to the scale factor that is used (too dark or too bright otherwise)
+				// this seems to give the most natural looking result (static props matching brushes)
+				if ( bIncLighting[0] )
+				{
+					VectorMultiply( lightmapColor, dot[0] * dtexdata[pTex->texdata].reflectivity, lightmapColors[0] );
+					VectorAdd( outColors[0], lightmapColors[0], outColors[0] );
+				}
+				if ( bIncLighting[1] )
+				{
+					VectorMultiply( lightmapColor, dot[1] * dtexdata[pTex->texdata].reflectivity, lightmapColors[1] );
+					VectorAdd( outColors[1], lightmapColors[1], outColors[1] );
+				}
+				if ( bIncLighting[2] )
+				{
+					VectorMultiply( lightmapColor, dot[2] * dtexdata[pTex->texdata].reflectivity, lightmapColors[2] );
+					VectorAdd( outColors[2], lightmapColors[2], outColors[2] );
+				}
+				if ( bIncLighting[3] )
+				{
+					VectorMultiply( lightmapColor, dot[3] * dtexdata[pTex->texdata].reflectivity, lightmapColors[3] );
+					VectorAdd( outColors[3], lightmapColors[3], outColors[3] );
+				}
+			}
+		}
+	}
+
+	if ( totalDot[0] )
+	{
+		VectorScale( outColors[0], 1.0f / totalDot[0], outColors[0] );
+	}
+	if ( totalDot[1] )
+	{
+		VectorScale( outColors[1], 1.0f / totalDot[1], outColors[1] );
+	}
+	if ( totalDot[2] )
+	{
+		VectorScale( outColors[2], 1.0f / totalDot[2], outColors[2] );
+	}
+	if ( totalDot[3] )
+	{
+		VectorScale( outColors[3], 1.0f / totalDot[3], outColors[3] );
+	}
+}
+
+
 static void ComputeAmbientLighting( int iThread, DetailObjectLump_t& prop, Vector color[MAX_LIGHTSTYLES] )
 {
 	Vector origin, normal;
@@ -821,7 +1168,7 @@ static void ComputeLighting( DetailObjectLump_t& prop, int iThread )
 		{
 			if (!hasLightstyles)
 			{
-				prop.m_LightStyles = s_pDetailPropLightStyleLump->Size();
+				prop.m_LightStyles = s_pDetailPropLightStyleLump->Count();
 				hasLightstyles = true;
 			}
 
@@ -921,14 +1268,14 @@ static void WriteDetailLightingLump( int lumpID, int lumpVersion, CUtlVector<Det
 	GameLumpHandle_t handle = g_GameLumps.GetGameLumpHandle(lumpID);
 	if (handle != g_GameLumps.InvalidGameLump())
 		g_GameLumps.DestroyGameLump(handle);
-	int lightsize = lumpData.Size() * sizeof(DetailPropLightstylesLump_t);
+	int lightsize = lumpData.Count() * sizeof(DetailPropLightstylesLump_t);
 	int lumpsize = lightsize + sizeof(int);
 
 	handle = g_GameLumps.CreateGameLump( lumpID, lumpsize, 0, lumpVersion );
 
 	// Serialize the data
 	CUtlBuffer buf( g_GameLumps.GetGameLump(handle), lumpsize );
-	buf.PutInt( lumpData.Size() );
+	buf.PutInt( lumpData.Count() );
 	if (lightsize)
 		buf.Put( lumpData.Base(), lightsize );
 }
diff --git a/utils/vrad/vraddisps.cpp b/utils/vrad/vraddisps.cpp
index 957ffcce..4aea6361 100644
--- a/utils/vrad/vraddisps.cpp
+++ b/utils/vrad/vraddisps.cpp
@@ -105,6 +105,8 @@ public:
 
 	// utility
 	void GetDispSurfNormal( int ndxFace, Vector &pt, Vector &ptNormal, bool bInside );
+	void GetDispSurfPointAndNormalFromUV( int ndxFace, Vector &pt, Vector &ptNormal,
+										  Vector2D &uv, bool bInside );
 	void GetDispSurf( int ndxFace, CVRADDispColl **ppDispTree );
 
 	// bsp tree functions
@@ -163,7 +165,7 @@ private:
 											radial_t *pRadial, int ndxRadial, bool bBump,
 											CUtlVector<CPatch*> &interestingPatches );
 
-	bool IsNeighbor( int iDispFace, int iNeighborFace );
+	bool IsNeighbor( int iDispFace, int iNeighborFace, bool bCheck2ndDegreeNeighbors = false );
 
 	void GetInterestingPatchesForLuxels( 
 		int ndxFace,
@@ -329,7 +331,7 @@ void CVRadDispMgr::Init( void )
 void CVRadDispMgr::Shutdown( void )
 {
 	// remove all displacements from the tree
-	for( int ndxDisp = m_DispTrees.Size(); ndxDisp >= 0; ndxDisp-- )
+	for( int ndxDisp = m_DispTrees.Count(); ndxDisp >= 0; ndxDisp-- )
 	{
 		RemoveDispFromTree( ndxDisp );
 	}
@@ -500,7 +502,7 @@ void CVRadDispMgr::MakePatches( void )
 	float flTotalArea = 0.0f;
 
 	// Create patches for all of the displacements.
-	int nTreeCount = m_DispTrees.Size();
+	int nTreeCount = m_DispTrees.Count();
 	for( int iTree = 0; iTree < nTreeCount; ++iTree )
 	{
 		// Get the current displacement collision tree.
@@ -537,12 +539,12 @@ void CVRadDispMgr::SubdividePatch( int iPatch )
 //-----------------------------------------------------------------------------
 void CVRadDispMgr::StartRayTest( DispTested_t &dispTested )
 {
-	if( m_DispTrees.Size() > 0 )
+	if( m_DispTrees.Count() > 0 )
 	{
 		if( dispTested.m_pTested == 0 )
 		{
-			dispTested.m_pTested = new int[m_DispTrees.Size()];
-			memset( dispTested.m_pTested, 0, m_DispTrees.Size() * sizeof( int ) );
+			dispTested.m_pTested = new int[m_DispTrees.Count()];
+			memset( dispTested.m_pTested, 0, m_DispTrees.Count() * sizeof( int ) );
 			dispTested.m_Enum = 0;
 		}
 		++dispTested.m_Enum;
@@ -613,7 +615,7 @@ void CVRadDispMgr::ClipRayToDispInLeaf( DispTested_t &dispTested, Ray_t const &r
 
 void CVRadDispMgr::AddPolysForRayTrace( void )
 {
-	int nTreeCount = m_DispTrees.Size();
+	int nTreeCount = m_DispTrees.Count();
 	for( int iTree = 0; iTree < nTreeCount; ++iTree )
 	{
 		// Get the current displacement collision tree.
@@ -656,6 +658,32 @@ void CVRadDispMgr::GetDispSurfNormal( int ndxFace, Vector &pt, Vector &ptNormal,
 	pDispTree->DispUVToSurfPoint( uv, pt, 1.0f );
 }
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+void CVRadDispMgr::GetDispSurfPointAndNormalFromUV( int ndxFace, Vector &pt, Vector &ptNormal,
+													Vector2D &uv, bool bInside )
+{	// Clamps uv to [0,1], then asks the face's displacement tree for the surface normal and point at that uv.
+	// get the displacement surface data (looked up through the face's dispinfo index; no validity check is done here)
+	DispCollTree_t &dispTree = m_DispTrees[ g_pFaces[ ndxFace ].dispinfo ];
+	CVRADDispColl *pDispTree = dispTree.m_pDispTree;
+
+	if ( bInside )	// bInside only enables the diagnostics below; the clamping that follows happens either way
+	{
+		if ( uv[ 0 ] < 0.0f || uv[ 0 ] > 1.0f ) { Msg( "Disp UV (%f) outside bounds!\n", uv[ 0 ] ); }
+		if ( uv[ 1 ] < 0.0f || uv[ 1 ] > 1.0f ) { Msg( "Disp UV (%f) outside bounds!\n", uv[ 1 ] ); }
+	}
+
+	if ( uv[ 0 ] < 0.0f ) { uv[ 0 ] = 0.0f; }	// clamp in place: the caller's uv is modified
+	if ( uv[ 0 ] > 1.0f ) { uv[ 0 ] = 1.0f; }
+	if ( uv[ 1 ] < 0.0f ) { uv[ 1 ] = 0.0f; }
+	if ( uv[ 1 ] > 1.0f ) { uv[ 1 ] = 1.0f; }
+
+	// get the normal at the clamped uv
+	pDispTree->DispUVToSurfNormal( uv, ptNormal );
+
+	// get the new "pt" (the surface point at the clamped uv)
+	pDispTree->DispUVToSurfPoint( uv, pt, 1.0f );
+}
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -778,7 +806,7 @@ bool CVRadDispMgr::DispFaceList_EnumerateLeaf( int ndxLeaf, intp context )
 
 		// check to see if the face already lives in the list
 		int ndx;
-		int size = m_EnumDispFaceList.m_FaceList.Size();
+		int size = m_EnumDispFaceList.m_FaceList.Count();
 		for( ndx = 0; ndx < size; ndx++ )
 		{
 			if( m_EnumDispFaceList.m_FaceList[ndx] == ndxLeafFace )
@@ -807,7 +835,7 @@ bool CVRadDispMgr::DispFaceList_EnumerateElement( int userId, intp context )
 
 	// check to see if the displacement already lives in the list
 	int ndx;
-	int size = m_EnumDispFaceList.m_DispList.Size();
+	int size = m_EnumDispFaceList.m_DispList.Count();
 	for( ndx = 0; ndx < size; ndx++ )
 	{
 		if( m_EnumDispFaceList.m_DispList[ndx] == pDispTree )
@@ -906,7 +934,7 @@ void AddSampleLightToRadial( Vector const &samplePos, Vector const &sampleNormal
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
-bool CVRadDispMgr::IsNeighbor( int iFace, int iNeighborFace )
+bool CVRadDispMgr::IsNeighbor( int iFace, int iNeighborFace, bool bCheck2ndDegreeNeighbors )
 {
 	if ( iFace == iNeighborFace )
 		return true;
@@ -918,6 +946,19 @@ bool CVRadDispMgr::IsNeighbor( int iFace, int iNeighborFace )
 			return true;
 	}
 
+	if ( bCheck2ndDegreeNeighbors )
+	{
+		for ( int iNeighbor = 0; iNeighbor < pFaceNeighbor->numneighbors; iNeighbor++ )
+		{
+			faceneighbor_t *pFaceNeighbor2 = &faceneighbor[ pFaceNeighbor->neighbor[ iNeighbor ] ];
+			for ( int iNeighbor2 = 0; iNeighbor2 < pFaceNeighbor2->numneighbors; iNeighbor2++ )
+			{
+				if ( pFaceNeighbor2->neighbor[ iNeighbor2 ] == iNeighborFace )
+					return true;
+			}
+		}
+	}
+
 	return false;
 }
 
@@ -1134,7 +1175,7 @@ void AddPatchLightToRadial( Vector const &patchOrigin, Vector const &patchNormal
 		if( bNeighborBump )
 		{
 			float flScale = patchNormal.Dot( normals[0] );
-			flScale = MAX( 0.0f, flScale );
+			flScale = max( 0.0f, flScale );
 			float flBumpInfluence = influence * flScale;
 
 			for( int ndxBump = 0; ndxBump < ( NUM_BUMP_VECTS+1 ); ndxBump++ )
@@ -1147,7 +1188,7 @@ void AddPatchLightToRadial( Vector const &patchOrigin, Vector const &patchNormal
 		else
 		{
 			float flScale = patchNormal.Dot( normals[0] );
-			flScale = MAX( 0.0f, flScale );
+			flScale = max( 0.0f, flScale );
 			float flBumpInfluence = influence * flScale * 0.05f;
 
 			for( int ndxBump = 0; ndxBump < ( NUM_BUMP_VECTS+1 ); ndxBump++ )
@@ -1161,7 +1202,7 @@ void AddPatchLightToRadial( Vector const &patchOrigin, Vector const &patchNormal
 	else
 	{
 		float flScale = patchNormal.Dot( luxelNormal );
-		flScale = MAX( 0.0f, flScale );
+		flScale = max( 0.0f, flScale );
 		influence *= flScale;
 		pRadial->light[0][ndxRadial].AddWeighted( pPatchLight[0], influence );
 
@@ -1337,7 +1378,7 @@ void CVRadDispMgr::GetInterestingPatchesForLuxels(
 						{
 							pPatch->m_IterationKey = curIterationKey;
 							
-							if ( IsNeighbor( ndxFace, pPatch->faceNumber ) )
+							if ( IsNeighbor( ndxFace, pPatch->faceNumber, g_bLargeDispSampleRadius ) )
 							{
 								interestingPatches.AddToTail( pPatch );
 							}
diff --git a/utils/vrad/vradstaticprops.cpp b/utils/vrad/vradstaticprops.cpp
index 69e889d0..eea93c9a 100644
--- a/utils/vrad/vradstaticprops.cpp
+++ b/utils/vrad/vradstaticprops.cpp
@@ -27,201 +27,226 @@
 #include "pacifier.h"
 #include "materialsystem/imaterial.h"
 #include "materialsystem/hardwareverts.h"
-#include "materialsystem/hardwaretexels.h"
 #include "byteswap.h"
+#include "mpivrad.h"
 #include "vtf/vtf.h"
 #include "tier1/utldict.h"
 #include "tier1/utlsymbol.h"
-#include "bitmap/tgawriter.h"
+#include "tier3/tier3.h"
 
 #ifdef MPI
-#include "mpivrad.h"
 #include "messbuf.h"
 #include "vmpi.h"
 #include "vmpi_distribute_work.h"
 #endif
-
+#include "iscratchpad3d.h"
 
 #define ALIGN_TO_POW2(x,y) (((x)+(y-1))&~(y-1))
 
+int g_numVradStaticPropsLightingStreams = 3;
+
+static const TableVector g_localUpBumpBasis[NUM_BUMP_VECTS] = 
+{
+	// consistent basis wrt lightmaps
+ 	{	OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 },
+ 	{  -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 },
+ 	{  -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 }
+};
+
+// Computes the world-space bump normals for a static prop vertex by moving
+// g_localUpBumpBasis through a basis built around phongNormal. sVect and tVect
+// (presumably the texture S/T axes) together with flatNormal decide handedness;
+// the results are written to bumpNormals[0..2].
+void GetStaticPropBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal, 
+							   const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] )
+{
+	assert( NUM_BUMP_VECTS == 3 );
+
+	// Left-handed when the cross product of the two axes opposes the flat normal.
+	Vector vecSxT;
+	CrossProduct( sVect, tVect, vecSxT );
+	const bool bLeftHanded = ( DotProduct( flatNormal, vecSxT ) < 0.0f );
+
+	// Build the basis around the phong normal, filled in the order [1], [0], [2]:
+	//   [1] = normalize( phongNormal x sVect )
+	//   [0] = normalize( [1] x phongNormal )
+	//   [2] = phongNormal
+	matrix3x4_t smoothBasis;
+	CrossProduct( phongNormal.Base(), sVect.Base(), smoothBasis[1] );
+	VectorNormalize( smoothBasis[1] );
+	CrossProduct( smoothBasis[1], phongNormal.Base(), smoothBasis[0] );
+	VectorNormalize( smoothBasis[0] );
+	VectorCopy( phongNormal.Base(), smoothBasis[2] );
+
+	// Left-handed axes need entry [1] flipped.
+	if ( bLeftHanded )
+	{
+		VectorNegate( smoothBasis[1] );
+	}
+
+	// Move each g_localUpBumpBasis vector into world space via VectorIRotate.
+	int iBump = 0;
+	while ( iBump < 3 )
+	{
+		VectorIRotate( g_localUpBumpBasis[iBump], smoothBasis, bumpNormals[iBump] );
+		++iBump;
+	}
+}
+
 // identifies a vertex embedded in solid
 // lighting will be copied from nearest valid neighbor
 struct badVertex_t
 {
 	int		m_ColorVertex;
 	Vector	m_Position;
-	Vector	m_Normal;
+	Vector	m_Normals[ NUM_BUMP_VECTS + 1 ];
 };
 
 // a final colored vertex
 struct colorVertex_t
 {
-	Vector	m_Color;
+	Vector	m_Colors[ NUM_BUMP_VECTS + 1 ];
+	float   m_SunAmount[ NUM_BUMP_VECTS + 1 ];
 	Vector	m_Position;
 	bool	m_bValid;
 };
 
-// a texel suitable for a model
-struct colorTexel_t
-{
-	Vector		m_Color;
-	Vector		m_WorldPosition;
-	Vector		m_WorldNormal;
-	float		m_fDistanceToTri; // If we are outside of the triangle, how far away is it?
-	bool		m_bValid;
-	bool		m_bPossiblyInteresting;
-
-};
-
 class CComputeStaticPropLightingResults
 {
 public:
 	~CComputeStaticPropLightingResults()
 	{
 		m_ColorVertsArrays.PurgeAndDeleteElements();
-		m_ColorTexelsArrays.PurgeAndDeleteElements();
 	}
 	
 	CUtlVector< CUtlVector<colorVertex_t>* > m_ColorVertsArrays;
-	CUtlVector< CUtlVector<colorTexel_t>* > m_ColorTexelsArrays;
 };
 
-//-----------------------------------------------------------------------------
-struct Rasterizer
+Vector NormalizeVertexBumpedLighting( Vector const *pColorNormal, Vector *pColorBumps )
 {
-	struct Location
+	const Vector &linearUnbumped = *( ( const Vector * )pColorNormal );
+	Vector linearBump1 = *( ( const Vector * )(pColorBumps + 0) );
+	Vector linearBump2 = *( ( const Vector * )(pColorBumps + 1) );
+	Vector linearBump3 = *( ( const Vector * )(pColorBumps + 2) );
+
+	const float flNormalizationFactor = 1.0f / 3.0f;
+
+	// find a scale factor which makes the average of the 3 bumped mapped vectors match the
+	// straight up vector (if possible), so that flat bumpmapped areas match non-bumpmapped
+	// areas.
+	Vector bumpAverage = linearBump1;
+	bumpAverage += linearBump2;
+	bumpAverage += linearBump3;
+	bumpAverage *= flNormalizationFactor;
+
+	Vector correctionScale;
+
+	if( *( int * )&bumpAverage[0] != 0 &&
+		*( int * )&bumpAverage[1] != 0 &&
+		*( int * )&bumpAverage[2] != 0 )
 	{
-		Vector barycentric;
-		Vector2D uv;
-		bool   insideTriangle;
-	};
-
-	Rasterizer(Vector2D t0, Vector2D t1, Vector2D t2, size_t resX, size_t resY)
-	: mT0(t0)
-	, mT1(t1)
-	, mT2(t2)
-	, mResX(resX)
-	, mResY(resY)
-	, mUvStepX(1.0f / resX)
-	, mUvStepY(1.0f / resY)
-	{ 
-		Build();
+		// fast path when we know that we don't have to worry about divide by zero.
+		VectorDivide( linearUnbumped, bumpAverage, correctionScale );
 	}
-
-	CUtlVector< Location >::iterator begin() { return mRasterizedLocations.begin(); }
-	CUtlVector< Location >::iterator end() { return mRasterizedLocations.end(); }
-
-	void Build();
-
-	inline size_t GetRow(float y) const { return size_t(y * mResY); }
-	inline size_t GetCol(float x) const { return size_t(x * mResX); }
-
-	inline size_t GetLinearPos( const CUtlVector< Location >::iterator& it ) const
+	else
 	{
-		// Given an iterator, return what the linear position in the buffer would be for the data.
-		return (size_t)(GetRow(it->uv.y) * mResX)
-			 + (size_t)(GetCol(it->uv.x));
-	}
-	
-private:
-	const Vector2D mT0, mT1, mT2;
-	const size_t mResX, mResY;
-	const float mUvStepX, mUvStepY;
-
-	// Right now, we just fill this out and directly iterate over it. 
-	// It could be large. This is a memory/speed tradeoff. We could instead generate them
-	// on demand. 
-	CUtlVector< Location > mRasterizedLocations;
-};
-
-//-----------------------------------------------------------------------------
-inline Vector ComputeBarycentric( Vector2D _edgeC, Vector2D _edgeA, Vector2D _edgeB, float _dAA, float _dAB, float _dBB, float _invDenom )
-{
-	float dCA = _edgeC.Dot(_edgeA);
-	float dCB = _edgeC.Dot(_edgeB);
-	
-	Vector retVal;
-	retVal.y = (_dBB * dCA - _dAB * dCB) * _invDenom;
-	retVal.z = (_dAA * dCB - _dAB * dCA) * _invDenom;
-	retVal.x = 1.0f - retVal.y - retVal.z;
-
-	return retVal;
-}
-
-//-----------------------------------------------------------------------------
-void Rasterizer::Build()
-{
-	// For now, use the barycentric method. It's easy, I'm lazy. 
-	// We can optimize later if it's a performance issue.
-	const float baseX = mUvStepX / 2.0f;
-	const float baseY = mUvStepY / 2.0f;
-
-
-	float fMinX = MIN(MIN(mT0.x, mT1.x), mT2.x);
-	float fMinY = MIN(MIN(mT0.y, mT1.y), mT2.y);
-	float fMaxX = MAX(MAX(mT0.x, mT1.x), mT2.x);
-	float fMaxY = MAX(MAX(mT0.y, mT1.y), mT2.y);
-
-	// Degenerate. Consider warning about these, but otherwise no problem.
-	if (fMinX == fMaxX || fMinY == fMaxY)
-		return;
-
-	// Clamp to 0..1
-	fMinX = MAX(0, fMinX);
-	fMinY = MAX(0, fMinY);
-	fMaxX = MIN(1.0f, fMaxX);
-	fMaxY = MIN(1.0f, fMaxY);
-
-	// We puff the interesting area up by 1 so we can hit an inflated region for the necessary bilerp data.
-	// If we wanted to support better texturing (almost definitely unnecessary), we'd change this to a larger size.
-	const int kFilterSampleRadius = 1;
-
-	int iMinX = GetCol(fMinX) - kFilterSampleRadius;
-	int iMinY = GetRow(fMinY) - kFilterSampleRadius;
-	int iMaxX = GetCol(fMaxX) + 1 + kFilterSampleRadius;
-	int iMaxY = GetRow(fMaxY) + 1 + kFilterSampleRadius;
-
-	// Clamp to valid texture (integer) locations
-	iMinX = MAX(0, iMinX);
-	iMinY = MAX(0, iMinY);
-	iMaxX = MIN(iMaxX, mResX - 1);
-	iMaxY = MIN(iMaxY, mResY - 1);
-
-	// Set the size to be as expected. 
-	// TODO: Pass this in from outside to minimize allocations
-	// TODO: Pass this in from outside to minimize allocations
-	int count = (iMaxY - iMinY + 1) 
-		      * (iMaxX - iMinX + 1);
-	mRasterizedLocations.EnsureCount(count);
-	memset( mRasterizedLocations.Base(), 0, mRasterizedLocations.Count() * sizeof( Location ) );
-	
-	// Computing Barycentrics adapted from here http://gamedev.stackexchange.com/questions/23743/whats-the-most-efficient-way-to-find-barycentric-coordinates
-	Vector2D edgeA = mT1 - mT0;
-	Vector2D edgeB = mT2 - mT0;
-
-	float dAA = edgeA.Dot(edgeA);
-	float dAB = edgeA.Dot(edgeB);
-	float dBB = edgeB.Dot(edgeB);
-	float invDenom = 1.0f / (dAA * dBB - dAB * dAB);
-
-	int linearPos = 0; 
-	for (int j = iMinY; j <= iMaxY; ++j) {
-		for (int i = iMinX; i <= iMaxX; ++i) {
-			Vector2D testPt( i * mUvStepX + baseX, j * mUvStepY + baseY );
-			Vector barycentric = ComputeBarycentric( testPt - mT0, edgeA, edgeB, dAA, dAB, dBB, invDenom );
-
-			// Test whether the point is inside the triangle. 
-			// MCJOHNTODO: Edge rules and whatnot--right now we re-rasterize points on the edge.
-			Location& newLoc = mRasterizedLocations[linearPos++];
-			newLoc.barycentric = barycentric;
-			newLoc.uv = testPt;
-
-			newLoc.insideTriangle = (barycentric.x >= 0.0f && barycentric.x <= 1.0f && barycentric.y >= 0.0f && barycentric.y <= 1.0f && barycentric.z >= 0.0f && barycentric.z <= 1.0f);
+		correctionScale.Init( 0.0f, 0.0f, 0.0f );
+		if( bumpAverage[0] != 0.0f )
+		{
+			correctionScale[0] = linearUnbumped[0] / bumpAverage[0];
+		}
+		if( bumpAverage[1] != 0.0f )
+		{
+			correctionScale[1] = linearUnbumped[1] / bumpAverage[1];
+		}
+		if( bumpAverage[2] != 0.0f )
+		{
+			correctionScale[2] = linearUnbumped[2] / bumpAverage[2];
 		}
 	}
+	linearBump1 *= correctionScale;
+	linearBump2 *= correctionScale;
+	linearBump3 *= correctionScale;
+
+	*((Vector *) (pColorBumps + 0)) = linearBump1;
+	*((Vector *) (pColorBumps + 1)) = linearBump2;
+	*((Vector *) (pColorBumps + 2)) = linearBump3;
+
+	return correctionScale;
 }
 
 
+// Rescales the three bumped sun amounts in place so that their sum (times
+// flNormalizationFactor) equals the unbumped amount, so that flat bump-mapped
+// areas match non-bump-mapped areas.
+//   pSunAmount0    - unbumped sun amount (read only)
+//   pSunAmount1..3 - bumped sun amounts, read and overwritten
+// When the bumped amounts sum to zero they are left unchanged (scale of 1).
+void NormalizeVertexBumpedSunAmount( float const *pSunAmount0, float *pSunAmount1, float *pSunAmount2, float *pSunAmount3 )
+{
+	const float &linearSunAmountUnbumped = *pSunAmount0;
+	float linearSunAmount1 = *pSunAmount1;
+	float linearSunAmount2 = *pSunAmount2;
+	float linearSunAmount3 = *pSunAmount3;
+
+	const float flNormalizationFactor = 1.0f;// / 3.0f; - store in 0..1 space (for 0..255 alpha channel), multiply by 3.0 in the shader
+
+	// find a scale factor which makes the normalized sum of the 3 bumped amounts match
+	// the unbumped amount (if possible); with a factor of 1.0 the "average" below is
+	// simply their sum.
+	float bumpAverage = linearSunAmount1;
+	bumpAverage += linearSunAmount2;
+	bumpAverage += linearSunAmount3;
+	bumpAverage *= flNormalizationFactor;
+
+	// Test the float directly instead of reinterpreting its bits as an int: the cast
+	// breaks strict aliasing and treats -0.0f as non-zero, which divides by zero.
+	float correctionScale = 1.0f;
+	if ( bumpAverage != 0.0f )
+	{
+		correctionScale = linearSunAmountUnbumped / bumpAverage;
+	}
+
+	linearSunAmount1 *= correctionScale;
+	linearSunAmount2 *= correctionScale;
+	linearSunAmount3 *= correctionScale;
+
+	// write the rescaled amounts back to the caller
+	*pSunAmount1 = linearSunAmount1;
+	*pSunAmount2 = linearSunAmount2;
+	*pSunAmount3 = linearSunAmount3;
+}
+
+
+// Appends the sample scale and total bake time to vrad_bumpstaticprops_<map>.txt; does nothing unless bump static prop dumping is on with 3 lighting streams.
+void DumpElapsedTime( int timeTaken )
+{
+	if ( g_bDumpBumpStaticProps && (g_numVradStaticPropsLightingStreams == 3) )
+	{
+		char mapName[MAX_PATH];
+		Q_FileBase( source, mapName, sizeof( mapName ) );
+
+		char bumpPropFilename[MAX_PATH];
+		// bounded format: prefix + mapName + ".txt" can exceed MAX_PATH
+		Q_snprintf( bumpPropFilename, sizeof( bumpPropFilename ), "vrad_bumpstaticprops_%s.txt", mapName );
+		Msg( "Writing %s...\n", bumpPropFilename );
+
+		FILE *fp = fopen( bumpPropFilename, "a" );
+		if ( !fp )
+		{
+			Msg( "Writing %s...failed\n", bumpPropFilename );
+			return;
+		}
+
+		char str[512];
+		GetHourMinuteSecondsString( timeTaken, str, sizeof( str ) );
+
+		fprintf( fp, "\n\nUsing -staticpropsamplescale %f (-final defaults to 16)\n", g_flStaticPropSampleScale );
+		fprintf( fp, "\nTotal time taken to bake static prop lighting: %s\n", str );
+
+		fclose( fp );
+	}
+}
 //-----------------------------------------------------------------------------
 // Globals
 //-----------------------------------------------------------------------------
@@ -231,18 +256,6 @@ CUtlSymbolTable g_ForcedTextureShadowsModels;
 // INSIDE PropTested_t.  USE THAT INSTEAD.
 IPhysicsCollision *s_pPhysCollision = NULL;
 
-static void ConvertTexelDataToTexture(unsigned int _resX, unsigned int _resY, ImageFormat _destFmt, const CUtlVector<colorTexel_t>& _srcTexels, CUtlMemory<byte>* _outTexture);
-
-// Such a monstrosity. :(
-static void GenerateLightmapSamplesForMesh( const matrix3x4_t& _matPos, const matrix3x4_t& _matNormal, int _iThread, int _skipProp, int _nFlags, int _lightmapResX, int _lightmapResY, 
-											studiohdr_t* _pStudioHdr, mstudiomodel_t* _pStudioModel, OptimizedModel::ModelHeader_t* _pVtxModel, int _meshID, 
-											CComputeStaticPropLightingResults *_pResults );
-
-// Debug function, converts lightmaps to linear space then dumps them out. 
-// TODO: Write out the file in a .dds instead of a .tga, in whatever format we're supposed to use.
-static void DumpLightmapLinear( const char* _dstFilename, const CUtlVector<colorTexel_t>& _srcTexels, int _width, int _height );
-
-
 //-----------------------------------------------------------------------------
 // Vrad's static prop manager
 //-----------------------------------------------------------------------------
@@ -261,6 +274,8 @@ public:
 	// iterate all the instanced static props and compute their vertex lighting
 	void ComputeLighting( int iThread );
 
+	virtual void MakePatches() override;
+
 private:
 #ifdef MPI
 	// VMPI stuff.
@@ -294,12 +309,15 @@ private:
 		CUtlBuffer		m_VtxBuf;
 		CUtlVector<int>	m_textureShadowIndex;	// each texture has an index if this model casts texture shadows
 		CUtlVector<int>	m_triangleMaterialIndex;// each triangle has an index if this model casts texture shadows
+		Vector			m_vReflectivity;
+		bool			m_bHasBumpmap;
+		bool			m_bHasPhong;
 	};
 
 	struct MeshData_t
 	{
-		CUtlVector<Vector>	m_VertexColors;
-		CUtlMemory<byte>	m_TexelsEncoded;
+		CUtlVector<Vector4D> m_VertColorData; // w has the additional lightmap alpha data
+		int					m_numVerts;
 		int					m_nLod;
 	};
 
@@ -315,16 +333,9 @@ private:
 		BSPTreeDataHandle_t		m_Handle;
 		CUtlVector<MeshData_t>	m_MeshData;
 		int                     m_Flags;
+		int                     m_FlagsEx;
 		bool					m_bLightingOriginValid;
-
-		// Note that all lightmaps for a given prop share the same resolution (and format)--and there can be multiple lightmaps
-		// per prop (if there are multiple pieces--the watercooler is an example).
-		// This is effectively because there's not a good way in hammer for a prop to say "this should be the resolution
-		// of each of my sub-pieces."
-		ImageFormat				m_LightmapImageFormat;
-		unsigned int			m_LightmapImageWidth;
-		unsigned int			m_LightmapImageHeight;
-
+		Vector					m_vReflectivity;
 	};
 
 	// Enumeration context
@@ -341,7 +352,7 @@ private:
 	bool m_bIgnoreStaticPropTrace;
 
 	void ComputeLighting( CStaticProp &prop, int iThread, int prop_index, CComputeStaticPropLightingResults *pResults );
-	void ApplyLightingToStaticProp( int iStaticProp, CStaticProp &prop, const CComputeStaticPropLightingResults *pResults );
+	void ApplyLightingToStaticProp( CStaticProp &prop, const CComputeStaticPropLightingResults *pResults );
 
 	void SerializeLighting();
 	void AddPolysForRayTrace();
@@ -392,6 +403,9 @@ bool IsStaticProp( studiohdr_t* pHdr )
 //-----------------------------------------------------------------------------
 static bool LoadFile( char const* pFileName, CUtlBuffer& buf )
 {
+	if ( ReadFileFromPak( GetPakFile(), pFileName, false, buf ) )
+		return true;
+
 	if ( !g_pFullFileSystem )
 		return false;
 
@@ -456,7 +470,7 @@ CPhysCollide* ComputeConvexHull( studiohdr_t* pStudioHdr )
 
 	// Convert an array of convex elements to a compiled collision model
 	// (this deletes the convex elements)
-	return s_pPhysCollision->ConvertConvexToCollide( convexHulls.Base(), convexHulls.Size() );
+	return s_pPhysCollision->ConvertConvexToCollide( convexHulls.Base(), convexHulls.Count() );
 }
 
 
@@ -500,8 +514,8 @@ bool LoadStudioModel( char const* pModelName, CUtlBuffer& buf )
 	}
 
 	// ensure reset
-	pHdr->SetVertexBase(NULL);
-	pHdr->SetIndexBase(NULL);
+	pHdr->SetVertexBase( NULL );
+	pHdr->SetIndexBase( NULL );
 
 	return true;
 }
@@ -532,7 +546,7 @@ bool LoadVTXFile( char const* pModelName, const studiohdr_t *pStudioHdr, CUtlBuf
 
 	// construct filename
 	Q_StripExtension( pModelName, filename, sizeof( filename ) );
-	strcat( filename, ".dx80.vtx" );
+	strcat( filename, ".dx90.vtx" );
 
 	if ( !LoadFile( filename, buf ) )
 	{
@@ -784,15 +798,15 @@ public:
 	// HACKHACK: Compute the average coverage for this triangle by sampling the AABB of its texture space
 	float ComputeCoverageForTriangle( int shadowTextureIndex, const Vector2D &t0, const Vector2D &t1, const Vector2D &t2 )
 	{
-		float umin = MIN(t0.x, t1.x);
-		umin = MIN(umin, t2.x);
-		float umax = MAX(t0.x, t1.x);
-		umax = MAX(umax, t2.x);
+		float umin = min(t0.x, t1.x);
+		umin = min(umin, t2.x);
+		float umax = max(t0.x, t1.x);
+		umax = max(umax, t2.x);
 
-		float vmin = MIN(t0.y, t1.y);
-		vmin = MIN(vmin, t2.y);
-		float vmax = MAX(t0.y, t1.y);
-		vmax = MAX(vmax, t2.y);
+		float vmin = min(t0.y, t1.y);
+		vmin = min(vmin, t2.y);
+		float vmax = max(t0.y, t1.y);
+		vmax = max(vmax, t2.y);
 
 		// UNDONE: Do something about tiling
 		umin = clamp(umin, 0, 1);
@@ -833,21 +847,28 @@ public:
 		if ( bBackface && !tex.allowBackface )
 			return 0;
 		Vector2D uv = coords.x * mat.uv[0] + coords.y * mat.uv[1] + coords.z * mat.uv[2];
-		int u = RoundFloatToInt( uv[0] * tex.width );
-		int v = RoundFloatToInt( uv[1] * tex.height );
-		
-		// asume power of 2, clamp or wrap
-		// UNDONE: Support clamp?  This code should work
-#if 0
-		u = tex.clampU ? clamp(u,0,(tex.width-1)) : (u & (tex.width-1));
-		v = tex.clampV ? clamp(v,0,(tex.height-1)) : (v & (tex.height-1));
-#else
-		// for now always wrap
+		// bilinear filtered sample
+		float ou = uv[0] * tex.width;
+		float ov = uv[1] * tex.height;
+		int u = floor( ou );
+		int v = floor( ov );
+		int u1 = u+1;
+		int v1 = v+1;
 		u &= (tex.width-1);
+		u1 &= (tex.width-1);
 		v &= (tex.height-1);
-#endif
+		v1 &= (tex.height-1);
+		float lerpU = ou - floor( ou );	// take the fraction from the unwrapped coordinate; u/v were already wrapped with & (size-1) above
+		float lerpV = ov - floor( ov );
+		int x = (tex.pAlphaTexels[v * tex.width + u] * (1-lerpU)) + (lerpU*tex.pAlphaTexels[v * tex.width + u1]);
+		int y = (tex.pAlphaTexels[v1 * tex.width + u] * (1-lerpU)) + (lerpU*tex.pAlphaTexels[v1 * tex.width + u1]);
+		return int( x * (1-lerpV) + (y*lerpV) );
+	}
 
-		return tex.pAlphaTexels[v * tex.width + u];
+	void GetMapping( int shadowTextureIndex, int *pWidth, int *pHeight )
+	{
+		*pWidth = m_Textures[shadowTextureIndex].width;
+		*pHeight = m_Textures[shadowTextureIndex].height;
 	}
 
 	struct alphatexture_t 
@@ -917,9 +938,36 @@ void CleanModelName( const char *pModelName, char *pOutput, int outLen )
 	{
 		*dot = 0;
 	}
-
 }
 
+int LoadShadowTexture( const char *pMaterialName )	// returns the index written by g_ShadowTextureList.FindOrLoadIfValid
+{
+	int textureIndex = -1;	// stays -1 unless FindOrLoadIfValid writes an index
+	// build "materials/<pMaterialName>.vmt" and hand it to the shadow texture list
+	char szPath[MAX_PATH];
+
+	Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+	Q_strncat( szPath, pMaterialName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+	Q_strncat( szPath, ".vmt", sizeof( szPath ), COPY_ALL_CHARACTERS );
+	Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+	g_ShadowTextureList.FindOrLoadIfValid( szPath, &textureIndex );
+	return textureIndex;
+}
+
+int AddShadowTextureTriangle( int shadowTextureIndex, const Vector2D &t0, const Vector2D &t1, const Vector2D &t2 )	// free-function wrapper over g_ShadowTextureList.AddMaterialEntry
+{
+	return g_ShadowTextureList.AddMaterialEntry(shadowTextureIndex, t0, t1, t2 );
+}
+
+float ComputeCoverageForTriangle( int shadowTextureIndex, const Vector2D &t0, const Vector2D &t1, const Vector2D &t2 )	// free-function wrapper over g_ShadowTextureList.ComputeCoverageForTriangle
+{
+	return g_ShadowTextureList.ComputeCoverageForTriangle(shadowTextureIndex, t0, t1, t2 );
+}
+
+void GetShadowTextureMapping( int shadowTextureIndex, int *pWidth, int *pHeight )	// writes the texture's width/height via g_ShadowTextureList.GetMapping
+{
+	g_ShadowTextureList.GetMapping( shadowTextureIndex, pWidth, pHeight );
+}
 
 void ForceTextureShadowsOnModel( const char *pModelName )
 {
@@ -938,6 +986,197 @@ bool IsModelTextureShadowsForced( const char *pModelName )
 	return g_ForcedTextureShadowsModels.Find(buf).IsValid();
 }
 
+bool IsStaticPropBumpmapped( studiohdr_t *pStudioHdr )	// true if any of the model's VMTs contains "$bumpmap"
+{
+	if ( g_numVradStaticPropsLightingStreams == 1 )
+	{
+		return false;	// single-stream mode: always reported as not bumpmapped
+	}
+
+	// check if prop uses "$bumpmap" in any materials, use this as an indication of valid tangent data (availability of tangentdata does not imply it's valid/used)
+	for ( int textureIndex = 0; textureIndex < pStudioHdr->numtextures; textureIndex++ )
+	{
+		char szPath[MAX_PATH];
+
+		// try this texture under every cdtexture directory; the loop only stops early once a "$bumpmap" key is found
+		for ( int i = 0; i < pStudioHdr->numcdtextures; i++ )
+		{
+			Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+			Q_strncat( szPath, pStudioHdr->pCdtexture( i ), sizeof( szPath ) );
+			const char *textureName = pStudioHdr->pTexture( textureIndex )->pszName();
+			Q_strncat( szPath, textureName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_strncat( szPath, ".vmt", sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+
+			KeyValues *pVMT = new KeyValues( "vmt" );
+			CUtlBuffer buf( 0, 0, CUtlBuffer::TEXT_BUFFER );
+			LoadFileIntoBuffer( buf, szPath );
+			if ( pVMT->LoadFromBuffer( szPath, buf ) )
+			{
+				if ( pVMT->FindKey( "$bumpmap" ) )
+				{
+					pVMT->deleteThis();	// release the KeyValues before the early return
+					return true;
+				}
+			}
+			pVMT->deleteThis();
+		}
+	}
+
+	return false;
+}
+
+
+// Scans every texture/cdtexture VMT of the model: sets *pHasBumpmap if any has "$bumpmap" and
+// *pHasPhong if one has both "$bumpmap" and "$phong". Both outputs are always written.
+void StaticPropHasPhongBump( studiohdr_t *pStudioHdr, bool *pHasBumpmap, bool *pHasPhong )
+{
+	// set the outputs first so the single-stream early-out below cannot leave them uninitialized
+	*pHasBumpmap = false;
+	*pHasPhong   = false;
+
+	if ( g_numVradStaticPropsLightingStreams == 1 )
+	{
+		return;
+	}
+
+	// check if prop uses "$bumpmap" in any materials, use this as an indication of valid tangent data (availability of tangentdata does not imply it's valid/used)
+	for ( int textureIndex = 0; textureIndex < pStudioHdr->numtextures; textureIndex++ )
+	{
+		char szPath[MAX_PATH];
+
+		// try this texture under every cdtexture directory; the search only stops once a bump+phong material is found
+		for ( int i = 0; i < pStudioHdr->numcdtextures; i++ )
+		{
+			Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+			Q_strncat( szPath, pStudioHdr->pCdtexture( i ), sizeof( szPath ) );
+			const char *textureName = pStudioHdr->pTexture( textureIndex )->pszName();
+			Q_strncat( szPath, textureName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_strncat( szPath, ".vmt", sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+
+			KeyValues *pVMT = new KeyValues( "vmt" );
+			CUtlBuffer buf( 0, 0, CUtlBuffer::TEXT_BUFFER );
+			LoadFileIntoBuffer( buf, szPath );
+			if ( pVMT->LoadFromBuffer( szPath, buf ) )
+			{
+				if ( pVMT->FindKey( "$bumpmap" ) )
+				{
+					*pHasBumpmap = true;
+
+					// is it also phong
+					if ( pVMT->FindKey( "$phong" ) )
+					{
+						*pHasPhong = true;
+						pVMT->deleteThis();
+						return;
+					}
+				}
+			}
+			pVMT->deleteThis();
+		}
+	}
+}
+
+Vector ReadReflectivityFromVTF( const char *pName )	// returns the reflectivity stored in materials/<pName>.vtf
+{
+	Vector vRefl( 0.18f, 0.18f, 0.18f );	// fallback when the VTF cannot be read or parsed
+
+	char szPath[ MAX_PATH ];
+	Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+	Q_strncat( szPath, pName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+	Q_strncat( szPath, ".vtf", sizeof( szPath ), COPY_ALL_CHARACTERS );
+	Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+	// read at most VTFFileHeaderSize() bytes into a stack buffer
+	int nHeaderSize = VTFFileHeaderSize();
+	unsigned char *pMem = (unsigned char *)stackalloc( nHeaderSize );
+	CUtlBuffer buf( pMem, nHeaderSize );
+	if ( g_pFullFileSystem && g_pFullFileSystem->ReadFile( szPath, NULL, buf, nHeaderSize ) )	// same null guard as LoadFile()
+	{
+		IVTFTexture *pTex = CreateVTFTexture();
+		if ( pTex->Unserialize( buf, true ) )	// second argument presumably requests header-only parsing - TODO confirm
+		{
+			vRefl = pTex->Reflectivity();
+		}
+		DestroyVTFTexture( pTex );
+	}
+	return vRefl;
+}
+
+// Returns the model's diffuse reflectivity: the $basetexture VTF reflectivity multiplied by the VMT
+// "color" tint, taken from the first texture/cdtexture path only. Falls back to 0.18 grey.
+Vector ComputeStaticPropReflectivity( studiohdr_t *pStudioHdr )
+{
+	Vector vReflectivity( 0.18f, 0.18f, 0.18f );
+
+	for ( int textureIndex = 0; textureIndex < pStudioHdr->numtextures; textureIndex++ )
+	{
+		char szPath[ MAX_PATH ];
+
+		// NOTE: the loop body returns after the first texture/cdtexture path, whether or not its VMT loaded
+		for ( int i = 0; i < pStudioHdr->numcdtextures; i++ )
+		{
+			Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+			Q_strncat( szPath, pStudioHdr->pCdtexture( i ), sizeof( szPath ) );
+			const char *textureName = pStudioHdr->pTexture( textureIndex )->pszName();
+			Q_strncat( szPath, textureName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_strncat( szPath, ".vmt", sizeof( szPath ), COPY_ALL_CHARACTERS );
+			Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+
+			Vector vVtfRefl( 1.0f, 1.0f, 1.0f );
+			Vector vTint( 1.0f, 1.0f, 1.0f );
+
+			KeyValues *pVMT = new KeyValues( "vmt" );
+			CUtlBuffer buf( 0, 0, CUtlBuffer::TEXT_BUFFER );
+			LoadFileIntoBuffer( buf, szPath );
+			if ( pVMT->LoadFromBuffer( szPath, buf ) )
+			{
+				KeyValues *pBaseTexture = pVMT->FindKey( "$basetexture" );
+				if ( pBaseTexture )
+				{
+					const char *pBaseTextureName = pBaseTexture->GetString();
+					if ( pBaseTextureName )
+					{
+						vVtfRefl = ReadReflectivityFromVTF( pBaseTextureName );
+					}
+				}
+
+				KeyValues *pColorTint = pVMT->FindKey( "color" );
+				if ( pColorTint )
+				{
+					const char *pColorString = pColorTint->GetString();
+					if ( pColorString[ 0 ] == '{' )
+					{
+						int r = 0;
+						int g = 0;
+						int b = 0;
+						sscanf( pColorString, "{%d %d %d}", &r, &g, &b );
+						vTint.x = SrgbGammaToLinear( clamp( float( r ) / 255.0f, 0.0f, 1.0f ) );	// each channel uses its own component
+						vTint.y = SrgbGammaToLinear( clamp( float( g ) / 255.0f, 0.0f, 1.0f ) );
+						vTint.z = SrgbGammaToLinear( clamp( float( b ) / 255.0f, 0.0f, 1.0f ) );
+					}
+					else if ( pColorString[ 0 ] == '[' )
+					{
+						sscanf( pColorString, "[%f %f %f]", &vTint.x, &vTint.y, &vTint.z );
+						vTint.x = clamp( vTint.x, 0.0f, 1.0f );
+						vTint.y = clamp( vTint.y, 0.0f, 1.0f );
+						vTint.z = clamp( vTint.z, 0.0f, 1.0f );
+					}
+				}
+			}
+			pVMT->deleteThis();
+
+			vReflectivity = vVtfRefl * vTint;
+			if ( vReflectivity.x == 1.0f && vReflectivity.y == 1.0f && vReflectivity.z == 1.0f )
+			{
+				vReflectivity.Init( 0.18f, 0.18f, 0.18f );
+			}
+			return vReflectivity;
+		}
+	}
+
+	return vReflectivity;
+}
 
 //-----------------------------------------------------------------------------
 // Creates a collision model (based on the render geometry!)
@@ -1010,6 +1249,10 @@ void CVradStaticPropMgr::CreateCollisionModel( char const* pModelName )
 			g_ShadowTextureList.LoadAllTexturesForModel( pHdr, m_StaticPropDict[i].m_textureShadowIndex.Base() );
 		}
 	}
+
+	// mark static props that use $bumpmap, $phong materials
+	StaticPropHasPhongBump( pHdr, &m_StaticPropDict[ i ].m_bHasBumpmap, &m_StaticPropDict[ i ].m_bHasPhong );
+	m_StaticPropDict[ i ].m_vReflectivity = ComputeStaticPropReflectivity( pHdr );
 }
 
 
@@ -1026,13 +1269,99 @@ void CVradStaticPropMgr::UnserializeModelDict( CUtlBuffer& buf )
 		
 		CreateCollisionModel( lump.m_Name );
 	}
+
+ 	// spew bump static prop info
+	if ( g_bDumpBumpStaticProps && (g_numVradStaticPropsLightingStreams == 3) )
+	{
+		char mapName[MAX_PATH];
+		Q_FileBase( source, mapName, sizeof( mapName ) );
+
+		char bumpPropFilename[MAX_PATH];
+		sprintf( bumpPropFilename, "vrad_bumpstaticprops_%s.txt", mapName);
+
+		Msg( "Writing %s...\n", bumpPropFilename );
+
+		FILE *fp = fopen( bumpPropFilename, "w" );
+
+		if ( !fp )
+		{
+			Msg( "Writing %s...failed\n", bumpPropFilename );
+			return;
+		}
+
+		fprintf( fp, "Bumpmap static prop list for %s\n", mapName );
+
+		int numBumpmapStaticProps = 0;
+		int numPhongStaticProps = 0;
+		for ( int i = m_StaticPropDict.Count(); --i >= 0; )
+		{
+			studiohdr_t *pStudioHdr = m_StaticPropDict[i].m_pStudioHdr;
+
+			if ( m_StaticPropDict[i].m_bHasBumpmap )
+			{
+				numBumpmapStaticProps++;
+			}
+
+			if ( m_StaticPropDict[i].m_bHasPhong )
+			{
+				numPhongStaticProps++;
+			}
+
+			if ( m_StaticPropDict[i].m_bHasBumpmap || m_StaticPropDict[i].m_bHasPhong )
+			{
+				fprintf( fp, "\nprop: %s\nvmt's containing $bumpmap, $phong:\n", pStudioHdr->pszName() );
+
+				for ( int textureIndex = 0; textureIndex < pStudioHdr->numtextures; textureIndex++ )
+				{
+					char szPath[MAX_PATH];
+
+					// iterate quietly through all specified directories until a valid material is found
+					for ( int i = 0; i < pStudioHdr->numcdtextures; i++ )
+					{
+						Q_strncpy( szPath, "materials/", sizeof( szPath ) );
+						Q_strncat( szPath, pStudioHdr->pCdtexture( i ), sizeof( szPath ) );
+						const char *textureName = pStudioHdr->pTexture( textureIndex )->pszName();
+						Q_strncat( szPath, textureName, sizeof( szPath ), COPY_ALL_CHARACTERS );
+						Q_strncat( szPath, ".vmt", sizeof( szPath ), COPY_ALL_CHARACTERS );
+						Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR );
+
+						KeyValues *pVMT = new KeyValues( "vmt" );
+						CUtlBuffer buf( 0, 0, CUtlBuffer::TEXT_BUFFER );
+						LoadFileIntoBuffer( buf, szPath );
+						if ( pVMT->LoadFromBuffer( szPath, buf ) )
+						{
+							if ( pVMT->FindKey( "$bumpmap" ) )
+							{
+								if ( pVMT->FindKey( "$phong" ) )
+								{
+									fprintf( fp, "$bump, $phong: %s\n", szPath );
+								}
+								else
+								{
+									fprintf( fp, "$bump: %s\n", szPath );
+								}
+							}
+							else if ( pVMT->FindKey( "$phong" ) )
+							{
+								// not possible/error?
+								fprintf( fp, "$phong: %s\n", szPath );
+							}
+						}
+						pVMT->deleteThis();
+					}
+				}
+			}
+		}
+		fprintf( fp, "\n%d static props, %d bumped static props (%d phong static props)\n", m_StaticPropDict.Count(), numBumpmapStaticProps, numPhongStaticProps );
+		fclose( fp );
+	}
+
 }
 
 void CVradStaticPropMgr::UnserializeModels( CUtlBuffer& buf )
 {
 	int count = buf.GetInt();
 
-
 	m_StaticProps.AddMultipleToTail(count);
 	for ( int i = 0; i < count; ++i )				  
 	{
@@ -1046,12 +1375,11 @@ void CVradStaticPropMgr::UnserializeModels( CUtlBuffer& buf )
 		m_StaticProps[i].m_ModelIdx = lump.m_PropType;
 		m_StaticProps[i].m_Handle = TREEDATA_INVALID_HANDLE;
 		m_StaticProps[i].m_Flags = lump.m_Flags;
-
-		// Changed this from using DXT1 to RGB888 because the compression artifacts were pretty nasty. 
-		// TODO: Consider changing back or basing this on user selection in hammer.
-		m_StaticProps[i].m_LightmapImageFormat = IMAGE_FORMAT_RGB888;
-		m_StaticProps[i].m_LightmapImageWidth = lump.m_nLightmapResolutionX;
-		m_StaticProps[i].m_LightmapImageHeight = lump.m_nLightmapResolutionY;
+		m_StaticProps[ i ].m_FlagsEx = lump.m_FlagsEx;
+		m_StaticProps[ i ].m_vReflectivity.Init( SrgbGammaToLinear( float( lump.m_DiffuseModulation.r ) / 255.0f ),
+												 SrgbGammaToLinear( float( lump.m_DiffuseModulation.g ) / 255.0f ),
+												 SrgbGammaToLinear( float( lump.m_DiffuseModulation.b ) / 255.0f ) );
+		m_StaticProps[ i ].m_vReflectivity *= m_StaticPropDict[ m_StaticProps[ i ].m_ModelIdx ].m_vReflectivity;
 	}
 }
 
@@ -1069,7 +1397,7 @@ void CVradStaticPropMgr::UnserializeStaticProps()
 
 	if ( g_GameLumps.GetGameLumpVersion( handle ) != GAMELUMP_STATIC_PROPS_VERSION )
 	{
-		Error( "Cannot load the static props... encountered a stale map version. Re-vbsp the map.\n" );
+		Error( "Cannot load the static props... encountered a stale map version. Re-vbsp the map." );
 	}
 
 	if ( g_GameLumps.GetGameLump( handle ) )
@@ -1110,7 +1438,7 @@ void CVradStaticPropMgr::Shutdown()
 {
 
 	// Remove all static prop model data
-	for (int i = m_StaticPropDict.Size(); --i >= 0; )
+	for (int i = m_StaticPropDict.Count(); --i >= 0; )
 	{
 		studiohdr_t *pStudioHdr = m_StaticPropDict[i].m_pStudioHdr;
 		if ( pStudioHdr )
@@ -1118,6 +1446,7 @@ void CVradStaticPropMgr::Shutdown()
 			if ( pStudioHdr->VertexBase() )
 			{
 				free( pStudioHdr->VertexBase() );
+				pStudioHdr->SetVertexBase( nullptr );
 			}
 			free( pStudioHdr );
 		}
@@ -1139,6 +1468,15 @@ void ComputeLightmapColor( dface_t* pFace, Vector &color )
 
 bool PositionInSolid( Vector &position )
 {
+/* 	Testing enabling/disabling since it erroneously reports verts inside light blockers
+    and there are a number of position offsets applied elsewhere to avoid surface acne
+	that might well be enough */
+
+	if ( g_bDisableStaticPropVertexInSolidTest )
+	{
+		return false;
+	}
+
 	int ndxLeaf = PointLeafnum( position );
 	if ( dleafs[ndxLeaf].contents & CONTENTS_SOLID )
 	{
@@ -1149,15 +1487,26 @@ bool PositionInSolid( Vector &position )
 	return false;
 }
 
+bool PositionIn3DSkybox( Vector &position )
+{
+	// True when the area of the leaf holding 'position' has a non-negative area_sky_cameras entry.
+	const int nArea = dleafs[ PointLeafnum( position ) ].area;
+	return area_sky_cameras[ nArea ] >= 0;
+}
+
 //-----------------------------------------------------------------------------
 // Trace from a vertex to each direct light source, accumulating its contribution.
 //-----------------------------------------------------------------------------
-void ComputeDirectLightingAtPoint( Vector &position, Vector &normal, Vector &outColor, int iThread,
-								   int static_prop_id_to_skip=-1, int nLFlags = 0)
+void ComputeDirectLightingAtPoint( Vector &position, Vector *normals, Vector *outColors, float *outSunAmount, int numNormals, bool bSkipSkyLight, int iThread,
+								   int static_prop_id_to_skip, int nLFlags )
 {
 	SSE_sampleLightOutput_t	sampleOutput;
 
-	outColor.Init();
+	for ( int k = 0; k < numNormals; ++ k )
+	{
+		outColors[k].Init();
+		outSunAmount[k] = 0.0f;
+	}
 
 	// Iterate over all direct lights and accumulate their contribution
 	int cluster = ClusterFromPoint( position );
@@ -1177,6 +1526,8 @@ void ComputeDirectLightingAtPoint( Vector &position, Vector &normal, Vector &out
 		Vector adjusted_pos = position;
 		float flEpsilon = 0.0;
 
+		const float flFudgeFactor = 4.0;
+
 		if  (dl->light.type != emit_skyambient)
 		{
 			// push towards the light
@@ -1188,38 +1539,133 @@ void ComputeDirectLightingAtPoint( Vector &position, Vector &normal, Vector &out
 				fudge = dl->light.origin-position;
 				VectorNormalize( fudge );
 			}
-			fudge *= 4.0;
+			fudge *= flFudgeFactor;
 			adjusted_pos += fudge;
 		}
 		else 
 		{
 			// push out along normal
-			adjusted_pos += 4.0 * normal;
+			adjusted_pos += flFudgeFactor * normals[0];
 //			flEpsilon = 1.0;
 		}
 
 		FourVectors adjusted_pos4;
-		FourVectors normal4;
 		adjusted_pos4.DuplicateVector( adjusted_pos );
-		normal4.DuplicateVector( normal );
 
-		GatherSampleLightSSE( sampleOutput, dl, -1, adjusted_pos4, &normal4, 1, iThread, nLFlags | GATHERLFLAGS_FORCE_FAST,
-		                      static_prop_id_to_skip, flEpsilon );
+		FourVectors normal4;
+		switch( numNormals )
+		{
+		case 4:
+			normal4.LoadAndSwizzle( normals[0], normals[1], normals[2], normals[3] );
+			break;
+		case 3:
+			normal4.LoadAndSwizzle( normals[0], normals[1], normals[2], normals[0] );
+			break;
+		default:
+			normal4.DuplicateVector( normals[0] );
+			break;
+		}
 
-#ifdef VRAD_SSE
-		VectorMA( outColor, sampleOutput.m_flFalloff.m128_f32[0] * sampleOutput.m_flDot[0].m128_f32[0], dl->light.intensity, outColor );
-#else
-		VectorMA( outColor, sampleOutput.m_flFalloff[0] * sampleOutput.m_flDot[0][0], dl->light.intensity, outColor );
-#endif
+		GatherSampleLightSSE( sampleOutput, dl, -1, adjusted_pos4, &normal4,
+			1, // really it's number of FourVectors passed
+			iThread, g_bFastStaticProps ? ( nLFlags | GATHERLFLAGS_FORCE_FAST ) : nLFlags,
+			static_prop_id_to_skip, flEpsilon );
+
+		for ( int k = 0; k < numNormals; ++k )
+		{
+			if ( !((dl->light.type == emit_skylight) && bSkipSkyLight) )
+			{
+				VectorMA( outColors[k],
+						  sampleOutput.m_flFalloff[k] * sampleOutput.m_flDot[0][k],
+						  dl->light.intensity,
+						  outColors[k] );
+			}
+
+			outSunAmount[k] += SubFloat( sampleOutput.m_flSunAmount[0], k ) * (sampleOutput.m_flDot[0][0] > 0.0f ? 1.0f : 0.0f);
+		}
+	}
+}
+
+//-----------------------------------------------------------------------------
+// Sun-only variant of ComputeDirectLightingAtPoint: zeroes outSunAmount[0..numNormals) and adds m_flSunAmount per normal for each unstyled emit_skylight light that passes the PVS check.
+//-----------------------------------------------------------------------------
+void ComputeSunAmountAtPoint( Vector &position, Vector *normals, float *outSunAmount, int numNormals, int iThread,
+								   int static_prop_id_to_skip = -1, int nLFlags = 0 )
+{
+	SSE_sampleLightOutput_t	sampleOutput;
+
+	for ( int k = 0; k < numNormals; ++k )
+	{
+		outSunAmount[k] = 0.0f;
+	}
+
+	// Iterate over all direct lights and accumulate their contribution
+	int cluster = ClusterFromPoint( position );
+	for ( directlight_t *dl = activelights; dl != NULL; dl = dl->next )
+	{
+		if ( dl->light.style )
+		{
+			// skip lights with style
+			continue;
+		}
+
+		if ( dl->light.type != emit_skylight )
+		{
+			// skip lights that don't contribute to the sun amount
+			continue;
+		}
+
+		// is this light's cluster visible?
+		if ( !PVSCheck( dl->pvs, cluster ) )
+			continue;
+
+		// push the vertex towards the light to avoid surface acne
+		Vector adjusted_pos = position;
+		float flEpsilon = 0.0;
+
+		const float flFudgeFactor = 4.0;
+
+		// push towards the light: offset by flFudgeFactor along the negated light normal
+		Vector fudge;
+		fudge = -(dl->light.normal);
+		fudge *= flFudgeFactor;
+		adjusted_pos += fudge;
+
+		FourVectors adjusted_pos4;
+		adjusted_pos4.DuplicateVector( adjusted_pos );
+
+		FourVectors normal4;
+		switch ( numNormals )
+		{
+		case 4:
+			normal4.LoadAndSwizzle( normals[0], normals[1], normals[2], normals[3] );
+			break;
+		case 3:
+			normal4.LoadAndSwizzle( normals[0], normals[1], normals[2], normals[0] ); // only 3 normals: fourth slot repeats normals[0]
+			break;
+		default:
+			normal4.DuplicateVector( normals[0] );
+			break;
+		}
+
+		GatherSampleLightSSE( sampleOutput, dl, -1, adjusted_pos4, &normal4,
+							  1, // really it's number of FourVectors passed
+							  iThread, g_bFastStaticProps ? (nLFlags | GATHERLFLAGS_FORCE_FAST) : nLFlags,
+							  static_prop_id_to_skip, flEpsilon );
+
+		for ( int k = 0; k < numNormals; ++k )
+		{
+			outSunAmount[k] += SubFloat( sampleOutput.m_flSunAmount[0], k ) * (sampleOutput.m_flDot[0][0] > 0.0f ? 1.0f : 0.0f); // NOTE(review): gate reads m_flDot[0][0] for every k - confirm [0][k] is not intended
+		}
 	}
 }
 
 //-----------------------------------------------------------------------------
 // Takes the results from a ComputeLighting call and applies it to the static prop in question.
 //-----------------------------------------------------------------------------
-void CVradStaticPropMgr::ApplyLightingToStaticProp( int iStaticProp, CStaticProp &prop, const CComputeStaticPropLightingResults *pResults )
+void CVradStaticPropMgr::ApplyLightingToStaticProp( CStaticProp &prop, const CComputeStaticPropLightingResults *pResults )
 {
-	if ( pResults->m_ColorVertsArrays.Count() == 0 && pResults->m_ColorTexelsArrays.Count() == 0 )
+	if ( pResults->m_ColorVertsArrays.Count() == 0 )
 		return;
 
 	StaticPropDict_t &dict = m_StaticPropDict[prop.m_ModelIdx];
@@ -1227,9 +1673,8 @@ void CVradStaticPropMgr::ApplyLightingToStaticProp( int iStaticProp, CStaticProp
 	OptimizedModel::FileHeader_t *pVtxHdr = (OptimizedModel::FileHeader_t *)dict.m_VtxBuf.Base();
 	Assert( pStudioHdr && pVtxHdr );
 
+	int const numVertexLightComponents = g_numVradStaticPropsLightingStreams;
 	int iCurColorVertsArray = 0;
-	int iCurColorTexelsArray = 0;
-
 	for ( int bodyID = 0; bodyID < pStudioHdr->numbodyparts; ++bodyID )
 	{
 		OptimizedModel::BodyPartHeader_t* pVtxBodyPart = pVtxHdr->pBodyPart( bodyID );
@@ -1239,9 +1684,8 @@ void CVradStaticPropMgr::ApplyLightingToStaticProp( int iStaticProp, CStaticProp
 		{
 			OptimizedModel::ModelHeader_t* pVtxModel = pVtxBodyPart->pModel( modelID );
 			mstudiomodel_t *pStudioModel = pBodyPart->pModel( modelID );
-						
-			const CUtlVector<colorVertex_t> *colorVerts = pResults->m_ColorVertsArrays.Count() ? pResults->m_ColorVertsArrays[iCurColorVertsArray++] : nullptr;
-			const CUtlVector<colorTexel_t> *colorTexels = pResults->m_ColorTexelsArrays.Count() ? pResults->m_ColorTexelsArrays[iCurColorTexelsArray++] : nullptr;
+
+			const CUtlVector<colorVertex_t> &colorVerts = *pResults->m_ColorVertsArrays[iCurColorVertsArray++];
 			
 			for ( int nLod = 0; nLod < pVtxHdr->numLODs; nLod++ )
 			{
@@ -1256,51 +1700,25 @@ void CVradStaticPropMgr::ApplyLightingToStaticProp( int iStaticProp, CStaticProp
 					{
 						OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup( nGroup );
 						int nMeshIdx = prop.m_MeshData.AddToTail();
+						prop.m_MeshData[nMeshIdx].m_VertColorData.AddMultipleToTail( pStripGroup->numVerts * numVertexLightComponents );
+						prop.m_MeshData[nMeshIdx].m_numVerts = pStripGroup->numVerts;
+						prop.m_MeshData[nMeshIdx].m_nLod = nLod;
 
-						if (colorVerts)
+						for ( int nVertex = 0; nVertex < pStripGroup->numVerts; ++nVertex )
 						{
-							prop.m_MeshData[nMeshIdx].m_VertexColors.AddMultipleToTail( pStripGroup->numVerts );
-							prop.m_MeshData[nMeshIdx].m_nLod = nLod;
+							int nIndex = pMesh->vertexoffset + pStripGroup->pVertex( nVertex )->origMeshVertID;
 
-							for ( int nVertex = 0; nVertex < pStripGroup->numVerts; ++nVertex )
+							Assert( nIndex < pStudioModel->numvertices );
+							
+							if ( numVertexLightComponents <= 1 )
 							{
-								int nIndex = pMesh->vertexoffset + pStripGroup->pVertex( nVertex )->origMeshVertID;
-
-								Assert( nIndex < pStudioModel->numvertices );
-								prop.m_MeshData[nMeshIdx].m_VertexColors[nVertex] = (*colorVerts)[nIndex].m_Color;
+								prop.m_MeshData[nMeshIdx].m_VertColorData[nVertex].AsVector3D() = colorVerts[nIndex].m_Colors[0];
+								prop.m_MeshData[nMeshIdx].m_VertColorData[nVertex].w = colorVerts[nIndex].m_SunAmount[0];
 							}
-						}
-
-						if (colorTexels)
-						{
-							// TODO: Consider doing this work in the worker threads, because then we distribute it.
-							ConvertTexelDataToTexture(prop.m_LightmapImageWidth, prop.m_LightmapImageHeight, prop.m_LightmapImageFormat, (*colorTexels), &prop.m_MeshData[nMeshIdx].m_TexelsEncoded);
-
-							if (g_bDumpPropLightmaps)
+							else for ( int k = 0 ; k < numVertexLightComponents; ++ k )
 							{
-								char buffer[_MAX_PATH];
-								V_snprintf( 
-									buffer, 
-									_MAX_PATH - 1, 
-									"staticprop_lightmap_%d_%.0f_%.0f_%.0f_%s_%d_%d_%d_%d_%d.tga", 
-									iStaticProp, 
-									prop.m_Origin.x, 
-									prop.m_Origin.y,
-									prop.m_Origin.z,
-									dict.m_pStudioHdr->pszName(), 
-									bodyID, 
-									modelID, 
-									nLod, 
-									nMesh, 
-									nGroup 
-								);
-
-								for ( int i = 0; buffer[i]; ++i ) 
-								{
-									if (buffer[i] == '/' || buffer[i] == '\\')
-										buffer[i] = '-';
-								}
-								DumpLightmapLinear( buffer, (*colorTexels), prop.m_LightmapImageWidth, prop.m_LightmapImageHeight );
+								prop.m_MeshData[nMeshIdx].m_VertColorData[nVertex * numVertexLightComponents + k].AsVector3D() = colorVerts[nIndex].m_Colors[k + 1];
+								prop.m_MeshData[nMeshIdx].m_VertColorData[nVertex * numVertexLightComponents + k].w = colorVerts[nIndex].m_SunAmount[k + 1];
 							}
 						}
 					}
@@ -1310,6 +1728,7 @@ void CVradStaticPropMgr::ApplyLightingToStaticProp( int iStaticProp, CStaticProp
 	}
 }
 
+
 //-----------------------------------------------------------------------------
 // Trace rays from each unique vertex, accumulating direct and indirect
 // sources at each ray termination. Use the winding data to distribute the unique vertexes
@@ -1329,43 +1748,40 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 		return;
 	}
 
-	const bool withVertexLighting = (prop.m_Flags & STATIC_PROP_NO_PER_VERTEX_LIGHTING) == 0;
-	const bool withTexelLighting = (prop.m_Flags & STATIC_PROP_NO_PER_TEXEL_LIGHTING) == 0;
+	int nGatherFlags = (prop.m_Flags & STATIC_PROP_IGNORE_NORMALS) ? GATHERLFLAGS_IGNORE_NORMALS : 0;
+	nGatherFlags |= (prop.m_Flags & STATIC_PROP_NO_PER_VERTEX_LIGHTING) ? GATHERLFLAGS_NO_OCCLUSION : 0;
 
-	if (!withVertexLighting && !withTexelLighting)
-		return;
+ 	if ( dict.m_bHasPhong )
+ 	{
+ 		nGatherFlags &= ~GATHERLFLAGS_IGNORE_NORMALS;
+ 	}
 
-	const int skip_prop = (g_bDisablePropSelfShadowing || (prop.m_Flags & STATIC_PROP_NO_SELF_SHADOWING)) ? prop_index : -1;
-	const int nFlags = ( prop.m_Flags & STATIC_PROP_IGNORE_NORMALS ) ? GATHERLFLAGS_IGNORE_NORMALS : 0;
+	nGatherFlags |= GATHERLFLAGS_STATICPROP;
 
 #ifdef MPI
 	VMPI_SetCurrentStage( "ComputeLighting" );
 #endif
 
-	matrix3x4_t	matPos, matNormal;
-	AngleMatrix(prop.m_Angles, prop.m_Origin, matPos);
-	AngleMatrix(prop.m_Angles, matNormal);
-	
+	int numSampleNormals = (g_numVradStaticPropsLightingStreams > 1) ? (NUM_BUMP_VECTS + 1) : 1;
+	bool bCanUseTangents = dict.m_bHasBumpmap;
+	bool bSkipDirectSkylight = true;		// Only computing indirect GI for all static props now. Direct sunlight applied in shader.
+	if ( PositionIn3DSkybox( prop.m_Origin ) )
+	{
+		bSkipDirectSkylight = false;
+	}
+
 	for ( int bodyID = 0; bodyID < pStudioHdr->numbodyparts; ++bodyID )
 	{
-		OptimizedModel::BodyPartHeader_t* pVtxBodyPart = pVtxHdr->pBodyPart( bodyID );
 		mstudiobodyparts_t *pBodyPart = pStudioHdr->pBodypart( bodyID );
 
 		for ( int modelID = 0; modelID < pBodyPart->nummodels; ++modelID )
 		{
-			OptimizedModel::ModelHeader_t* pVtxModel = pVtxBodyPart->pModel(modelID);
 			mstudiomodel_t *pStudioModel = pBodyPart->pModel( modelID );
 
-			if (withTexelLighting)
-			{
-				CUtlVector<colorTexel_t> *pColorTexelArray = new CUtlVector<colorTexel_t>;
-				pResults->m_ColorTexelsArrays.AddToTail(pColorTexelArray);
-			}
-			
 			// light all unique vertexes
 			CUtlVector<colorVertex_t> *pColorVertsArray = new CUtlVector<colorVertex_t>;
 			pResults->m_ColorVertsArrays.AddToTail( pColorVertsArray );
-						
+			
 			CUtlVector<colorVertex_t> &colorVerts = *pColorVertsArray; 
 			colorVerts.EnsureCount( pStudioModel->numvertices );
 			memset( colorVerts.Base(), 0, colorVerts.Count() * sizeof(colorVertex_t) );
@@ -1375,23 +1791,57 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 			{
 				mstudiomesh_t *pStudioMesh = pStudioModel->pMesh( meshID );
 				const mstudio_meshvertexdata_t *vertData = pStudioMesh->GetVertexData((void *)pStudioHdr);
-
-				Assert(vertData); // This can only return NULL on X360 for now
-				
-				// TODO: Move this into its own function. In fact, refactor this whole function.
-				if (withTexelLighting)
-				{
-					GenerateLightmapSamplesForMesh( matPos, matNormal, iThread, skip_prop, nFlags, prop.m_LightmapImageWidth, prop.m_LightmapImageHeight, pStudioHdr, pStudioModel, pVtxModel, meshID, pResults );
-				}
-
-				// If we do lightmapping, we also do vertex lighting as a potential fallback. This may change.
+				Assert( vertData ); // This can only return NULL on X360 for now
 				for ( int vertexID = 0; vertexID < pStudioMesh->numvertices; ++vertexID )
 				{
-					Vector sampleNormal;
+					Vector sampleNormals[ NUM_BUMP_VECTS + 1 ];
 					Vector samplePosition;
 					// transform position and normal into world coordinate system
-					VectorTransform(*vertData->Position(vertexID), matPos, samplePosition);
-					VectorTransform(*vertData->Normal(vertexID), matNormal, sampleNormal);
+					matrix3x4_t	matrix;
+					AngleMatrix( prop.m_Angles, prop.m_Origin, matrix );
+					VectorTransform( *vertData->Position( vertexID ), matrix, samplePosition );
+					AngleMatrix( prop.m_Angles, matrix );
+					VectorTransform( *vertData->Normal( vertexID ), matrix, sampleNormals[0] );
+
+					if( numSampleNormals > 1 )
+					{
+						Vector *bumpVects = &sampleNormals[1];
+						Vector4D *vecTangentS = vertData->HasTangentData() ? vertData->TangentS( vertexID ) : NULL;
+
+						if ( vecTangentS && bCanUseTangents )
+						{
+							Vector vecTexS;
+							VectorTransform( vecTangentS->AsVector3D(), matrix, vecTexS );
+
+							Vector vecTexT;
+							CrossProduct( sampleNormals[0], vecTexS, vecTexT );
+							vecTexT.NormalizeInPlace();
+
+							// recompute S-vector to have S, T, N as an orthonormal basis for hl2 vectors
+							CrossProduct( vecTexT, sampleNormals[0], vecTexS );
+
+							// respect the flip-factor for T-vector
+							vecTexT *= vecTangentS->w;
+
+							GetStaticPropBumpNormals(
+								vecTexS, vecTexT,
+								sampleNormals[0],
+								sampleNormals[0],
+								bumpVects );
+
+							sampleNormals[0].NormalizeInPlace();
+							sampleNormals[1].NormalizeInPlace();
+							sampleNormals[2].NormalizeInPlace();
+							sampleNormals[3].NormalizeInPlace();
+						}
+						else
+						{
+							sampleNormals[1] = sampleNormals[0];
+							sampleNormals[2] = sampleNormals[0];
+							sampleNormals[3] = sampleNormals[0];
+						}
+					}
+
 
 					if ( PositionInSolid( samplePosition ) )
 					{
@@ -1399,39 +1849,105 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 						badVertex_t badVertex;
 						badVertex.m_ColorVertex = numVertexes;
 						badVertex.m_Position = samplePosition;
-						badVertex.m_Normal = sampleNormal;
+						memcpy( badVertex.m_Normals, sampleNormals, sizeof( badVertex.m_Normals ) );
 						badVerts.AddToTail( badVertex );			
 					}
-					else
+					else 
 					{
 						Vector direct_pos=samplePosition;
-							
-						
+						int skip_prop = -1;
+						if ( g_bDisablePropSelfShadowing || ( prop.m_Flags & STATIC_PROP_NO_SELF_SHADOWING ) )
+						{
+							skip_prop = prop_index;
+						}
 
-						Vector directColor(0,0,0);
-						ComputeDirectLightingAtPoint( direct_pos,
-														sampleNormal, directColor, iThread,
-														skip_prop, nFlags );
-						Vector indirectColor(0,0,0);
+						Vector directColors[ NUM_BUMP_VECTS + 1 ];
+						float sunAmount[ NUM_BUMP_VECTS + 1 ];
+						memset( directColors, 0, sizeof( directColors ) );
+						memset( sunAmount, 0, sizeof( sunAmount ) );
+
+						if ( bCanUseTangents )
+						{
+							ComputeDirectLightingAtPoint( direct_pos,
+															sampleNormals, directColors, sunAmount, numSampleNormals, bSkipDirectSkylight,
+															iThread,
+															skip_prop, nGatherFlags );
+						}
+						else
+						{
+							ComputeDirectLightingAtPoint( direct_pos,
+															sampleNormals, directColors, sunAmount, 1, bSkipDirectSkylight,
+															iThread,
+															skip_prop, nGatherFlags );
+							directColors[1] = directColors[0];
+							directColors[2] = directColors[0];
+							directColors[3] = directColors[0];
+							sunAmount[1] = sunAmount[0];
+							sunAmount[2] = sunAmount[0];
+							sunAmount[3] = sunAmount[0];
+						}
+
+						if ( numSampleNormals > 1 )
+						{
+							// doing this for direct and indirect separately helps eliminate errors with CSM blending
+							NormalizeVertexBumpedLighting( directColors, directColors + 1 );
+						}
+
+
+						Vector indirectColors[ NUM_BUMP_VECTS + 1 ];
+						memset( indirectColors, 0, sizeof( indirectColors ) );
 
 						if (g_bShowStaticPropNormals)
 						{
-							directColor= sampleNormal;
-							directColor += Vector(1.0,1.0,1.0);
-							directColor *= 50.0;
+							directColors[0] = sampleNormals[0];
+							directColors[0] += Vector(1.0,1.0,1.0);
+							directColors[0] *= 50.0;
+							directColors[1] = directColors[0];
+							directColors[2] = directColors[0];
+							directColors[3] = directColors[0];
 						}
 						else
 						{
 							if (numbounce >= 1)
-								ComputeIndirectLightingAtPoint( 
-									samplePosition, sampleNormal, 
-									indirectColor, iThread, true,
-									( prop.m_Flags & STATIC_PROP_IGNORE_NORMALS) != 0 );
+							{
+								if ( bCanUseTangents )
+								{
+									ComputeIndirectLightingAtPoint(
+										samplePosition, sampleNormals,
+										indirectColors, numSampleNormals, iThread, g_bFastStaticProps,
+										( prop.m_Flags & STATIC_PROP_IGNORE_NORMALS ) != 0, prop_index );
+								}
+								else
+								{
+									ComputeIndirectLightingAtPoint(
+										samplePosition, sampleNormals,
+										indirectColors, 1, iThread, g_bFastStaticProps,
+										( prop.m_Flags & STATIC_PROP_IGNORE_NORMALS ) != 0, prop_index );
+									indirectColors[1] = indirectColors[0];
+									indirectColors[2] = indirectColors[0];
+									indirectColors[3] = indirectColors[0];
+								}
+
+								if ( numSampleNormals > 1 )
+								{
+									// doing this for direct and indirect separately helps eliminate errors with CSM blending
+									NormalizeVertexBumpedLighting( indirectColors, indirectColors + 1 );
+								}
+							}
 						}
-						
+
 						colorVerts[numVertexes].m_bValid = true;
 						colorVerts[numVertexes].m_Position = samplePosition;
-						VectorAdd( directColor, indirectColor, colorVerts[numVertexes].m_Color );
+						for ( int k = 0; k < numSampleNormals; ++ k )
+						{
+							VectorAdd( directColors[k], indirectColors[k], colorVerts[numVertexes].m_Colors[k] );
+							colorVerts[numVertexes].m_SunAmount[k] = sunAmount[k];
+						}
+						if ( numSampleNormals > 1 )
+						{
+							float *pSunAmountUnbumped = &colorVerts[numVertexes].m_SunAmount[0];
+							NormalizeVertexBumpedSunAmount( pSunAmountUnbumped, pSunAmountUnbumped+1, pSunAmountUnbumped+2, pSunAmountUnbumped+3 );
+						}
 					}
 					
 					numVertexes++;
@@ -1478,7 +1994,7 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 					}
 
 					// crawl toward best position
-					// sudivide to determine a closer valid point to the bad vertex, and re-light
+					// subdivide to determine a closer valid point to the bad vertex, and re-light
 					Vector midPosition;
 					int numIterations = 20;
 					while ( --numIterations > 0 )
@@ -1490,18 +2006,57 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 						bestPosition = midPosition;
 					}
 
-					// re-light from better position
-					Vector directColor;
-					ComputeDirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normal, directColor, iThread );
+					Vector directColors[ NUM_BUMP_VECTS + 1 ];
+					memset( directColors, 0, sizeof( directColors ) );
+					Vector indirectColors[ NUM_BUMP_VECTS + 1 ];
+					memset( indirectColors, 0, sizeof( indirectColors ) );
+					float sunAmount[NUM_BUMP_VECTS + 1];
+					memset( sunAmount, 0, sizeof( sunAmount ) );
 
-					Vector indirectColor;
-					ComputeIndirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normal,
-													indirectColor, iThread, true );
+					// re-light from better position
+					if ( bCanUseTangents )
+					{
+						ComputeDirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normals,
+													  directColors, sunAmount, numSampleNormals, bSkipDirectSkylight, iThread );
+						ComputeIndirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normals,
+														indirectColors, numSampleNormals, iThread, true, false, prop_index );
+					}
+					else
+					{
+						ComputeDirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normals,
+													  directColors, sunAmount, 1, bSkipDirectSkylight, iThread );
+						// doing this for direct and indirect separately helps eliminate errors with CSM blending
+						ComputeIndirectLightingAtPoint( bestPosition, badVerts[nBadVertex].m_Normals,
+														indirectColors, 1, iThread, true, false, prop_index );
+						for ( int k = 1; k < numSampleNormals; ++k )
+						{
+							directColors[k]		= directColors[0];
+							indirectColors[k]	= indirectColors[0];
+							sunAmount[k]		= sunAmount[0];
+						}
+					}
+
+					if ( numSampleNormals > 1 )
+					{
+						// doing this for direct and indirect separately helps eliminate errors with CSM blending
+						NormalizeVertexBumpedLighting( directColors, directColors + 1 );
+						NormalizeVertexBumpedLighting( indirectColors, indirectColors + 1 );
+					}
 
 					// save results, not changing valid status
 					// to ensure this offset position is not considered as a viable candidate
-					colorVerts[badVerts[nBadVertex].m_ColorVertex].m_Position = bestPosition;
-					VectorAdd( directColor, indirectColor, colorVerts[badVerts[nBadVertex].m_ColorVertex].m_Color );
+					const int idxColorVertex = badVerts[nBadVertex].m_ColorVertex;
+					colorVerts[idxColorVertex].m_Position = bestPosition;
+					for ( int k = 0; k < numSampleNormals; ++ k )
+					{
+						VectorAdd( directColors[k], indirectColors[k], colorVerts[idxColorVertex].m_Colors[k] );
+						colorVerts[idxColorVertex].m_SunAmount[k] = sunAmount[k];
+					}
+					if ( numSampleNormals > 1 )
+					{
+						float *pSunAmountUnbumped = &colorVerts[idxColorVertex].m_SunAmount[0];
+						NormalizeVertexBumpedSunAmount( pSunAmountUnbumped, pSunAmountUnbumped + 1, pSunAmountUnbumped + 2, pSunAmountUnbumped + 3 );
+					}
 				}
 			}
 			
@@ -1512,7 +2067,7 @@ void CVradStaticPropMgr::ComputeLighting( CStaticProp &prop, int iThread, int pr
 }
 
 //-----------------------------------------------------------------------------
-// Write the lighitng to bsp pak lump
+// Write the lighting to bsp pak lump
 //-----------------------------------------------------------------------------
 void CVradStaticPropMgr::SerializeLighting()
 {
@@ -1533,10 +2088,6 @@ void CVradStaticPropMgr::SerializeLighting()
 	int size;
 	for (int i = 0; i < count; ++i)
 	{
-		// no need to write this file if we didn't compute the data
-		// props marked this way will not load the info anyway 
-		if ( m_StaticProps[i].m_Flags & STATIC_PROP_NO_PER_VERTEX_LIGHTING )
-			continue;
 
 		if (g_bHDR)
 		{
@@ -1550,13 +2101,15 @@ void CVradStaticPropMgr::SerializeLighting()
 		int totalVertexes = 0;
 		for ( int j=0; j<m_StaticProps[i].m_MeshData.Count(); j++ )
 		{
-			totalVertexes += m_StaticProps[i].m_MeshData[j].m_VertexColors.Count();
+			totalVertexes += m_StaticProps[i].m_MeshData[j].m_numVerts;
 		}
 
+		int numLightingComponents = g_numVradStaticPropsLightingStreams;
+
 		// allocate a buffer with enough padding for alignment
 		size = sizeof( HardwareVerts::FileHeader_t ) + 
 				m_StaticProps[i].m_MeshData.Count()*sizeof(HardwareVerts::MeshHeader_t) +
-				totalVertexes*4 + 2*512;
+				totalVertexes*4*numLightingComponents + 2*512;
 		utlBuf.EnsureCapacity( size );
 		Q_memset( utlBuf.Base(), 0, size );
 
@@ -1564,13 +2117,13 @@ void CVradStaticPropMgr::SerializeLighting()
 
 		// align to start of vertex data
 		unsigned char *pVertexData = (unsigned char *)(sizeof( HardwareVerts::FileHeader_t ) + m_StaticProps[i].m_MeshData.Count()*sizeof(HardwareVerts::MeshHeader_t));
-		pVertexData = (unsigned char*)pVhvHdr + ALIGN_TO_POW2( (uintp)pVertexData, 512 );
+		pVertexData = (unsigned char*)pVhvHdr + ALIGN_TO_POW2( (size_t)pVertexData, 512 );
 		
 		// construct header
 		pVhvHdr->m_nVersion     = VHV_VERSION;
 		pVhvHdr->m_nChecksum    = m_StaticPropDict[m_StaticProps[i].m_ModelIdx].m_pStudioHdr->checksum;
-		pVhvHdr->m_nVertexFlags = VERTEX_COLOR;
-		pVhvHdr->m_nVertexSize  = 4;
+		pVhvHdr->m_nVertexFlags = ( numLightingComponents > 1 ) ? VERTEX_NORMAL : VERTEX_COLOR;
+		pVhvHdr->m_nVertexSize  = 4 * numLightingComponents;
 		pVhvHdr->m_nVertexes    = totalVertexes;
 		pVhvHdr->m_nMeshes      = m_StaticProps[i].m_MeshData.Count();
 
@@ -1579,16 +2132,19 @@ void CVradStaticPropMgr::SerializeLighting()
 			// construct mesh dictionary
 			HardwareVerts::MeshHeader_t *pMesh = pVhvHdr->pMesh( n );
 			pMesh->m_nLod      = m_StaticProps[i].m_MeshData[n].m_nLod;
-			pMesh->m_nVertexes = m_StaticProps[i].m_MeshData[n].m_VertexColors.Count();
-			pMesh->m_nOffset   = (uintp)pVertexData - (uintp)pVhvHdr; 
+			pMesh->m_nVertexes = m_StaticProps[i].m_MeshData[n].m_numVerts;
+			pMesh->m_nOffset   = (size_t)pVertexData - (size_t)pVhvHdr; 
 
 			// construct vertexes
-			for (int k=0; k<pMesh->m_nVertexes; k++)
+			for (int k=0; k<m_StaticProps[i].m_MeshData[n].m_VertColorData.Count(); k++)
 			{
-				Vector &vertexColor = m_StaticProps[i].m_MeshData[n].m_VertexColors[k];
+				Vector &vector = m_StaticProps[i].m_MeshData[n].m_VertColorData[k].AsVector3D();
+
+				//if ( (vector.x > 1024.0f) || (vector.y > 1024.0f) || (vector.z > 1024.0f) )
+				//	Msg(" *** out of range prop lighting *** \n");
 
 				ColorRGBExp32 rgbColor;
-				VectorToColorRGBExp32( vertexColor, rgbColor );
+				VectorToColorRGBExp32( vector, rgbColor );
 				unsigned char dstColor[4];
 				ConvertRGBExp32ToRGBA8888( &rgbColor, dstColor );
 
@@ -1596,74 +2152,21 @@ void CVradStaticPropMgr::SerializeLighting()
 				pVertexData[0] = dstColor[2];
 				pVertexData[1] = dstColor[1];
 				pVertexData[2] = dstColor[0];
-				pVertexData[3] = dstColor[3];
+
+				// Use the unmodified lighting data to generate the sun percentage, not the output of the RGBE conversions above!
+				float flSunAmount = m_StaticProps[i].m_MeshData[n].m_VertColorData[k].w;
+				pVertexData[3] = uint8( clamp( flSunAmount, 0.0f, 1.0f ) * 255.0f + 0.5f );
+
 				pVertexData += 4;
 			}
 		}
 
 		// align to end of file
-		pVertexData = (unsigned char *)((uintp)pVertexData - (uintp)pVhvHdr);
-		pVertexData = (unsigned char*)pVhvHdr + ALIGN_TO_POW2( (uintp)pVertexData, 512 );
+		pVertexData = (unsigned char *)((size_t)pVertexData - (size_t)pVhvHdr);
+		pVertexData = (unsigned char*)pVhvHdr + ALIGN_TO_POW2( (size_t)pVertexData, 512 );
 
 		AddBufferToPak( GetPakFile(), filename, (void*)pVhvHdr, pVertexData - (unsigned char*)pVhvHdr, false );
 	}
-
-	for (int i = 0; i < count; ++i)
-	{
-		const int kAlignment = 512;
-		// no need to write this file if we didn't compute the data
-		// props marked this way will not load the info anyway 
-		if (m_StaticProps[i].m_Flags & STATIC_PROP_NO_PER_TEXEL_LIGHTING)
-			continue;
-
-		sprintf(filename, "texelslighting_%d.ppl", i);
-
-		ImageFormat fmt = m_StaticProps[i].m_LightmapImageFormat;
-
-		uintp totalTexelSizeBytes = 0;
-		for (int j = 0; j < m_StaticProps[i].m_MeshData.Count(); j++)
-		{
-			totalTexelSizeBytes += m_StaticProps[i].m_MeshData[j].m_TexelsEncoded.Count();
-		}
-
-		// allocate a buffer with enough padding for alignment
-		size = sizeof(HardwareTexels::FileHeader_t) 
-			 + m_StaticProps[i].m_MeshData.Count() * sizeof(HardwareTexels::MeshHeader_t) 
-			 + totalTexelSizeBytes
-			 + 2 * kAlignment;
-		
-		utlBuf.EnsureCapacity(size);
-		Q_memset(utlBuf.Base(), 0, size);
-
-		HardwareTexels::FileHeader_t *pVhtHdr = (HardwareTexels::FileHeader_t *)utlBuf.Base();
-
-		// align start of texel data
-		unsigned char *pTexelData = (unsigned char *)(sizeof(HardwareTexels::FileHeader_t) + m_StaticProps[i].m_MeshData.Count() * sizeof(HardwareTexels::MeshHeader_t));
-		pTexelData = (unsigned char*)pVhtHdr + ALIGN_TO_POW2((uintp)pTexelData, kAlignment);
-
-		pVhtHdr->m_nVersion	    = VHT_VERSION;
-		pVhtHdr->m_nChecksum    = m_StaticPropDict[m_StaticProps[i].m_ModelIdx].m_pStudioHdr->checksum;
-		pVhtHdr->m_nTexelFormat = fmt;
-		pVhtHdr->m_nMeshes      = m_StaticProps[i].m_MeshData.Count();
-
-		for (int n = 0; n < pVhtHdr->m_nMeshes; n++)
-		{
-			HardwareTexels::MeshHeader_t *pMesh = pVhtHdr->pMesh(n);
-			pMesh->m_nLod = m_StaticProps[i].m_MeshData[n].m_nLod;
-			pMesh->m_nOffset = (uintp)pTexelData - (uintp)pVhtHdr;
-			pMesh->m_nBytes = m_StaticProps[i].m_MeshData[n].m_TexelsEncoded.Count();
-			pMesh->m_nWidth = m_StaticProps[i].m_LightmapImageWidth;
-			pMesh->m_nHeight = m_StaticProps[i].m_LightmapImageHeight;
-
-			Q_memcpy(pTexelData, m_StaticProps[i].m_MeshData[n].m_TexelsEncoded.Base(), m_StaticProps[i].m_MeshData[n].m_TexelsEncoded.Count());
-			pTexelData += m_StaticProps[i].m_MeshData[n].m_TexelsEncoded.Count();
-		}
-
-		pTexelData = (unsigned char *)((uintp)pTexelData - (uintp)pVhtHdr);
-		pTexelData = (unsigned char*)pVhtHdr + ALIGN_TO_POW2((uintp)pTexelData, kAlignment);
-
-		AddBufferToPak(GetPakFile(), filename, (void*)pVhtHdr, pTexelData - (unsigned char*)pVhtHdr, false);
-	}
 }
 
 #ifdef MPI
@@ -1700,17 +2203,6 @@ void CVradStaticPropMgr::VMPI_ProcessStaticProp( int iThread, int iStaticProp, M
 		pBuf->write( &count, sizeof( count ) );
 		pBuf->write( curList.Base(), curList.Count() * sizeof( colorVertex_t ) );
 	}
-
-	nLists = results.m_ColorTexelsArrays.Count();
-	pBuf->write(&nLists, sizeof(nLists));
-
-	for (int i = 0; i < nLists; i++)
-	{
-		CUtlVector<colorTexel_t> &curList = *results.m_ColorTexelsArrays[i];
-		int count = curList.Count();
-		pBuf->write(&count, sizeof(count));
-		pBuf->write(curList.Base(), curList.Count() * sizeof(colorTexel_t));
-	}
 }
 
 //-----------------------------------------------------------------------------
@@ -1734,22 +2226,9 @@ void CVradStaticPropMgr::VMPI_ReceiveStaticPropResults( int iStaticProp, Message
 		pList->SetSize( count );
 		pBuf->read( pList->Base(), count * sizeof( colorVertex_t ) );
 	}
-
-	pBuf->read(&nLists, sizeof(nLists));
-
-	for (int i = 0; i < nLists; i++)
-	{
-		CUtlVector<colorTexel_t> *pList = new CUtlVector<colorTexel_t>;
-		results.m_ColorTexelsArrays.AddToTail(pList);
-
-		int count;
-		pBuf->read(&count, sizeof(count));
-		pList->SetSize(count);
-		pBuf->read(pList->Base(), count * sizeof(colorTexel_t));
-	}
 	
 	// Apply the results.
-	ApplyLightingToStaticProp( iStaticProp, m_StaticProps[iStaticProp], &results );
+	ApplyLightingToStaticProp( m_StaticProps[iStaticProp], &results );
 }
 #endif
 
@@ -1758,7 +2237,7 @@ void CVradStaticPropMgr::ComputeLightingForProp( int iThread, int iStaticProp )
 	// Compute the lighting.
 	CComputeStaticPropLightingResults results;
 	ComputeLighting( m_StaticProps[iStaticProp], iThread, iStaticProp, &results );
-	ApplyLightingToStaticProp( iStaticProp, m_StaticProps[iStaticProp], &results );
+	ApplyLightingToStaticProp( m_StaticProps[iStaticProp], &results );
 }
 
 void CVradStaticPropMgr::ThreadComputeStaticPropLighting( int iThread, void *pUserData )
@@ -1787,8 +2266,16 @@ void CVradStaticPropMgr::ComputeLighting( int iThread )
 		return;
 	}
 
+	double start = Plat_FloatTime();
+
 	StartPacifier( "Computing static prop lighting : " );
 
+#if 0
+	CGlViewBuffer glViewBuf;
+	glViewBuf.WriteKDTree( &g_RtEnv );
+	g_pFullFileSystem->WriteFile( "maps/rtenv.gl", "GAME", glViewBuf );
+#endif
+
 	// ensure any traces against us are ignored because we have no inherit lighting contribution
 	m_bIgnoreStaticPropTrace = true;
 
@@ -1800,7 +2287,6 @@ void CVradStaticPropMgr::ComputeLighting( int iThread )
 		
 		DistributeWork( 
 			count, 
-			VMPI_DISTRIBUTEWORK_PACKETID,
 			&CVradStaticPropMgr::VMPI_ProcessStaticProp_Static, 
 			&CVradStaticPropMgr::VMPI_ReceiveStaticPropResults_Static );
 	}
@@ -1812,11 +2298,15 @@ void CVradStaticPropMgr::ComputeLighting( int iThread )
 
 	// restore default
 	m_bIgnoreStaticPropTrace = false;
-
+	 
 	// save data to bsp
 	SerializeLighting();
 
 	EndPacifier( true );
+
+	double end = Plat_FloatTime();
+
+	DumpElapsedTime( (int)(end - start) );
 }
 
 //-----------------------------------------------------------------------------
@@ -1879,11 +2369,17 @@ void CVradStaticPropMgr::AddPolysForRayTrace( void )
 		if ( !pStudioHdr || !pVtxHdr )
 		{
 			// must have model and its verts for decoding triangles
-			return;
+			// report which model is unusable and skip only this prop (previously this returned and dropped every remaining prop)
+			printf( "Can't get studio header (%p) and vertex data (%p) for %s\n", (const void *)pStudioHdr, (const void *)pVtxHdr,
+					pStudioHdr ? pStudioHdr->name : "***unknown***" );
+			continue;
 		}
 		// only init the triangle table the first time
 		bool bInitTriangles = dict.m_triangleMaterialIndex.Count() ? false : true;
 		int triangleIndex = 0;
+		// transform position into world coordinate system
+		matrix3x4_t	matrix;
+		AngleMatrix( prop.m_Angles, prop.m_Origin, matrix );
 
 		// meshes are deeply hierarchial, divided between three stores, follow the white rabbit
 		// body parts -> models -> lod meshes -> strip groups -> strips
@@ -1942,77 +2438,65 @@ void CVradStaticPropMgr::AddPolysForRayTrace( void )
 						{
 							OptimizedModel::StripHeader_t *pStrip = pStripGroup->pStrip( nStrip );
 
-							if ( pStrip->flags & OptimizedModel::STRIP_IS_TRILIST )
+							for ( int i = 0; i < pStrip->numIndices; i += 3 )
 							{
-								for ( int i = 0; i < pStrip->numIndices; i += 3 )
+								int idx = pStrip->indexOffset + i;
+
+								unsigned short i1 = *pStripGroup->pIndex( idx );
+								unsigned short i2 = *pStripGroup->pIndex( idx + 1 );
+								unsigned short i3 = *pStripGroup->pIndex( idx + 2 );
+
+								int vertex1 = pStripGroup->pVertex( i1 )->origMeshVertID;
+								int vertex2 = pStripGroup->pVertex( i2 )->origMeshVertID;
+								int vertex3 = pStripGroup->pVertex( i3 )->origMeshVertID;
+
+								// 'matrix' (prop-local -> world) is loop-invariant: it is built once per
+								// prop, before the body-part loop, so it is reused here instead of being
+								// re-declared (shadowing the outer one) and recomputed for every triangle.
+								Vector position1;
+								Vector position2;
+								Vector position3;
+								VectorTransform( *vertData->Position( vertex1 ), matrix, position1 );
+								VectorTransform( *vertData->Position( vertex2 ), matrix, position2 );
+								VectorTransform( *vertData->Position( vertex3 ), matrix, position3 );
+								unsigned short flags = 0;
+								int materialIndex = -1;
+								Vector color = vec3_origin;
+								if ( shadowTextureIndex >= 0 )
 								{
-									int idx = pStrip->indexOffset + i;
-
-									unsigned short i1 = *pStripGroup->pIndex( idx );
-									unsigned short i2 = *pStripGroup->pIndex( idx + 1 );
-									unsigned short i3 = *pStripGroup->pIndex( idx + 2 );
-
-									int vertex1 = pStripGroup->pVertex( i1 )->origMeshVertID;
-									int vertex2 = pStripGroup->pVertex( i2 )->origMeshVertID;
-									int vertex3 = pStripGroup->pVertex( i3 )->origMeshVertID;
-
-									// transform position into world coordinate system
-									matrix3x4_t	matrix;
-									AngleMatrix( prop.m_Angles, prop.m_Origin, matrix );
-
-									Vector position1;
-									Vector position2;
-									Vector position3;
-									VectorTransform( *vertData->Position( vertex1 ), matrix, position1 );
-									VectorTransform( *vertData->Position( vertex2 ), matrix, position2 );
-									VectorTransform( *vertData->Position( vertex3 ), matrix, position3 );
-									unsigned short flags = 0;
-									int materialIndex = -1;
-									Vector color = vec3_origin;
-									if ( shadowTextureIndex >= 0 )
+									if ( bInitTriangles )
 									{
-										if ( bInitTriangles )
+										// add texture space and texture index to material database
+										// now
+										float coverage = g_ShadowTextureList.ComputeCoverageForTriangle(shadowTextureIndex, *vertData->Texcoord(vertex1), *vertData->Texcoord(vertex2), *vertData->Texcoord(vertex3) );
+										if ( coverage < 1.0f )
 										{
-											// add texture space and texture index to material database
-											// now
-											float coverage = g_ShadowTextureList.ComputeCoverageForTriangle(shadowTextureIndex, *vertData->Texcoord(vertex1), *vertData->Texcoord(vertex2), *vertData->Texcoord(vertex3) );
-											if ( coverage < 1.0f )
-											{
-												materialIndex = g_ShadowTextureList.AddMaterialEntry( shadowTextureIndex, *vertData->Texcoord(vertex1), *vertData->Texcoord(vertex2), *vertData->Texcoord(vertex3) );
-												color.x = coverage;
-											}
-											else
-											{
-												materialIndex = -1;
-											}
-											dict.m_triangleMaterialIndex.AddToTail(materialIndex);
+											materialIndex = g_ShadowTextureList.AddMaterialEntry( shadowTextureIndex, *vertData->Texcoord(vertex1), *vertData->Texcoord(vertex2), *vertData->Texcoord(vertex3) );
+											color.x = coverage;
 										}
 										else
 										{
-											materialIndex = dict.m_triangleMaterialIndex[triangleIndex];
-											triangleIndex++;
-										}
-										if ( materialIndex >= 0 )
-										{
-											flags = FCACHETRI_TRANSPARENT;
+											materialIndex = -1;
 										}
+										dict.m_triangleMaterialIndex.AddToTail(materialIndex);
 									}
+									else
+									{
+										materialIndex = dict.m_triangleMaterialIndex[triangleIndex];
+										triangleIndex++;
+									}
+									if ( materialIndex >= 0 )
+									{
+										flags = FCACHETRI_TRANSPARENT;
+									}
+								}
 // 		printf( "\ngl 3\n" );
 // 		printf( "gl %6.3f %6.3f %6.3f 1 0 0\n", XYZ(position1));
 // 		printf( "gl %6.3f %6.3f %6.3f 0 1 0\n", XYZ(position2));
 // 		printf( "gl %6.3f %6.3f %6.3f 0 0 1\n", XYZ(position3));
-									g_RtEnv.AddTriangle( TRACE_ID_STATICPROP | nProp,
-														 position1, position2, position3,
-														 color, flags, materialIndex);
-								}
-							}
-							else
-							{
-								// all tris expected to be discrete tri lists
-								// must fixme if stripping ever occurs
-								printf( "unexpected strips found\n" );
-								Assert( 0 );
-								return;
+								g_RtEnv.AddTriangle( TRACE_ID_STATICPROP | nProp,
+													 position1, position2, position3,
+													 color, flags, materialIndex);
 							}
 						}
 					}
@@ -2126,50 +2610,39 @@ void CVradStaticPropMgr::BuildTriList( CStaticProp &prop )
 					{
 						OptimizedModel::StripHeader_t *pStrip = pStripGroup->pStrip( nStrip );
 
-						if ( pStrip->flags & OptimizedModel::STRIP_IS_TRILIST )
+						for ( int i = 0; i < pStrip->numIndices; i += 3 )
 						{
-							for ( int i = 0; i < pStrip->numIndices; i += 3 )
-							{
-								int idx = pStrip->indexOffset + i;
+							int idx = pStrip->indexOffset + i;
 
-								unsigned short i1 = *pStripGroup->pIndex( idx );
-								unsigned short i2 = *pStripGroup->pIndex( idx + 1 );
-								unsigned short i3 = *pStripGroup->pIndex( idx + 2 );
+							unsigned short i1 = *pStripGroup->pIndex( idx );
+							unsigned short i2 = *pStripGroup->pIndex( idx + 1 );
+							unsigned short i3 = *pStripGroup->pIndex( idx + 2 );
 
-								int vertex1 = pStripGroup->pVertex( i1 )->origMeshVertID;
-								int vertex2 = pStripGroup->pVertex( i2 )->origMeshVertID;
-								int vertex3 = pStripGroup->pVertex( i3 )->origMeshVertID;
+							int vertex1 = pStripGroup->pVertex( i1 )->origMeshVertID;
+							int vertex2 = pStripGroup->pVertex( i2 )->origMeshVertID;
+							int vertex3 = pStripGroup->pVertex( i3 )->origMeshVertID;
 
-								// transform position into world coordinate system
-								matrix3x4_t	matrix;
-								AngleMatrix( prop.m_Angles, prop.m_Origin, matrix );
+							// transform position into world coordinate system
+							matrix3x4_t	matrix;
+							AngleMatrix( prop.m_Angles, prop.m_Origin, matrix );
 
-								Vector position1;
-								Vector position2;
-								Vector position3;
-								VectorTransform( *vertData->Position( vertex1 ), matrix, position1 );
-								VectorTransform( *vertData->Position( vertex2 ), matrix, position2 );
-								VectorTransform( *vertData->Position( vertex3 ), matrix, position3 );
+							Vector position1;
+							Vector position2;
+							Vector position3;
+							VectorTransform( *vertData->Position( vertex1 ), matrix, position1 );
+							VectorTransform( *vertData->Position( vertex2 ), matrix, position2 );
+							VectorTransform( *vertData->Position( vertex3 ), matrix, position3 );
 
-								Vector normal1;
-								Vector normal2;
-								Vector normal3;
-								VectorTransform( *vertData->Normal( vertex1 ), matrix, normal1 );
-								VectorTransform( *vertData->Normal( vertex2 ), matrix, normal2 );
-								VectorTransform( *vertData->Normal( vertex3 ), matrix, normal3 );
+							Vector normal1;
+							Vector normal2;
+							Vector normal3;
+							VectorTransform( *vertData->Normal( vertex1 ), matrix, normal1 );
+							VectorTransform( *vertData->Normal( vertex2 ), matrix, normal2 );
+							VectorTransform( *vertData->Normal( vertex3 ), matrix, normal3 );
 
-								AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex1, position1, position1, position2, position3, normal1, normal2, normal3 );
-								AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex2, position2, position1, position2, position3, normal1, normal2, normal3 );
-								AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex3, position3, position1, position2, position3, normal1, normal2, normal3 );
-							}
-						}
-						else
-						{
-							// all tris expected to be discrete tri lists
-							// must fixme if stripping ever occurs
-							printf( "unexpected strips found\n" );
-							Assert( 0 );
-							return;
+							AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex1, position1, position1, position2, position3, normal1, normal2, normal3 );
+							AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex2, position2, position1, position2, position3, normal1, normal2, normal3 );
+							AddTriVertsToList( triListVerts, pMesh->vertexoffset + vertex3, position3, position1, position2, position3, normal1, normal2, normal3 );
 						}
 					}
 				}
@@ -2197,23 +2670,20 @@ const vertexFileHeader_t * mstudiomodel_t::CacheVertexData( void *pModelData )
 	strcat( fileName, ".vvd" );
 
 	// load the model
-	FileHandle_t fileHandle = g_pFileSystem->Open( fileName, "rb" );
-	if ( !fileHandle )
+	CUtlBuffer bufData;
+	if ( !LoadFile( fileName, bufData ) )
 	{
 		Error( "Unable to load vertex data \"%s\"\n", fileName );
 	}
 
 	// Get the file size
-	int vvdSize = g_pFileSystem->Size( fileHandle );
+	int vvdSize = bufData.TellPut();
 	if ( vvdSize == 0 )
 	{
-		g_pFileSystem->Close( fileHandle );
 		Error( "Bad size for vertex data \"%s\"\n", fileName );
 	}
 
-	vertexFileHeader_t *pVvdHdr = (vertexFileHeader_t *)malloc( vvdSize );
-	g_pFileSystem->Read( pVvdHdr, vvdSize, fileHandle );
-	g_pFileSystem->Close( fileHandle );
+	vertexFileHeader_t *pVvdHdr = (vertexFileHeader_t *) bufData.Base();
 
 	// check header
 	if ( pVvdHdr->id != MODEL_VERTEX_FILE_ID )
@@ -2238,471 +2708,150 @@ const vertexFileHeader_t * mstudiomodel_t::CacheVertexData( void *pModelData )
 	}
 
 	// load vertexes and run fixups
-	Studio_LoadVertexes( pVvdHdr, pNewVvdHdr, 0, true );
+	Studio_LoadVertexes(pVvdHdr, pNewVvdHdr, 0, true);
 
 	// discard original
-	free( pVvdHdr );
 	pVvdHdr = pNewVvdHdr;
 
-	pActiveStudioHdr->SetVertexBase((void*)pVvdHdr);
+	pActiveStudioHdr->SetVertexBase( (void*)pVvdHdr );
 	return pVvdHdr;
 }
 
-// ------------------------------------------------------------------------------------------------
-// ------------------------------------------------------------------------------------------------
-// ------------------------------------------------------------------------------------------------
-struct ColorTexelValue
+extern float totalarea;
+extern unsigned num_degenerate_faces;
+extern int fakeplanes;
+extern int	PlaneTypeForNormal( Vector& normal );
+
+void MakePatchForTriangle( winding_t *w, Vector vRefl, int nStaticPropIdx )
 {
-	Vector mLinearColor;	// Linear color value for this texel
-	bool mValidData;		// Whether there is valid data in this texel.
-	size_t mTriangleIndex;	// Which triangle we used to generate the texel.
-};
+	float	    area;
+	CPatch		*patch;
+	Vector		centroid( 0, 0, 0 );
 
-// ------------------------------------------------------------------------------------------------
-inline int ComputeLinearPos( int _x, int _y, int _resX, int _resY )
-{
-	return MIN( MAX( 0, _y ), _resY - 1 ) * _resX
-		 + MIN( MAX( 0, _x ), _resX - 1 );
-}
-
-// ------------------------------------------------------------------------------------------------
-inline float ComputeBarycentricDistanceToTri( Vector _barycentricCoord, Vector2D _v[3] )
-{
-	Vector2D realPos = _barycentricCoord.x * _v[0]
-		             + _barycentricCoord.y * _v[1]
-					 + _barycentricCoord.z * _v[2];
-
-	int minIndex = 0;
-	float minVal = _barycentricCoord[0];
-	for (int i = 1; i < 3; ++i) {
-		if (_barycentricCoord[i] < minVal) {
-			minVal = _barycentricCoord[i];
-			minIndex = i;
-		}
-	}
-
-	Vector2D& first  = _v[ (minIndex + 1) % 3];
-	Vector2D& second = _v[ (minIndex + 2) % 3];
-
-	return CalcDistanceToLineSegment2D( realPos, first, second );
-}
-
-// ------------------------------------------------------------------------------------------------
-static void GenerateLightmapSamplesForMesh( const matrix3x4_t& _matPos, const matrix3x4_t& _matNormal, int _iThread, int _skipProp, int _flags, int _lightmapResX, int _lightmapResY, studiohdr_t* _pStudioHdr, mstudiomodel_t* _pStudioModel, OptimizedModel::ModelHeader_t* _pVtxModel, int _meshID, CComputeStaticPropLightingResults *_outResults )
-{
-	// Could iterate and gen this if needed.
-	int nLod = 0;
-
-	OptimizedModel::ModelLODHeader_t *pVtxLOD = _pVtxModel->pLOD(nLod);
-
-	CUtlVector<colorTexel_t> &colorTexels = (*_outResults->m_ColorTexelsArrays.Tail());
-	const int cTotalPixelCount = _lightmapResX * _lightmapResY;
-	colorTexels.EnsureCount(cTotalPixelCount);
-	memset(colorTexels.Base(), 0, colorTexels.Count() * sizeof(colorTexel_t));
-
-	for (int i = 0; i < colorTexels.Count(); ++i) {
-		colorTexels[i].m_fDistanceToTri = FLT_MAX;	
-	}
-
-	mstudiomesh_t* pMesh = _pStudioModel->pMesh(_meshID);
-	OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(_meshID);
-	const mstudio_meshvertexdata_t *vertData = pMesh->GetVertexData((void *)_pStudioHdr);
-	Assert(vertData); // This can only return NULL on X360 for now
-
-	for (int nGroup = 0; nGroup < pVtxMesh->numStripGroups; ++nGroup)
+	area = WindingArea( w );
+	if ( area <= 0 )
 	{
-		OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(nGroup);
+		num_degenerate_faces++;	// tally the rejected zero/negative-area triangle
+		return;	// NOTE(review): no patch is appended and 'w' is not released on this path; the visible caller still uses g_Patches.Count() - 1 afterwards - confirm intended
+	}
 
-		int nStrip;
-		for (nStrip = 0; nStrip < pStripGroup->numStrips; nStrip++)
+	totalarea += area;	// accumulate this triangle into the global area total
+
+	// append a new patch to the global list; zero it, then set the fields whose default is not all-zero
+	int ndxPatch = g_Patches.AddToTail();
+	patch = &g_Patches[ ndxPatch ];
+	memset( patch, 0, sizeof( CPatch ) );
+	patch->ndxNext = g_Patches.InvalidIndex();	// every link index starts out invalid (the patch is not linked to any other yet)
+	patch->ndxNextParent = g_Patches.InvalidIndex();
+	patch->ndxNextClusterChild = g_Patches.InvalidIndex();
+	patch->child1 = g_Patches.InvalidIndex();
+	patch->child2 = g_Patches.InvalidIndex();
+	patch->parent = g_Patches.InvalidIndex();
+	patch->needsBumpmap = false;
+	patch->staticPropIdx = nStaticPropIdx;	// remember which static prop produced this patch
+
+	patch->scale[ 0 ] = patch->scale[ 1 ] = 1.0f;
+	patch->area = area;
+	patch->sky = false;
+
+	// chop scaled up lightmaps coarser
+	patch->luxscale = 16.0f;
+	patch->chop = maxchop;
+
+	patch->winding = w;	// stores the caller's winding pointer itself; no copy is made here
+
+	patch->plane = new dplane_t;	// NOTE(review): no matching delete is visible in this function - confirm who owns the plane
+
+	Vector vecNormal;
+	CrossProduct( w->p[ 2 ] - w->p[ 0 ], w->p[ 1 ] - w->p[ 0 ], vecNormal );	// normal = (p2 - p0) x (p1 - p0)
+	VectorNormalize( vecNormal );
+	VectorCopy( vecNormal, patch->plane->normal );
+
+	patch->plane->dist = vecNormal.Dot( w->p[ 0 ] );	// plane distance taken at the first vertex
+	patch->plane->type = PlaneTypeForNormal( patch->plane->normal );
+	patch->planeDist = patch->plane->dist;
+
+	patch->faceNumber = -1;		// This is a bit hacky and is used to identify static prop patches in other parts of the code
+	WindingCenter( w, patch->origin );	// patch origin = center of the winding
+
+	VectorCopy( patch->plane->normal, patch->normal );
+
+	WindingBounds( w, patch->face_mins, patch->face_maxs );	// patch bounds start out equal to the triangle's bounds
+	VectorCopy( patch->face_mins, patch->mins );
+	VectorCopy( patch->face_maxs, patch->maxs );
+
+	patch->baselight.Init( 0.0f, 0.0f, 0.0f );	// base light starts at zero
+	patch->basearea = 1;
+	patch->reflectivity = vRefl;	// reflectivity is supplied by the caller
+}
+
+
+
+void CVradStaticPropMgr::MakePatches()
+{
+	int count = m_StaticProps.Count();
+	if ( !count )
+	{
+		// nothing to do
+		return;
+	}
+
+	// Triangle coverage of 1 (full coverage)
+	Vector fullCoverage;
+	fullCoverage.x = 1.0f;
+	int nPatchCount = 0;
+
+	//IScratchPad3D *pPad = ScratchPad3D_Create();
+	//pPad->SetAutoFlush( false );
+	for ( int nProp = 0; nProp < count; ++nProp )
+	{
+		CStaticProp &prop = m_StaticProps[ nProp ];
+		StaticPropDict_t &dict = m_StaticPropDict[ prop.m_ModelIdx ];
+
+		if ( dict.m_pModel )
 		{
-			OptimizedModel::StripHeader_t *pStrip = pStripGroup->pStrip(nStrip);
-
-			// If this hits, re-factor the code to iterate over triangles, and build the triangles
-			// from the underlying structures.
-			Assert((pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP) == 0);
-
-			if (pStrip->flags & OptimizedModel::STRIP_IS_TRILIST)
+			// Get material, get reflectivity
+			VMatrix xform;
+			xform.SetupMatrixOrgAngles( prop.m_Origin, prop.m_Angles );
+			ICollisionQuery *queryModel = s_pPhysCollision->CreateQueryModel( dict.m_pModel );
+			for ( int nConvex = 0; nConvex < queryModel->ConvexCount(); ++nConvex )
 			{
-				for (int i = 0; i < pStrip->numIndices; i += 3)
+				for ( int nTri = 0; nTri < queryModel->TriangleCount( nConvex ); ++nTri )
 				{
-					int idx = pStrip->indexOffset + i;
+					Vector verts[ 3 ];
+					queryModel->GetTriangleVerts( nConvex, nTri, verts );
+					for ( int nVert = 0; nVert < 3; ++nVert )
+						verts[ nVert ] = xform.VMul4x3( verts[ nVert ] );
 
-					unsigned short i1 = *pStripGroup->pIndex(idx);
-					unsigned short i2 = *pStripGroup->pIndex(idx + 1);
-					unsigned short i3 = *pStripGroup->pIndex(idx + 2);
+					//pPad->DrawPolygon( CSPVertList( verts, 3, CSPColor( prop.m_vReflectivity ) ) );
+					//pPad->DrawLine( CSPVert( g_Patches.Tail().origin ), CSPVert( g_Patches.Tail().origin + 5.0f * g_Patches.Tail().normal) );
 
-					int vertex1 = pStripGroup->pVertex(i1)->origMeshVertID;
-					int vertex2 = pStripGroup->pVertex(i2)->origMeshVertID;
-					int vertex3 = pStripGroup->pVertex(i3)->origMeshVertID;
-
-					Vector modelPos[3] = {
-						*vertData->Position(vertex1),
-						*vertData->Position(vertex2),
-						*vertData->Position(vertex3)
-					};
-
-					Vector modelNormal[3] = {
-						*vertData->Normal(vertex1),
-						*vertData->Normal(vertex2),
-						*vertData->Normal(vertex3)
-					};
-
-					Vector worldPos[3];
-					Vector worldNormal[3];
-
-					VectorTransform(modelPos[0], _matPos, worldPos[0]);
-					VectorTransform(modelPos[1], _matPos, worldPos[1]);
-					VectorTransform(modelPos[2], _matPos, worldPos[2]);
-
-					VectorTransform(modelNormal[0], _matNormal, worldNormal[0]);
-					VectorTransform(modelNormal[1], _matNormal, worldNormal[1]);
-					VectorTransform(modelNormal[2], _matNormal, worldNormal[2]);
-
-					Vector2D texcoord[3] = { 
-						*vertData->Texcoord(vertex1),
-						*vertData->Texcoord(vertex2),
-						*vertData->Texcoord(vertex3)
-					};
-
-					Rasterizer rasterizer(texcoord[0], texcoord[1], texcoord[2],
-					                      _lightmapResX, _lightmapResY);
-
-					for (auto it = rasterizer.begin(); it != rasterizer.end(); ++it)
+					winding_t *w = AllocWinding( 3 );
+					for ( int i = 0; i < 3; i++ )
 					{
-						size_t linearPos = rasterizer.GetLinearPos(it);
-						Assert(linearPos < cTotalPixelCount);
-
-						if ( colorTexels[linearPos].m_bValid )
-						{
-							continue;
-						}						
-
-						float ourDistancetoTri = ComputeBarycentricDistanceToTri( it->barycentric, texcoord );
-
-						bool doWrite =  it->insideTriangle
-							        || !colorTexels[linearPos].m_bPossiblyInteresting
-									||  colorTexels[linearPos].m_fDistanceToTri > ourDistancetoTri;
-
-						if (doWrite)
-						{
-							Vector itWorldPos = worldPos[0] * it->barycentric.x
-											  + worldPos[1] * it->barycentric.y
-											  + worldPos[2] * it->barycentric.z;
-
-							Vector itWorldNormal = worldNormal[0] * it->barycentric.x
-												 + worldNormal[1] * it->barycentric.y
-												 + worldNormal[2] * it->barycentric.z;
-							itWorldNormal.NormalizeInPlace();
-
-							colorTexels[linearPos].m_WorldPosition = itWorldPos;
-							colorTexels[linearPos].m_WorldNormal = itWorldNormal;
-							colorTexels[linearPos].m_bValid = it->insideTriangle;
-							colorTexels[linearPos].m_bPossiblyInteresting = true;
-							colorTexels[linearPos].m_fDistanceToTri = ourDistancetoTri;
-						}
+						w->p[ i ] = verts[ i ];
 					}
+					w->numpoints = 3;
+					MakePatchForTriangle( w, prop.m_vReflectivity, nProp );
+					//pPad->DrawPolygon( CSPVertList( verts, 3 ) );
+					//pPad->DrawLine( CSPVert( g_Patches.Tail().origin ), CSPVert( g_Patches.Tail().origin + 5.0f * g_Patches.Tail().normal) );
+					g_RtEnv_RadiosityPatches.AddTriangle( TRACE_ID_PATCH | (g_Patches.Count() - 1), verts[ 0 ], verts[ 1 ], verts[ 2 ], Vector( 1.0f, 1.0f, 1.0f ) );
+					nPatchCount++;
 				}
 			}
+			s_pPhysCollision->DestroyQueryModel( queryModel );
 		}
-	}
-
-	// Process neighbors to the valid region. Walk through the existing array, look for samples that
-	// are not valid but are adjacent to valid samples. Works if we are only bilinearly sampling
-	// on the other side.
-	// First attempt: Just pretend the triangle was larger and cast a ray from this new world pos 
-	// as above.
-	int linearPos = 0;
-	for ( int j = 0; j < _lightmapResY; ++j )
-	{
-		for (int i = 0; i < _lightmapResX; ++i )
+		else
 		{
-			bool shouldProcess = colorTexels[linearPos].m_bValid;
-			// Are any of the eight neighbors valid??
-			if ( colorTexels[linearPos].m_bPossiblyInteresting )
-			{
-				// Look at our neighborhood (3x3 centerd on us). 
-				shouldProcess = shouldProcess
-				             || colorTexels[ComputeLinearPos( i - 1, j - 1, _lightmapResX, _lightmapResY )].m_bValid  // TL
-							 || colorTexels[ComputeLinearPos( i    , j - 1, _lightmapResX, _lightmapResY )].m_bValid  // T
-							 || colorTexels[ComputeLinearPos( i + 1, j - 1, _lightmapResX, _lightmapResY )].m_bValid  // TR
-
-							 || colorTexels[ComputeLinearPos( i - 1, j    , _lightmapResX, _lightmapResY )].m_bValid  // L
-							 || colorTexels[ComputeLinearPos( i + 1, j    , _lightmapResX, _lightmapResY )].m_bValid  // R
-
-							 || colorTexels[ComputeLinearPos( i - 1, j + 1, _lightmapResX, _lightmapResY )].m_bValid  // BL
-							 || colorTexels[ComputeLinearPos( i    , j + 1, _lightmapResX, _lightmapResY )].m_bValid  // B
-							 || colorTexels[ComputeLinearPos( i + 1, j + 1, _lightmapResX, _lightmapResY )].m_bValid; // BR
-			}
-
-			if (shouldProcess)
-			{
-				Vector directColor(0, 0, 0),
-					   indirectColor(0, 0, 0);
-
-
-				ComputeDirectLightingAtPoint( colorTexels[linearPos].m_WorldPosition, colorTexels[linearPos].m_WorldNormal, directColor, _iThread, _skipProp, _flags);
-
-				if (numbounce >= 1) {
-					ComputeIndirectLightingAtPoint( colorTexels[linearPos].m_WorldPosition, colorTexels[linearPos].m_WorldNormal, indirectColor, _iThread, true, (_flags & GATHERLFLAGS_IGNORE_NORMALS) != 0 );
-				}
-
-				VectorAdd(directColor, indirectColor, colorTexels[linearPos].m_Color);
-			}
-
-			++linearPos;
+			// FIXME
+#if 0
+			VectorAdd( dict.m_Mins, prop.m_Origin, prop.m_mins );
+			VectorAdd( dict.m_Maxs, prop.m_Origin, prop.m_maxs );
+			g_RtEnv.AddAxisAlignedRectangularSolid( TRACE_ID_STATICPROP | nProp, prop.m_mins, prop.m_maxs, fullCoverage );
+#endif
 		}
 	}
-}
-
-// ------------------------------------------------------------------------------------------------
-static int GetTexelCount(unsigned int _resX, unsigned int _resY, bool _mipmaps)
-{
-	// Because they are unsigned, this is a != check--but if we were to change to ints, this would be
-	// the right assert (and it's no worse than != now). 
-	Assert(_resX > 0 && _resY > 0);
-
-	if (_mipmaps == false)
-		return _resX * _resY;
-
-	int retVal = 0;
-	while (_resX > 1 || _resY > 1) 
-	{
-		retVal += _resX * _resY;
-		_resX = MAX(1, _resX >> 1);
-		_resY = MAX(1, _resY >> 1);
-	}
-
-	// Add in the 1x1 mipmap level, which wasn't hit above. This could be done in the initializer of 
-	// retVal, but it's more obvious here. 
-	retVal += 1;
-
-	return retVal;
-}
-
-// ------------------------------------------------------------------------------------------------
-static void FilterFineMipmap(unsigned int _resX, unsigned int _resY, const CUtlVector<colorTexel_t>& _srcTexels, CUtlVector<Vector>* _outLinear)
-{
-	Assert(_outLinear);
-	// We can't filter in place, so go ahead and create a linear buffer here.
-	CUtlVector<Vector> filterSrc;
-	filterSrc.EnsureCount(_srcTexels.Count());
-
-	for (int i = 0; i < _srcTexels.Count(); ++i)
-	{
-		ColorRGBExp32 rgbColor;
-		VectorToColorRGBExp32(_srcTexels[i].m_Color, rgbColor);
-		ConvertRGBExp32ToLinear( &rgbColor, &(filterSrc[i]) );
-	}
-
-	const int cRadius = 1;
-	const float cOneOverDiameter = 1.0f / pow(2.0f * cRadius + 1.0f, 2.0f) ;
-	// Filter here.
-	for (int j = 0; j < _resY; ++j) 
-	{
-		for (int i = 0; i < _resX; ++i)
-		{
-			Vector value(0, 0, 0);
-			int thisIndex = ComputeLinearPos(i, j, _resX, _resY);
-
-			if (!_srcTexels[thisIndex].m_bValid)
-			{
-				(*_outLinear)[thisIndex] = filterSrc[thisIndex];
-				continue;
-			}
-
-			// TODO: Check ASM for this, unroll by hand if needed.
-			for ( int offsetJ = -cRadius; offsetJ <= cRadius; ++offsetJ )
-			{
-				for ( int offsetI = -cRadius; offsetI <= cRadius; ++offsetI )
-				{
-					int finalIndex = ComputeLinearPos( i + offsetI, j + offsetJ, _resX, _resY );
-					if ( !_srcTexels[finalIndex].m_bValid )
-					{
-						finalIndex = thisIndex;
-					}
-						
-					value += filterSrc[finalIndex];
-				}
-			}
-
-			(*_outLinear)[thisIndex] = value * cOneOverDiameter;
-		}
-	}
-}
-
-// ------------------------------------------------------------------------------------------------
-static void BuildFineMipmap(unsigned int _resX, unsigned int _resY, bool _applyFilter, const CUtlVector<colorTexel_t>& _srcTexels, CUtlVector<RGB888_t>* _outTexelsRGB888, CUtlVector<Vector>* _outLinear)
-{
-	// At least one of these needs to be non-null, otherwise what are we doing here?
-	Assert(_outTexelsRGB888 || _outLinear);
-	Assert(!_applyFilter || _outLinear);
-	Assert(_srcTexels.Count() == GetTexelCount(_resX, _resY, false));
-
-	int texelCount = GetTexelCount(_resX, _resY, true);
-
-	if (_outTexelsRGB888)
-		(*_outTexelsRGB888).EnsureCount(texelCount);
-
-	if (_outLinear)
-		(*_outLinear).EnsureCount(GetTexelCount(_resX, _resY, false));
-
-	// This code can take awhile, so minimize the branchiness of the inner-loop. 
-	if (_applyFilter)
-	{
-
-		FilterFineMipmap(_resX, _resY, _srcTexels, _outLinear);
-
-		if ( _outTexelsRGB888 )
-		{
-			for (int i = 0; i < _srcTexels.Count(); ++i) 
-			{
-				RGBA8888_t encodedColor;
-
-				Vector linearColor = (*_outLinear)[i];
-
-				ConvertLinearToRGBA8888( &linearColor, (unsigned char*)&encodedColor );
-				(*_outTexelsRGB888)[i].r = encodedColor.r;
-				(*_outTexelsRGB888)[i].g = encodedColor.g;
-				(*_outTexelsRGB888)[i].b = encodedColor.b;
-			}
-		}
-	}
-	else
-	{
-		for (int i = 0; i < _srcTexels.Count(); ++i) 
-		{
-			ColorRGBExp32 rgbColor;
-			RGBA8888_t encodedColor;
-			VectorToColorRGBExp32(_srcTexels[i].m_Color, rgbColor);
-			ConvertRGBExp32ToRGBA8888(&rgbColor, (unsigned char*)&encodedColor, (_outLinear ? (&(*_outLinear)[i]) : NULL) );
-			// We drop alpha on the floor here, if this were to fire we'd need to consider using a different compressed format.
-			Assert(encodedColor.a == 0xFF);
-
-			if (_outTexelsRGB888)
-			{
-				(*_outTexelsRGB888)[i].r = encodedColor.r;
-				(*_outTexelsRGB888)[i].g = encodedColor.g;
-				(*_outTexelsRGB888)[i].b = encodedColor.b;
-			}
-		}
-	}
-}
-
-// ------------------------------------------------------------------------------------------------
-static void FilterCoarserMipmaps(unsigned int _resX, unsigned int _resY, CUtlVector<Vector>* _scratchLinear, CUtlVector<RGB888_t> *_outTexelsRGB888)
-{
-	Assert(_outTexelsRGB888);
-
-	int srcResX = _resX;
-	int srcResY = _resY;
-	int dstResX = MAX(1, (srcResX >> 1));
-	int dstResY = MAX(1, (srcResY >> 1));
-	int dstOffset = GetTexelCount(srcResX, srcResY, false);
-
-	// Build mipmaps here, after being converted to linear space. 
-	// TODO: Should do better filtering for downsampling. But this will work for now.
-	while (srcResX > 1 || srcResY > 1)
-	{
-		for (int j = 0; j < srcResY; j += 2) {
-			for (int i = 0; i < srcResX; i += 2) {
-				int srcCol0 = i;
-				int srcCol1 = i + 1 > srcResX - 1 ? srcResX - 1 : i + 1;
-				int srcRow0 = j;
-				int srcRow1 = j + 1 > srcResY - 1 ? srcResY - 1 : j + 1;;
-
-				int dstCol = i >> 1;
-				int dstRow = j >> 1;
-
-
-				const Vector& tl = (*_scratchLinear)[srcCol0 + (srcRow0 * srcResX)];
-				const Vector& tr = (*_scratchLinear)[srcCol1 + (srcRow0 * srcResX)];
-				const Vector& bl = (*_scratchLinear)[srcCol0 + (srcRow1 * srcResX)];
-				const Vector& br = (*_scratchLinear)[srcCol1 + (srcRow1 * srcResX)];
-
-				Vector sample = (tl + tr + bl + br) / 4.0f;
-
-				ConvertLinearToRGBA8888(&sample, (unsigned char*)&(*_outTexelsRGB888)[dstOffset + dstCol + dstRow * dstResX]);
-
-				// Also overwrite the srcBuffer to filter the next loop. This is safe because we won't be reading this source value
-				// again during this mipmap level.
-				(*_scratchLinear)[dstCol + dstRow * dstResX] = sample;
-			}
-		}
-
-		srcResX = dstResX;
-		srcResY = dstResY;
-		dstResX = MAX(1, (srcResX >> 1));
-		dstResY = MAX(1, (srcResY >> 1));
-		dstOffset += GetTexelCount(srcResX, srcResY, false);
-	}
-}
-
-// ------------------------------------------------------------------------------------------------
-static void ConvertToDestinationFormat(unsigned int _resX, unsigned int _resY, ImageFormat _destFmt, const CUtlVector<RGB888_t>& _scratchRBG888, CUtlMemory<byte>* _outTexture)
-{
-	const ImageFormat cSrcImageFormat = IMAGE_FORMAT_RGB888;
-
-	// Converts from the scratch RGB888 buffer, which should be fully filled out to the output texture.
-	int destMemoryUsage = ImageLoader::GetMemRequired(_resX, _resY, 1, _destFmt, true);
-	(*_outTexture).EnsureCapacity(destMemoryUsage);
-
-	int srcResX = _resX;
-	int srcResY = _resY;
-	int srcOffset = 0;
-	int dstOffset = 0;
-
-	// The usual case--that they'll be different.
-	if (cSrcImageFormat != _destFmt)
-	{
-		while (srcResX > 1 || srcResY > 1)
-		{
-			// Convert this mipmap level.
-			ImageLoader::ConvertImageFormat((unsigned char*)(&_scratchRBG888[srcOffset]), cSrcImageFormat, (*_outTexture).Base() + dstOffset, _destFmt, srcResX, srcResY);
-
-			// Then update offsets for the next mipmap level.
-			srcOffset += GetTexelCount(srcResX, srcResY, false);
-			dstOffset += ImageLoader::GetMemRequired(srcResX, srcResY, 1, _destFmt, false);
-
-			srcResX = MAX(1, (srcResX >> 1));
-			srcResY = MAX(1, (srcResY >> 1));
-		}
-
-		// Do the 1x1 level also.
-		ImageLoader::ConvertImageFormat((unsigned char*)_scratchRBG888.Base() + srcOffset, cSrcImageFormat, (*_outTexture).Base() + dstOffset, _destFmt, srcResX, srcResY);
-	} else {
-		// But sometimes (particularly for debugging) they will be the same.
-		Q_memcpy( (*_outTexture).Base(), _scratchRBG888.Base(), destMemoryUsage );
-	}
-}
-
-// ------------------------------------------------------------------------------------------------
-static void ConvertTexelDataToTexture(unsigned int _resX, unsigned int _resY, ImageFormat _destFmt, const CUtlVector<colorTexel_t>& _srcTexels, CUtlMemory<byte>* _outTexture)
-{
-	Assert(_outTexture);
-	Assert(_srcTexels.Count() == _resX * _resY);
-
-	CUtlVector<RGB888_t> scratchRGB888;
-	CUtlVector<Vector> scratchLinear;
-
-	BuildFineMipmap(_resX, _resY, true, _srcTexels, &scratchRGB888, &scratchLinear);
-	FilterCoarserMipmaps(_resX, _resY, &scratchLinear, &scratchRGB888 );
-	ConvertToDestinationFormat(_resX, _resY, _destFmt, scratchRGB888, _outTexture);
-}
-
-// ------------------------------------------------------------------------------------------------
-static void DumpLightmapLinear( const char* _dstFilename, const CUtlVector<colorTexel_t>& _srcTexels, int _width, int _height )
-{
-	CUtlVector< Vector > linearFloats;
-	CUtlVector< BGR888_t > linearBuffer;
-	BuildFineMipmap( _width, _height, true, _srcTexels, NULL, &linearFloats );
-	linearBuffer.SetCount( linearFloats.Count() );
-
-	for ( int i = 0; i < linearFloats.Count(); ++i ) {
-		linearBuffer[i].b = RoundFloatToByte(linearFloats[i].z * 255.0f);
-		linearBuffer[i].g = RoundFloatToByte(linearFloats[i].y * 255.0f);
-		linearBuffer[i].r = RoundFloatToByte(linearFloats[i].x * 255.0f);
-	}
-	
-	TGAWriter::WriteTGAFile( _dstFilename, _width, _height, IMAGE_FORMAT_BGR888, (uint8*)(linearBuffer.Base()), _width * ImageLoader::SizeInBytes(IMAGE_FORMAT_BGR888) );
+	//pPad->Release();
+	g_RtEnv_RadiosityPatches.SetupAccelerationStructure();
+	qprintf( "%i static prop patches\n", nPatchCount );
 }
diff --git a/wscript b/wscript
index a1d448e6..261c68d5 100644
--- a/wscript
+++ b/wscript
@@ -455,7 +455,6 @@ def configure(conf):
 		conf.load('mm_hook')
 
 	define_platform(conf)
-	conf.env.targets = list(set(conf.env.targets))
 	conf.env.REL_VERSION = VERSION
 
 	conf.env.BIT32_MANDATORY = not conf.options.ALLOW64
@@ -610,7 +609,8 @@ def configure(conf):
 		conf.env.CC.insert(0, 'ccache')
 		conf.env.CXX.insert(0, 'ccache')
 
-	conf.add_subproject(conf.env.targets)
+	for v in set(conf.env.targets):
+		conf.add_subproject(v)
 
 def build(bld):
 	if not os.environ.get('CCACHE_DIR'):