From 9626b3bc42d336e5ac6efc56cacbfbbdb947c2da Mon Sep 17 00:00:00 2001
From: hrydgard <hrydgard@gmail.com>
Date: Fri, 8 Aug 2008 19:46:04 +0000
Subject: [PATCH] Fix shader cache bugs, improving linux speed. Remove some
 unnecessary printfs.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@153 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 Source/Core/Common/Src/Common.h               | 13 +++------
 Source/Core/Common/Src/DynamicLibrary.cpp     |  6 ++--
 Source/Core/Common/Src/MemoryUtil.cpp         |  4 +--
 Source/Core/Core/Src/Core.cpp                 |  2 +-
 Source/Core/Core/Src/HW/SystemTimers.cpp      |  4 +++
 .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp   |  6 +++-
 .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp    | 21 --------------
 .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 17 +++++------
 Source/Core/DolphinWX/src/PluginManager.cpp   | 12 --------
 Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp |  1 +
 .../Src/PixelShaderManager.cpp                | 13 ++++-----
 .../Plugin_VideoOGL/Src/PixelShaderManager.h  | 28 +++++++------------
 .../Src/VertexShaderManager.cpp               |  1 +
 .../Plugin_VideoOGL/Src/VertexShaderManager.h |  8 ++++--
 14 files changed, 49 insertions(+), 87 deletions(-)

diff --git a/Source/Core/Common/Src/Common.h b/Source/Core/Common/Src/Common.h
index ddee157b53..043d051433 100644
--- a/Source/Core/Common/Src/Common.h
+++ b/Source/Core/Common/Src/Common.h
@@ -100,15 +100,10 @@ typedef union _LARGE_INTEGER
 } LARGE_INTEGER;
 #endif
 
-#if defined (__MINGW32__) || defined (_WIN32)
-#define GC_ALIGNED16(x) __declspec(align(16)) x
-#define GC_ALIGNED16_DECL(x) x
-#else
-#define GC_ALIGNED16(x)  x
-#define GC_ALIGNED64(x)  x
-#define GC_ALIGNED16_DECL(x) x __attribute((aligned(16)))
-#define GC_ALIGNED64_DECL(x) x
-#endif
+#define GC_ALIGNED16(x)  __attribute((aligned(16))) x
+#define GC_ALIGNED64(x)  __attribute((aligned(64))) x
+#define GC_ALIGNED16_DECL(x) __attribute((aligned(16))) x
+#define GC_ALIGNED64_DECL(x) __attribute((aligned(64))) x
 
 #ifndef __forceinline
 #define __forceinline inline
diff --git a/Source/Core/Common/Src/DynamicLibrary.cpp b/Source/Core/Common/Src/DynamicLibrary.cpp
index 8097e16a2c..407dcc494b 100644
--- a/Source/Core/Common/Src/DynamicLibrary.cpp
+++ b/Source/Core/Common/Src/DynamicLibrary.cpp
@@ -86,7 +86,7 @@ bool DynamicLibrary::Load(const char* filename)
 	if (library) {
 		library_file = filename;
 	}
-	return(library != 0);
+	return library != 0;
 }
 
 
@@ -121,14 +121,14 @@ void* DynamicLibrary::Get(const char* funcname) const
 		//PanicAlert("Did not find function %s in library %s.", funcname, library_file.c_str());
 	//}
 
-	return(retval);
+	return retval;
 
 #else
 	retval = dlsym(library, funcname);
 
 	if (!retval)
 	{
-		printf("%s\n", dlerror());
+		printf("Symbol %s missing in %s (error: %s)\n", funcname, library_file.c_str(), dlerror());
 	}
 #endif
 }
diff --git a/Source/Core/Common/Src/MemoryUtil.cpp b/Source/Core/Common/Src/MemoryUtil.cpp
index bd94eaf012..39e550452e 100644
--- a/Source/Core/Common/Src/MemoryUtil.cpp
+++ b/Source/Core/Common/Src/MemoryUtil.cpp
@@ -58,7 +58,7 @@ void* AllocateExecutableMemory(int size, bool low)
 		 | (low ? MAP_32BIT : 0)
 #endif
          , -1, 0);  // | MAP_FIXED
-	printf("mappah exe %p %i\n", retval, size);
+	// printf("Mapped executable memory at %p (size %i)\n", retval, size);
 
 	if (!retval)
 	{
@@ -86,7 +86,7 @@ void* AllocateMemoryPages(int size)
 #else
 	void* retval = mmap(0, size, PROT_READ | PROT_WRITE,
 			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // | MAP_FIXED
-	printf("mappah %p %i\n", retval, size);
+	// printf("Mapped memory at %p (size %i)\n", retval, size);
 
 	if (!retval)
 	{
diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp
index 260094ad6d..9a130d03e5 100644
--- a/Source/Core/Core/Src/Core.cpp
+++ b/Source/Core/Core/Src/Core.cpp
@@ -304,7 +304,7 @@ THREAD_RETURN EmuThread(void *pArg)
 			if (Callback_PeekMessages) {
 				Callback_PeekMessages();
 			}
-			Common::SleepCurrentThread(20);
+			Common::SleepCurrentThread(200);
 		}
 	}
 	else
diff --git a/Source/Core/Core/Src/HW/SystemTimers.cpp b/Source/Core/Core/Src/HW/SystemTimers.cpp
index 0e4d83895d..ecefdee8bc 100644
--- a/Source/Core/Core/Src/HW/SystemTimers.cpp
+++ b/Source/Core/Core/Src/HW/SystemTimers.cpp
@@ -153,6 +153,10 @@ int timeHistory[HISTORYLENGTH] = {0,0,0,0,0};
 
 void Throttle(u64 userdata, int cyclesLate)
 {
+#ifndef _WIN32
+	// Throttling is skipped on non-Windows builds: it caused an unexplained problem on Linux. TODO: investigate.
+	return;
+#endif
 	static Common::Timer timer;
 
 	for (int i=0; i<HISTORYLENGTH-1; i++)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp
index adf14c451a..6cb68576d0 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -14,6 +14,7 @@
 
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/
+
 #include "Common.h"
 
 #include "../PowerPC.h"
@@ -24,8 +25,11 @@
 #include "JitCache.h"
 #include "JitRegCache.h"
 
-// #define INSTRUCTION_START Default(inst); return;
+#ifdef _WIN32
 #define INSTRUCTION_START
+#else
+#define INSTRUCTION_START Default(inst); return;
+#endif
 
 namespace Jit64
 {
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
index 0a069eecf0..787b4fe7c1 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp
@@ -129,27 +129,6 @@ namespace Jit64
 		SetJumpTarget(continue1);
 		SetJumpTarget(continue2);
 		OR(32, M(&CR), R(EAX));
-		/*
-		alternative		
-		MOV(32, R(EAX), M(&CR));
-		AND(32, R(EAX), Imm32(~(0xF0000000 >> (crf*4))));
-		CMP(32, gpr.R(a), Imm32(uimm));
-		FixupBranch pLesser  = J_CC(CC_B);
-		FixupBranch pGreater = J_CC(CC_A);
-		
-		OR(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
-		FixupBranch continue1 = J();
-		
-		SetJumpTarget(pGreater);
-		OR(32, R(EAX), Imm32(0x40000000 >> shift)); // _x86Reg > 0
-		FixupBranch continue2 = J();
-		
-		SetJumpTarget(pLesser);
-		OR(32, R(EAX), Imm32(0x80000000 >> shift)); // _x86Reg < 0
-		SetJumpTarget(continue1);
-		SetJumpTarget(continue2);
-		MOV(32, M(&CR), R(EAX));
-		*/
 	}
 
 	// signed
diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 31644355aa..515c2962b4 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -35,11 +35,8 @@
 #include "JitAsm.h"
 #include "JitRegCache.h"
 
-#ifdef _WIN32
-#define INSTRUCTION_START
-#else
+// #define INSTRUCTION_START
 #define INSTRUCTION_START Default(inst); return;
-#endif
 
 #ifdef _M_IX86
 #define DISABLE_32BIT Default(inst); return;
@@ -59,7 +56,7 @@ void WriteDual32(u64 value, u32 address)
 	Memory::Write_U32((u32)value, address + 4);
 }
 
-static const double m_quantizeTableD[] =
+static const double GC_ALIGNED16(m_quantizeTableD[]) =
 {
 	(1 <<  0),	(1 <<  1),	(1 <<  2),	(1 <<  3),
 	(1 <<  4),	(1 <<  5),	(1 <<  6),	(1 <<  7),
@@ -79,7 +76,7 @@ static const double m_quantizeTableD[] =
 	1.0 / (1 <<  4),	1.0 / (1 <<  3),	1.0 / (1 <<  2),	1.0 / (1 <<  1),
 }; 
 
-static const double m_dequantizeTableD[] =
+static const double GC_ALIGNED16(m_dequantizeTableD[]) =
 {
 	1.0 / (1 <<  0),	1.0 / (1 <<  1),	1.0 / (1 <<  2),	1.0 / (1 <<  3),
 	1.0 / (1 <<  4),	1.0 / (1 <<  5),	1.0 / (1 <<  6),	1.0 / (1 <<  7),
@@ -149,8 +146,6 @@ void psq_st(UGeckoInstruction inst)
 		SetJumpTarget(argh);
 		CALL((void *)&WriteDual32); 
 		SetJumpTarget(arg2);
-		if (update)
-			MOV(32, gpr.R(a), R(ABI_PARAM2));
 		gpr.UnlockAll();
 		fpr.UnlockAll();
 	}
@@ -164,6 +159,8 @@ void psq_st(UGeckoInstruction inst)
 		MOV(32, R(ABI_PARAM2), gpr.R(a));
 		if (offset)
 			ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
+		if (update && offset)
+			MOV(32, gpr.R(a), R(ABI_PARAM2));
 		MOVAPS(XMM0, fpr.R(s));
 		MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
 		MULPD(XMM0, R(XMM1));
@@ -193,6 +190,8 @@ void psq_st(UGeckoInstruction inst)
 		MOV(32, R(ABI_PARAM2), gpr.R(a));
 		if (offset)
 			ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
+		if (update)
+			MOV(32, gpr.R(a), R(ABI_PARAM2));
 		MOVAPS(XMM0, fpr.R(s));
 		MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
 		MULPD(XMM0, R(XMM1));
@@ -209,8 +208,6 @@ void psq_st(UGeckoInstruction inst)
 		PUSH(32, R(ABI_PARAM1));
 		CALL(&Memory::Write_U32);
 #endif
-		if (update)
-			MOV(32, gpr.R(a), R(ABI_PARAM2));
 		gpr.UnlockAll();
 		fpr.UnlockAll();
 	}
diff --git a/Source/Core/DolphinWX/src/PluginManager.cpp b/Source/Core/DolphinWX/src/PluginManager.cpp
index 4bb7fc1f5a..246f9780c1 100644
--- a/Source/Core/DolphinWX/src/PluginManager.cpp
+++ b/Source/Core/DolphinWX/src/PluginManager.cpp
@@ -74,7 +74,6 @@ CPluginManager::ScanForPlugins(wxWindow* _wxWindow)
 		for (size_t i = 0; i < rFilenames.size(); i++)
 		{
 			std::string orig_name = rFilenames[i];
-			printf("Scanning %s\n", rFilenames[i].c_str());
 			std::string FileName;
 
 			if (!SplitPath(rFilenames[i], NULL, &FileName, NULL))
@@ -94,17 +93,11 @@ CPluginManager::ScanForPlugins(wxWindow* _wxWindow)
 				break;
 			}
 
-			printf("Examining %s\n", FileName.c_str());
-
 			CPluginInfo PluginInfo(orig_name);
-			printf("%s\n", orig_name.c_str());
-
 			if (PluginInfo.IsValid())
 			{
 				m_PluginInfos.push_back(PluginInfo);
 			}
-
-			printf("Valid plugin\n");
 		}
 	}
 }
@@ -136,12 +129,8 @@ CPluginInfo::CPluginInfo(const std::string& _rFileName)
 	: m_FileName(_rFileName)
 	, m_Valid(false)
 {
-	printf("Loading!\n");
-
 	if (Common::CPlugin::Load(_rFileName.c_str()))
 	{
-		printf("Loaded!\n");
-
 		if (Common::CPlugin::GetInfo(m_PluginInfo))
 		{
 			m_Valid = true;
@@ -152,7 +141,6 @@ CPluginInfo::CPluginInfo(const std::string& _rFileName)
 		}
 
 		Common::CPlugin::Release();
-		printf("Unloaded!\n");
 	}
 	else
 	{
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp b/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp
index 0c9136e6ea..820b69ac7d 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/GLInit.cpp
@@ -81,6 +81,7 @@ BOOL Callback_PeekMessages()
     while (XPending(GLWin.dpy) > 0) {
         XNextEvent(GLWin.dpy, &event);
 	}
+	return TRUE;
 #endif
 }
 
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
index b9defabc01..00e3e6a495 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.cpp
@@ -117,7 +117,7 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader()
     PSCache::iterator iter = pshaders.find(uid);
 
     if (iter != pshaders.end()) {
-        iter->second.frameCount=frameCount;
+        iter->second.frameCount = frameCount;
         PSCacheEntry &entry = iter->second;
         if (&entry.shader != pShaderLast)
         {
@@ -131,13 +131,14 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader()
 	char *code = GeneratePixelShader(s_texturemask,
 		                             Renderer::GetZBufferTarget() != 0,
 									 Renderer::GetRenderMode() != Renderer::RM_Normal);
+//	printf("Compiling pixel shader. size = %i\n", strlen(code));
     if (!code || !CompilePixelShader(newentry.shader, code)) {
         ERROR_LOG("failed to create pixel shader\n");
         return NULL;
     }
     
     //Make an entry in the table
-    newentry.frameCount=frameCount;
+    newentry.frameCount = frameCount;
     
     pShaderLast = &newentry.shader;
     INCSTAT(stats.numPixelShadersCreated);
@@ -150,7 +151,7 @@ void PixelShaderMngr::Cleanup()
     PSCache::iterator iter = pshaders.begin();
     while(iter != pshaders.end()) {
         PSCacheEntry &entry = iter->second;
-        if (entry.frameCount<frameCount-200) {
+        if (entry.frameCount < frameCount - 200) {
             entry.Destroy();
 #ifdef _WIN32
             iter = pshaders.erase(iter);
@@ -170,11 +171,7 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
 
     char stropt[64];
     sprintf(stropt, "MaxLocalParams=32,NumInstructionSlots=%d", s_nMaxPixelInstructions);
-#ifdef _WIN32
     const char* opts[] = {"-profileopts",stropt,"-O2","-q",NULL};
-#else
-    const char* opts[] = {"-profileopts",stropt,"-q",NULL};
-#endif
     CGprogram tempprog = cgCreateProgram(g_cgcontext, CG_SOURCE, pstrprogram, g_cgfProf, "main", opts);
     if (!cgIsProgram(tempprog) || cgGetError() != CG_NO_ERROR) {
         ERROR_LOG("Failed to create ps %s:\n", cgGetLastListing(g_cgcontext));
@@ -202,7 +199,6 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
 
     //ERROR_LOG(pcompiledprog);
     //ERROR_LOG(pstrprogram);
-
     glGenProgramsARB( 1, &ps.glprogid );
     glBindProgramARB( GL_FRAGMENT_PROGRAM_ARB, ps.glprogid );
     glProgramStringARB( GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);
@@ -215,6 +211,7 @@ bool PixelShaderMngr::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrpro
     }
 
     cgDestroyProgram(tempprog);
+	printf("Compiled pixel shader %i\n", ps.glprogid);
 
 #ifdef _DEBUG
     ps.strprog = pstrprogram;
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h
index 3630abc0e4..b540801a36 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h
@@ -28,17 +28,21 @@ struct FRAGMENTSHADER
 {
     FRAGMENTSHADER() : glprogid(0) { }
     GLuint glprogid; // opengl program id
-
 #ifdef _DEBUG
 	std::string strprog;
 #endif
 };
+
 class PixelShaderMngr
 {
     class PIXELSHADERUID
     {
     public:
-        PIXELSHADERUID() { values = new u32[3+32+6+11]; tevstages = indstages = 0; }
+        PIXELSHADERUID() { // allocates the uid word array, zeroes it, and starts with no tev/indirect stages
+			values = new u32[4+32+6+11]; // 53 words; released by delete[] in the destructor
+			memset(values, 0, (4+32+6+11) * 4); // size in bytes: 53 words * 4 (assumes sizeof(u32) == 4)
+            tevstages = indstages = 0;
+		}
         ~PIXELSHADERUID() { delete[] values; }
         PIXELSHADERUID(const PIXELSHADERUID& r)
         {
@@ -57,7 +61,7 @@ class PixelShaderMngr
             else if( values[0] > _Right.values[0] )
                 return false;
 
-            int N = tevstages + 3; // numTevStages*3/2+1
+            int N = tevstages + indstages + 3; // numTevStages*3/2+1
             int i = 1;
             for(; i < N; ++i) {
                 if( values[i] < _Right.values[i] )
@@ -66,14 +70,6 @@ class PixelShaderMngr
                     return false;
             }
 
-            N += indstages;
-            for(; i < N; ++i) {
-                if( values[i] < _Right.values[i] )
-                    return true;
-                else if( values[i] > _Right.values[i] )
-                    return false;
-            }
-
             return false;
         }
 
@@ -82,19 +78,13 @@ class PixelShaderMngr
             if( values[0] != _Right.values[0] )
                 return false;
 
-            int N = tevstages + 3; // numTevStages*3/2+1
+            int N = tevstages + indstages + 3; // numTevStages*3/2+1
             int i = 1;
             for(; i < N; ++i) {
                 if( values[i] != _Right.values[i] )
                     return false;
             }
 
-            N += indstages;
-            for(; i < N; ++i) {
-                if( values[i] != _Right.values[i] )
-                    return false;
-            }
-
             return true;
         }
 
@@ -109,7 +99,9 @@ class PixelShaderMngr
         PSCacheEntry() : frameCount(0) {}
 		~PSCacheEntry() {}
         void Destroy() {
+			printf("Destroying ps %i\n", shader.glprogid); // trace output: id of the program about to be deleted
             glDeleteProgramsARB(1, &shader.glprogid);
+			shader.glprogid = 0; // reset the stored id once the GL program has been deleted
         }
     };
 
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
index 69bdb069b9..848b7f6dc9 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp
@@ -183,6 +183,7 @@ bool VertexShaderMngr::CompileVertexShader(VERTEXSHADER& vs, const char* pstrpro
     }
 
     cgDestroyProgram(tempprog);
+	printf("Compiled vertex shader %i\n", vs.glprogid);
 
 #ifdef _DEBUG
     vs.strprog = pstrprogram;
diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h
index 347942d154..9fef282aea 100644
--- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h
+++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.h
@@ -41,14 +41,18 @@ class VertexShaderMngr
         int frameCount;
         VSCacheEntry() : frameCount(0) {}
         void Destroy() {
-            SAFE_RELEASE_PROG(shader.glprogid);
+			printf("Destroying vs %i\n", shader.glprogid); // trace output: id of the program about to be deleted
+            glDeleteProgramsARB(1, &shader.glprogid); // explicit delete, replacing the SAFE_RELEASE_PROG macro call
+			shader.glprogid = 0; // reset the stored id once the GL program has been deleted
         }
     };
 
     class VERTEXSHADERUID
     {
     public:
-        VERTEXSHADERUID() {}
+        VERTEXSHADERUID() {
+			memset(values, 0, sizeof(values));		// zero all of values; presumably a fixed-size u32 array, since the copy constructor iterates sizeof(values) / sizeof(u32) entries
+		}
         VERTEXSHADERUID(const VERTEXSHADERUID& r) {
 			for(size_t i = 0; i < sizeof(values) / sizeof(u32); ++i) 
 				values[i] = r.values[i];