diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp index 439400e347..1249011538 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.cpp @@ -545,7 +545,7 @@ const char *GeneratePixelShader(u32 texture_mask, bool dstAlphaEnable, u32 HLSL) // alpha test will always fail, so restart the shader and just make it an empty function p = pmainstart; WRITE(p, "ocol0 = 0;\n"); - WRITE(p, HLSL ? "clip(-1);" : "discard;\n"); + WRITE(p, "discard;return;\n"); } else { @@ -582,9 +582,9 @@ static const char *TEVCMPColorOPTable[16] = "float3(0.0f,0.0f,0.0f)",//7 " %s + ((%s.r > %s.r + (1.0f/510.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8 " %s + ((abs(%s.r - %s.r) < (1.0f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9 - " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10 + " %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11 - " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12 + " %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13 " %s + (max(sign(%s.rgb - %s.rgb - (1.0f/510.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14 " %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (1.0f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15 @@ -601,13 +601,13 @@ static const char *TEVCMPAlphaOPTable[16] = "0.0f",//5 "0.0f",//6 "0.0f",//7 - " %s + ((%s.r >= (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8 + " %s + ((%s.r > (%s.r + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8 " %s + (abs(%s.r - %s.r) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9 - " %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10 + " %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11 - " %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12 + " %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (1.0f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13 - " %s + ((%s.a >= (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14 + " %s + ((%s.a > (%s.a + (1.0f/510.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14 " %s + (abs(%s.a - %s.a) < (1.0f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15 }; @@ -862,6 +862,8 @@ static const char *tevAlphaFuncsTable[] = "(true)" //ALPHACMP_ALWAYS 7 }; + + static const char *tevAlphaFunclogicTable[] = { " && ", // and @@ -903,10 +905,7 @@ static bool WriteAlphaTest(char *&p, u32 HLSL) // Seems we need discard for Cg and clip for d3d. sigh. - if (HLSL) - WRITE(p, "clip( ("); - else - WRITE(p, "discard(!( "); + WRITE(p, "if(!( "); int compindex = bpmem.alphaFunc.comp0 % 8; WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table @@ -916,12 +915,8 @@ static bool WriteAlphaTest(char *&p, u32 HLSL) compindex = bpmem.alphaFunc.comp1 % 8; WRITE(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table - if (HLSL) { - // clip works differently than discard - discard takes a bool, clip takes a value that kills the pixel on negative - WRITE(p, ") ? 1 : -1);\n"); - } else { - WRITE(p, "));\n"); - } + WRITE(p, ")){ocol0 = 0;discard;return;}\n"); + return true; } @@ -967,6 +962,6 @@ static void WriteFog(char *&p) WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel); } - WRITE(p, " prev.rgb = (1.0f - fog) * prev.rgb + (fog * "I_FOG"[0].rgb);\n"); + WRITE(p, " prev.rgb = lerp(prev.rgb,"I_FOG"[0].rgb,fog);\n"); } diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 4db14bdeaa..eb6b83e234 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -107,12 +107,16 @@ void WriteSwizzler(char*& p, u32 format,bool HLSL) if(!HLSL) WRITE(p," sampleUv.y = textureDims.y - sampleUv.y;\n"); - WRITE(p, " sampleUv.x = sampleUv.x + textureDims.z;\n" - " sampleUv.y = sampleUv.y + textureDims.w;\n"); + WRITE(p, " sampleUv = sampleUv + textureDims.zw;\n"); + if(HLSL) { - WRITE(p, " sampleUv.x = sampleUv.x / blkDims.z;\n" - " sampleUv.y = sampleUv.y / blkDims.w;\n"); + WRITE(p, " sampleUv = sampleUv + float2(1.0f,1.0f);\n" + " sampleUv = sampleUv / blkDims.zw;\n"); + } + else + { + WRITE(p, " sampleUv = sampleUv + float2(1.0f,-1.0f);\n"); } } @@ -158,15 +162,18 @@ void Write32BitSwizzler(char*& p, u32 format, bool HLSL) WRITE(p, " sampleUv = sampleUv * blkDims.xy;\n"); if(!HLSL) - WRITE(p, " sampleUv.y = textureDims.y - sampleUv.y;\n"); + WRITE(p," sampleUv.y = textureDims.y - sampleUv.y;\n"); - WRITE(p, " sampleUv.x = sampleUv.x + textureDims.z;\n"); - WRITE(p, " sampleUv.y = sampleUv.y + textureDims.w;\n"); + WRITE(p, " sampleUv = sampleUv + textureDims.zw;\n"); if(HLSL) { - WRITE(p, " sampleUv.x = sampleUv.x / blkDims.z;\n" - " sampleUv.y = sampleUv.y / blkDims.w;\n"); + WRITE(p, " sampleUv = sampleUv + float2(1.0f,1.0f);\n" + " sampleUv = sampleUv / blkDims.zw;\n"); + } + else + { + WRITE(p, " sampleUv = sampleUv + float2(1.0f,-1.0f);\n"); } } diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index bbed1cc3cf..01d1c50d4c 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -418,6 +418,219 @@ static PC_TexFormat GetPCFormatFromTLUTFormat(int tlutfmt) return PC_TEX_FMT_NONE; // Error } +PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt) +{ + switch (texformat) + { + case GX_TF_C4: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_I4: + return PC_TEX_FMT_I4_AS_I8; + case GX_TF_I8: // speed critical + return PC_TEX_FMT_I8; + case GX_TF_C8: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_IA4: + return PC_TEX_FMT_IA4_AS_IA8; + case GX_TF_IA8: + return PC_TEX_FMT_IA8; + case GX_TF_C14X2: + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_RGB565: + return PC_TEX_FMT_RGB565; + case GX_TF_RGB5A3: + return PC_TEX_FMT_BGRA32; + case GX_TF_RGBA8: // speed critical + return PC_TEX_FMT_BGRA32; + case GX_TF_CMPR: // speed critical + // The metroid games use this format almost exclusively. + { + return PC_TEX_FMT_BGRA32; + } + } + + // The "copy" texture formats, too? + return PC_TEX_FMT_NONE; +} + + +PC_TexFormat TexDecoder_DirectDecode_real(u8 *dst, const u8 *src, int width, int height,int Pitch, int texformat, int tlutaddr, int tlutfmt) +{ + switch (texformat) + { + case GX_TF_C4: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 8) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 8; iy++, src += 4) + decodebytesC4_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, src, tlutaddr); + } + else + { + for (int y = 0; y < height; y += 8) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 8; iy++, src += 4) + decodebytesC4_To_Raw16((u16*)dst + (y + iy) * width + x, src, tlutaddr); + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_I4: + { + for (int y = 0; y < height; y += 8) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 8; iy++, src += 4) + for (int ix = 0; ix < 4; ix++) + { + int val = src[ix]; + dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4); + dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF); + } + } + return PC_TEX_FMT_I4_AS_I8; + case GX_TF_I8: // speed critical + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 4; iy++, src += 8) + memcpy(dst + (y + iy)*width+x, src, 8); + } + return PC_TEX_FMT_I8; + case GX_TF_C8: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesC8_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, src, tlutaddr); + } + else + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesC8_To_Raw16((u16*)dst + (y + iy) * width + x, src, tlutaddr); + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_IA4: + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 8) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesIA4((u16*)dst + (y + iy) * width + x, src); + } + return PC_TEX_FMT_IA4_AS_IA8; + case GX_TF_IA8: + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + { + u16 *ptr = (u16 *)dst + (y + iy) * width + x; + u16 *s = (u16 *)src; + for(int j = 0; j < 4; j++) + *ptr++ = Common::swap16(*s++); + } + + } + return PC_TEX_FMT_IA8; + case GX_TF_C14X2: + if (tlutfmt == 2) + { + // Special decoding is required for TLUT format 5A3 + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesC14X2_5A3_To_BGRA32((u32*)dst + (y + iy) * width + x, (u16*)src, tlutaddr); + } + else + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + decodebytesC14X2_To_Raw16((u16*)dst + (y + iy) * width + x, (u16*)src, tlutaddr); + } + return GetPCFormatFromTLUTFormat(tlutfmt); + case GX_TF_RGB565: + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + { + u16 *ptr = (u16 *)dst + (y + iy) * width + x; + u16 *s = (u16 *)src; + for(int j = 0; j < 4; j++) + *ptr++ = Common::swap16(*s++); + } + } + return PC_TEX_FMT_RGB565; + case GX_TF_RGB5A3: + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + for (int iy = 0; iy < 4; iy++, src += 8) + //decodebytesRGB5A3((u32*)dst+(y+iy)*width+x, (u16*)src, 4); + decodebytesRGB5A3((u32*)dst+(y+iy)*width+x, (u16*)src); + } + return PC_TEX_FMT_BGRA32; + case GX_TF_RGBA8: // speed critical + { + for (int y = 0; y < height; y += 4) + for (int x = 0; x < width; x += 4) + { + for (int iy = 0; iy < 4; iy++) + decodebytesARGB8_4((u32*)dst + (y+iy)*width + x, (u16*)src + 4 * iy, (u16*)src + 4 * iy + 16); + src += 64; + } + } + return PC_TEX_FMT_BGRA32; + case GX_TF_CMPR: // speed critical + // The metroid games use this format almost exclusively. + { +#if 0 // TODO - currently does not handle transparency correctly and causes problems when texture dimensions are not multiples of 8 + // 11111111 22222222 55555555 66666666 + // 33333333 44444444 77777777 88888888 + for (int y = 0; y < height; y += 8) + { + for (int x = 0; x < width; x += 8) + { + copyDXTBlock(dst+(y/2)*width+x*2, src); + src += 8; + copyDXTBlock(dst+(y/2)*width+x*2+8, src); + src += 8; + copyDXTBlock(dst+(y/2+2)*width+x*2, src); + src += 8; + copyDXTBlock(dst+(y/2+2)*width+x*2+8, src); + src += 8; + } + } + return PC_TEX_FMT_DXT1; +#else + for (int y = 0; y < height; y += 8) + { + for (int x = 0; x < width; x += 8) + { + decodeDXTBlock((u32*)dst + y * width + x, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + y * width + x + 4, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + (y + 4) * width + x, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + decodeDXTBlock((u32*)dst + (y + 4) * width + x + 4, (DXTBlock*)src, width); + src += sizeof(DXTBlock); + } + } +#endif + return PC_TEX_FMT_BGRA32; + } + } + + // The "copy" texture formats, too? + return PC_TEX_FMT_NONE; +} + + //switch endianness, unswizzle //TODO: to save memory, don't blindly convert everything to argb8888 @@ -599,12 +812,91 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh return PC_TEX_FMT_NONE; } + void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center) { TexFmt_Overlay_Enable = enable; TexFmt_Overlay_Center = center; } +void TexDecoder_DirectDecode(u8 *dst, const u8 *src, int width, int height,int Pitch, int texformat, int tlutaddr, int tlutfmt) +{ + PC_TexFormat retval = TexDecoder_DirectDecode_real(dst,src,width,height,Pitch,texformat,tlutaddr,tlutfmt); + + if ((!TexFmt_Overlay_Enable)|| (retval == PC_TEX_FMT_NONE)) + return; + + int w = min(width, 40); + int h = min(height, 10); + + int xoff = (width - w) >> 1; + int yoff = (height - h) >> 1; + + if (!TexFmt_Overlay_Center) + { + xoff=0; + yoff=0; + } + + const char* fmt = texfmt[texformat&15]; + while (*fmt) + { + int xcnt = 0; + int nchar = sfont_map[(int)*fmt]; + + const unsigned char *ptr = sfont_raw[nchar]; // each char is up to 9x10 + + for (int x = 0; x < 9;x++) + { + if (ptr[x] == 0x78) + break; + xcnt++; + } + + for (int y=0; y < 10; y++) + { + for (int x=0; x < xcnt; x++) + { + switch(retval) + { + case PC_TEX_FMT_I8: + { + // TODO: Is this an acceptable way to draw in I8? + u8 *dtp = (u8*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFF : 0x88; + break; + } + case PC_TEX_FMT_IA8: + case PC_TEX_FMT_IA4_AS_IA8: + { + u16 *dtp = (u16*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFFFF : 0xFF00; + break; + } + case PC_TEX_FMT_RGB565: + { + u16 *dtp = (u16*)dst; + dtp[(y + yoff)*width + x + xoff] = ptr[x] ? 0xFFFF : 0x0000; + break; + } + default: + case PC_TEX_FMT_BGRA32: + { + int *dtp = (int*)dst; + dtp[(y + yoff) * width + x + xoff] = ptr[x] ? 0xFFFFFFFF : 0xFF000000; + break; + } + } + } + ptr += 9; + } + xoff += xcnt; + fmt++; + } +} + + + PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) { #if defined(HAVE_OPENCL) && HAVE_OPENCL diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.h b/Source/Core/VideoCommon/Src/TextureDecoder.h index 33ba57ee2f..068be12118 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.h +++ b/Source/Core/VideoCommon/Src/TextureDecoder.h @@ -84,9 +84,11 @@ enum PC_TexFormat }; PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt); - +void TexDecoder_DirectDecode(u8 *dst, const u8 *src, int width, int height,int Pitch, int texformat, int tlutaddr, int tlutfmt); +PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt); void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt); + u32 TexDecoder_GetSafeTextureHash(const u8 *src, int width, int height, int texformat, u32 seed=0); u32 TexDecoder_GetTlutHash(const u8* src, int len); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/BPFunctions.cpp b/Source/Plugins/Plugin_VideoDX9/Src/BPFunctions.cpp index 7b8d916538..f8dbf20a5d 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/BPFunctions.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/BPFunctions.cpp @@ -71,7 +71,7 @@ void SetDitherMode(const BPCmd &bp) } void SetLogicOpMode(const BPCmd &bp) { - // Logic op blending. D3D can't do this but can fake some modes. + Renderer::SetLogicOpMode(); } void SetColorMask(const BPCmd &bp) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.cpp b/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.cpp index bbf3633344..d9ca8b4f09 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.cpp @@ -103,13 +103,20 @@ LPDIRECT3DTEXTURE9 CreateTexture2D(const u8* buffer, const int width, const int break; case D3DFMT_A8R8G8B8: { - u32* pIn = pBuffer; - for (int y = 0; y < height; y++) + /*if(Lock.Pitch == width * 4) { - u32* pBits = (u32*)((u8*)Lock.pBits + (y * Lock.Pitch)); - memcpy(pBits, pIn, width * 4); - pIn += pitch; + memcpy(Lock.pBits,buffer,width*height*4); } + else + {*/ + u32* pIn = pBuffer; + for (int y = 0; y < height; y++) + { + u32* pBits = (u32*)((u8*)Lock.pBits + (y * Lock.Pitch)); + memcpy(pBits, pIn, width * 4); + pIn += pitch; + } + //} } break; case D3DFMT_DXT1: @@ -122,6 +129,25 @@ LPDIRECT3DTEXTURE9 CreateTexture2D(const u8* buffer, const int width, const int return pTexture; } +LPDIRECT3DTEXTURE9 CreateOnlyTexture2D(const int width, const int height, D3DFORMAT fmt) +{ + LPDIRECT3DTEXTURE9 pTexture; + // crazy bitmagic, sorry :) + bool isPow2 = !((width&(width-1)) || (height&(height-1))); + bool bExpand = false; + HRESULT hr; + // TODO(ector): Allow mipmaps for non-pow textures on newer cards? + // TODO(ector): Use the game-specified mipmaps? + if (!isPow2) + hr = dev->CreateTexture(width, height, 1, 0, fmt, D3DPOOL_MANAGED, &pTexture, NULL); + else + hr = dev->CreateTexture(width, height, 0, D3DUSAGE_AUTOGENMIPMAP, fmt, D3DPOOL_MANAGED, &pTexture, NULL); + + if (FAILED(hr)) + return 0; + return pTexture; +} + void ReplaceTexture2D(LPDIRECT3DTEXTURE9 pTexture, const u8* buffer, const int width, const int height,const int pitch, D3DFORMAT fmt) { u32* pBuffer = (u32*)buffer; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.h b/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.h index 275b59a052..e9fe580177 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/D3DTexture.h @@ -25,4 +25,5 @@ namespace D3D void ReplaceTexture2D(LPDIRECT3DTEXTURE9 pTexture, const u8* buffer, const int width, const int height,const int pitch, D3DFORMAT fmt); LPDIRECT3DTEXTURE9 CreateRenderTarget(const int width, const int height); LPDIRECT3DSURFACE9 CreateDepthStencilSurface(const int width, const int height); + LPDIRECT3DTEXTURE9 CreateOnlyTexture2D(const int width, const int height, D3DFORMAT fmt); } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 04e8d65f52..ae5d3b81e9 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -91,6 +91,86 @@ static const D3DBLEND d3dDestFactors[8] = D3DBLEND_INVDESTALPHA }; +// 0 0x00 +// 1 Source & destination +// 2 Source & ~destination +// 3 Source +// 4 ~Source & destination +// 5 Destination +// 6 Source ^ destination = Source & ~destination | ~Source & destination +// 7 Source | destination + +// 8 ~(Source | destination) +// 9 ~(Source ^ destination) = ~Source & ~destination | Source & destination +// 10 ~Destination +// 11 Source | ~destination +// 12 ~Source +// 13 ~Source | destination +// 14 ~(Source & destination) +// 15 0xff + +static const D3DBLENDOP d3dLogincOPop[16] = +{ + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + D3DBLENDOP_ADD, + + D3DBLENDOP_REVSUBTRACT, + D3DBLENDOP_REVSUBTRACT, + D3DBLENDOP_SUBTRACT, + D3DBLENDOP_SUBTRACT, + D3DBLENDOP_REVSUBTRACT, + D3DBLENDOP_REVSUBTRACT, + D3DBLENDOP_SUBTRACT, + D3DBLENDOP_ADD +}; + +static const D3DBLEND d3dLogicOpSrcFactors[16] = +{ + D3DBLEND_ZERO, + D3DBLEND_DESTCOLOR, + D3DBLEND_INVDESTCOLOR, + D3DBLEND_ONE, + D3DBLEND_ZERO, + D3DBLEND_ZERO, + D3DBLEND_INVDESTCOLOR, + D3DBLEND_ONE, + + D3DBLEND_ONE, + D3DBLEND_INVDESTCOLOR, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE +}; + +static const D3DBLEND d3dLogicOpDestFactors[16] = +{ + D3DBLEND_ZERO, + D3DBLEND_ZERO, + D3DBLEND_ZERO, + D3DBLEND_ZERO, + D3DBLEND_INVSRCCOLOR, + D3DBLEND_ONE, + D3DBLEND_INVSRCCOLOR, + D3DBLEND_ONE, + + D3DBLEND_ONE, + D3DBLEND_SRCCOLOR, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_ONE, + D3DBLEND_SRCCOLOR, + D3DBLEND_ONE +}; static const D3DCULL d3dCullModes[4] = { @@ -160,7 +240,7 @@ bool Renderer::Init() { UpdateActiveConfig(); int fullScreenRes, w_temp, h_temp; - s_blendMode = 0; + s_blendMode = 0; int backbuffer_ms_mode = 0; // g_ActiveConfig.iMultisampleMode; sscanf(g_Config.cFSResolution, "%dx%d", &w_temp, &h_temp); @@ -823,10 +903,10 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE D3D::dev->SetScissorRect(&sirc); if(zEnable) D3D::SetRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS); - D3D::drawClearQuad(&sirc,color,(z & 0xFFFFFF) / float(0xFFFFFF),PixelShaderCache::GetClearProgram(),VertexShaderCache::GetSimpleVertexShader()); + D3D::drawClearQuad(&sirc,color ,(z & 0xFFFFFF) / float(0xFFFFFF),PixelShaderCache::GetClearProgram(),VertexShaderCache::GetSimpleVertexShader()); if(zEnable) D3D::SetRenderState(D3DRS_ZFUNC, d3dCmpFuncs[bpmem.zmode.func]); - //D3D::dev->Clear(0, NULL, (colorEnable ? D3DCLEAR_TARGET : 0)| ( zEnable ? D3DCLEAR_ZBUFFER : 0), color,(z & 0xFFFFFF) / float(0xFFFFFF), 0); + //D3D::dev->Clear(0, NULL, (colorEnable ? D3DCLEAR_TARGET : 0)| ( zEnable ? D3DCLEAR_ZBUFFER : 0), color | ((alphaEnable)?0:0xFF000000),(z & 0xFFFFFF) / float(0xFFFFFF), 0); SetScissorRect(); UpdateViewport(); } @@ -838,7 +918,8 @@ void Renderer::SetBlendMode(bool forceUpdate) // 2 - reverse subtract enable (else add) // 3-5 - srcRGB function // 6-8 - dstRGB function - + if(bpmem.blendmode.logicopenable) + return; u32 newval = bpmem.blendmode.subtract << 2; if (bpmem.blendmode.subtract) { @@ -937,27 +1018,12 @@ void Renderer::RestoreAPIState() D3D::SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); SetScissorRect(); SetColorMask(); - SetBlendMode(true); + SetLogicOpMode(); } void Renderer::SetGenerationMode() { - D3D::SetRenderState(D3DRS_CULLMODE, d3dCullModes[bpmem.genMode.cullmode]); - - if (bpmem.genMode.cullmode == 3) - { - D3D::SetRenderState(D3DRS_COLORWRITEENABLE, 0); - } - else - { - DWORD write = 0; - if (bpmem.blendmode.alphaupdate) - write = D3DCOLORWRITEENABLE_ALPHA; - if (bpmem.blendmode.colorupdate) - write |= D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE; - - D3D::SetRenderState(D3DRS_COLORWRITEENABLE, write); - } + D3D::SetRenderState(D3DRS_CULLMODE, d3dCullModes[bpmem.genMode.cullmode]); } void Renderer::SetDepthMode() @@ -977,7 +1043,17 @@ void Renderer::SetDepthMode() void Renderer::SetLogicOpMode() { - //TODO + if (bpmem.blendmode.logicopenable) + { + D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, 1); + D3D::SetRenderState(D3DRS_BLENDOP, d3dLogincOPop[bpmem.blendmode.logicmode]); + D3D::SetRenderState(D3DRS_SRCBLEND, d3dLogicOpSrcFactors[bpmem.blendmode.logicmode]); + D3D::SetRenderState(D3DRS_DESTBLEND, d3dLogicOpDestFactors[bpmem.blendmode.logicmode]); + } + else + { + SetBlendMode(true); + } } void Renderer::SetDitherMode() diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 0552cefcf2..bea1631ea1 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -133,7 +133,7 @@ void TextureCache::Cleanup() } } -TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) +/*TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) { if (address == 0) return NULL; @@ -203,6 +203,162 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, } } + //PC_TexFormat pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth, height, tex_format, tlutaddr, tlutfmt); + PC_TexFormat pcfmt = GetPC_TexFormat(tex_format, tlutfmt); + + D3DFORMAT d3d_fmt; + switch (pcfmt) { + case PC_TEX_FMT_BGRA32: + case PC_TEX_FMT_RGBA32: + d3d_fmt = D3DFMT_A8R8G8B8; + break; + case PC_TEX_FMT_RGB565: + d3d_fmt = D3DFMT_R5G6B5; + break; + case PC_TEX_FMT_IA4_AS_IA8: + d3d_fmt = D3DFMT_A8L8; + break; + case PC_TEX_FMT_I8: + case PC_TEX_FMT_I4_AS_I8: + d3d_fmt = D3DFMT_A8L8; + break; + case PC_TEX_FMT_IA8: + d3d_fmt = D3DFMT_A8L8; + break; + case PC_TEX_FMT_DXT1: + d3d_fmt = D3DFMT_DXT1; + break; + } + + //Make an entry in the table + TCacheEntry& entry = textures[texID]; + + entry.oldpixel = ((u32 *)ptr)[0]; + if (g_ActiveConfig.bSafeTextureCache) + entry.hash = hash_value; + else + { + entry.hash = (u32)(((double)rand() / RAND_MAX) * 0xFFFFFFFF); + ((u32 *)ptr)[0] = entry.hash; + } + + entry.addr = address; + entry.size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, tex_format); + entry.isRenderTarget = false; + entry.isNonPow2 = ((width & (width - 1)) || (height & (height - 1))); + if (!skip_texture_create) { + entry.texture = D3D::CreateOnlyTexture2D(width, height, d3d_fmt); + } + D3DLOCKED_RECT Lock; + entry.texture->LockRect(0, &Lock, NULL, 0); + TexDecoder_DirectDecode((u8*)Lock.pBits,ptr,expandedWidth,height,Lock.Pitch,tex_format,tlutaddr,tlutfmt); + entry.texture->UnlockRect(0); + entry.frameCount = frameCount; + entry.w = width; + entry.h = height; + entry.fmt = tex_format; + + if (g_ActiveConfig.bDumpTextures) + { + // dump texture to file + char szTemp[MAX_PATH]; + char szDir[MAX_PATH]; + const char* uniqueId = globals->unique_id; + bool bCheckedDumpDir = false; + sprintf(szDir, "%s/%s", FULL_DUMP_TEXTURES_DIR, uniqueId); + if (!bCheckedDumpDir) + { + if (!File::Exists(szDir) || !File::IsDirectory(szDir)) + File::CreateDir(szDir); + + bCheckedDumpDir = true; + } + sprintf(szTemp, "%s/%s_%08x_%i.png", szDir, uniqueId, texHash, tex_format); + //sprintf(szTemp, "%s\\txt_%04i_%i.png", g_Config.texDumpPath.c_str(), counter++, format); <-- Old method + if (!File::Exists(szTemp)) + D3DXSaveTextureToFileA(szTemp,D3DXIFF_BMP,entry.texture,0); + } + + INCSTAT(stats.numTexturesCreated); + SETSTAT(stats.numTexturesAlive, (int)textures.size()); + + //Set the texture! + D3D::SetTexture(stage, entry.texture); + + DEBUGGER_PAUSE_LOG_AT(NEXT_NEW_TEXTURE,true,{printf("A new texture (%d x %d) is loaded", width, height);}); + return &entry; +}*/ + +TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, int height, int tex_format, int tlutaddr, int tlutfmt) +{ + if (address == 0) + return NULL; + + u8 *ptr = g_VideoInitialize.pGetMemoryPointer(address); + int bsw = TexDecoder_GetBlockWidthInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; + int bsh = TexDecoder_GetBlockHeightInTexels(tex_format) - 1; //TexelSizeInNibbles(format)*width*height/16; + int expandedWidth = (width + bsw) & (~bsw); + int expandedHeight = (height + bsh) & (~bsh); + + u32 hash_value; + u32 texID = address; + u32 texHash; + + if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bDumpTextures) + { + texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); + if (g_ActiveConfig.bSafeTextureCache) + hash_value = texHash; + if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) + { + // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) + // tlut size can be up to 32768B (GX_TF_C14X2) but Safer == Slower. + // This trick (to change the texID depending on the TLUT addr) is a trick to get around + // an issue with metroid prime's fonts, where it has multiple sets of fonts on top of + // each other stored in a single texture, and uses the palette to make different characters + // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, + // we must make sure that texture with different tluts get different IDs. + u32 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); + texHash ^= tlutHash; + if (g_ActiveConfig.bSafeTextureCache) + texID ^= tlutHash; + } + } + + bool skip_texture_create = false; + TexCache::iterator iter = textures.find(texID); + + if (iter != textures.end()) + { + TCacheEntry &entry = iter->second; + + if (!g_ActiveConfig.bSafeTextureCache) + hash_value = ((u32 *)ptr)[0]; + + if (entry.isRenderTarget || ((address == entry.addr) && (hash_value == entry.hash))) + { + entry.frameCount = frameCount; + D3D::SetTexture(stage, entry.texture); + return &entry; + } + else + { + // Let's reload the new texture data into the same texture, + // instead of destroying it and having to create a new one. + // Might speed up movie playback very, very slightly. + + if (width == entry.w && height==entry.h &&(tex_format | (tlutfmt << 16)) == entry.fmt) + { + skip_texture_create = true; + } + else + { + entry.Destroy(false); + textures.erase(iter); + } + } + } + PC_TexFormat pcfmt = TexDecoder_Decode(temp, ptr, expandedWidth, height, tex_format, tlutaddr, tlutfmt); D3DFORMAT d3d_fmt; @@ -255,7 +411,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, entry.frameCount = frameCount; entry.w = width; entry.h = height; - entry.fmt = tex_format; + entry.fmt = tex_format | (tlutfmt << 16); if (g_ActiveConfig.bDumpTextures) { @@ -286,8 +442,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, DEBUGGER_PAUSE_LOG_AT(NEXT_NEW_TEXTURE,true,{printf("A new texture (%d x %d) is loaded", width, height);}); return &entry; -} - +} void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle &source_rect) { int efb_w = source_rect.GetWidth(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h index 66ead6d5ec..fcb8f5e632 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.h @@ -40,7 +40,9 @@ public: int frameCount; int w, h, fmt; - + + float scaleX, scaleY; // Hires texutres need this + bool isRenderTarget; bool isNonPow2; @@ -51,6 +53,12 @@ public: hash = 0; paletteHash = 0; oldpixel = 0; + addr = 0; + size_in_bytes = 0; + frameCount = 0; + scaleX = 1.0f; + scaleY = 1.0f; + isNonPow2 = true; } void Destroy(bool shutdown); bool IntersectsMemoryRange(u32 range_address, u32 range_size); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index 6de402885c..c3191def13 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -363,8 +363,8 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf TextureConversionShader::SetShaderParameters( (float)expandedWidth, expandedHeight * MValueY, - ceilf(source.left * MValueX + 1.0f), - ceilf(source.top * MValueY + 1.0f), + source.left * MValueX, + source.top * MValueY, sampleStride * MValueX, sampleStride * MValueY, (float)Renderer::GetTargetWidth(), diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index d252afeea4..cde1200475 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -252,12 +252,15 @@ void Flush() if (tentry) { PixelShaderManager::SetTexDims(i, tentry->w, tentry->h, 0, 0); + if (tentry->scaleX != 1.0f || tentry->scaleY != 1.0f) + PixelShaderManager::SetCustomTexScale(i, tentry->scaleX, tentry->scaleY); } else { DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to load texture\n");}); ERROR_LOG(VIDEO, "error loading texture"); } + } } PixelShaderManager::SetTexturesUsed(0); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index d894e60d75..cfd0133ff8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -293,13 +293,13 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf float MValueX = Renderer::GetTargetScaleX(); float MValueY = Renderer::GetTargetScaleY(); - float top = Renderer::GetTargetHeight() - floorf((source.top + expandedHeight) * MValueY - 0.5f); + float top = Renderer::GetTargetHeight() - (source.top + expandedHeight) * MValueY ; float sampleStride = bScaleByHalf?2.0f:1.0f; TextureConversionShader::SetShaderParameters((float)expandedWidth, expandedHeight * MValueY, - ceilf(source.left * MValueX + 0.5f), + source.left * MValueX, top, sampleStride * MValueX, sampleStride * MValueY); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 803151afb4..b151ce37fe 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -310,7 +310,7 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width // Let's reload the new texture data into the same texture, // instead of destroying it and having to create a new one. // Might speed up movie playback very, very slightly. - if (width == entry.w && height == entry.h && tex_format == entry.fmt) + if (width == entry.w && height == entry.h && (tex_format | (tlutfmt << 16)) == entry.fmt) { glBindTexture(entry.isRectangle ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D, entry.texture); if (entry.mode.hex != tm0.hex) @@ -463,7 +463,7 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width entry.frameCount = frameCount; entry.w = width; entry.h = height; - entry.fmt = tex_format; + entry.fmt = (tex_format | (tlutfmt << 16)); entry.SetTextureParameters(tm0); if (g_ActiveConfig.bDumpTextures) // dump texture to file @@ -624,6 +624,8 @@ void TextureMngr::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool int w = (abs(source_rect.GetWidth()) >> bScaleByHalf); int h = (abs(source_rect.GetHeight()) >> bScaleByHalf); + + GL_REPORT_ERRORD(); if (!bIsInit)