diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index cb022893fd..0a62bea162 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -451,7 +451,8 @@ union TexMode0 unsigned mag_filter : 1; unsigned min_filter : 3; unsigned diag_lod : 1; - signed lod_bias : 10; + signed lod_bias : 8; + unsigned pad0 : 2; unsigned max_aniso : 2; unsigned lod_clamp : 1; }; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp index 0507503267..b97ad3795d 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.cpp @@ -90,13 +90,13 @@ namespace Clipper static inline int CalcClipMask(OutputVertexData *v) { int cmask = 0; - float* pos = v->projectedPosition; - if (pos[3] - pos[0] < 0) cmask |= CLIP_POS_X_BIT; - if (pos[0] + pos[3] < 0) cmask |= CLIP_NEG_X_BIT; - if (pos[3] - pos[1] < 0) cmask |= CLIP_POS_Y_BIT; - if (pos[1] + pos[3] < 0) cmask |= CLIP_NEG_Y_BIT; - if (pos[3] * pos[2] > 0) cmask |= CLIP_POS_Z_BIT; - if (pos[2] + pos[3] < 0) cmask |= CLIP_NEG_Z_BIT; + Vec4 pos = v->projectedPosition; + if (pos.w - pos.x < 0) cmask |= CLIP_POS_X_BIT; + if (pos.x + pos.w < 0) cmask |= CLIP_NEG_X_BIT; + if (pos.w - pos.y < 0) cmask |= CLIP_POS_Y_BIT; + if (pos.y + pos.w < 0) cmask |= CLIP_NEG_Y_BIT; + if (pos.w * pos.z > 0) cmask |= CLIP_POS_Z_BIT; + if (pos.z + pos.w < 0) cmask |= CLIP_NEG_Z_BIT; return cmask; } @@ -109,7 +109,7 @@ namespace Clipper #define DIFFERENT_SIGNS(x,y) ((x <= 0 && y > 0) || (x > 0 && y <= 0)) #define CLIP_DOTPROD(I, A, B, C, D) \ - (Vertices[I]->projectedPosition[0] * A + Vertices[I]->projectedPosition[1] * B + Vertices[I]->projectedPosition[2] * C + Vertices[I]->projectedPosition[3] * D) + (Vertices[I]->projectedPosition.x * A + Vertices[I]->projectedPosition.y * B + Vertices[I]->projectedPosition.z * C + Vertices[I]->projectedPosition.w * D) #define 
POLY_CLIP( PLANE_BIT, A, B, C, D ) \ { \ @@ -153,6 +153,27 @@ namespace Clipper } \ } + #define LINE_CLIP(PLANE_BIT, A, B, C, D ) \ + { \ + if (mask & PLANE_BIT) { \ + const float dp0 = CLIP_DOTPROD( 0, A, B, C, D ); \ + const float dp1 = CLIP_DOTPROD( 1, A, B, C, D ); \ + const bool neg_dp0 = dp0 < 0; \ + const bool neg_dp1 = dp1 < 0; \ + \ + if (neg_dp0 && neg_dp1) \ + return; \ + \ + if (neg_dp1) { \ + float t = dp1 / (dp1 - dp0); \ + if (t > t1) t1 = t; \ + } else if (neg_dp0) { \ + float t = dp0 / (dp0 - dp1); \ + if (t > t0) t0 = t; \ + } \ + } \ + } + void ClipTriangle(int *indices, int &numIndices) { int mask = 0; @@ -202,6 +223,53 @@ namespace Clipper } } + void ClipLine(int *indices) + { + int mask = 0; + int clip_mask[2] = { 0, 0 }; + + for (int i = 0; i < 2; ++i) + { + clip_mask[i] = CalcClipMask(Vertices[i]); + mask |= clip_mask[i]; + } + + if (mask == 0) + return; + + float t0 = 0; + float t1 = 0; + + // Mark unused in case of early termination + // of the macros below. (When fully clipped) + indices[0] = SKIP_FLAG; + indices[1] = SKIP_FLAG; + + LINE_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1); + LINE_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1); + LINE_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1); + LINE_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1); + LINE_CLIP(CLIP_POS_Z_BIT, 0, 0, -1, 1); + LINE_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1); + + // Restore the old values as this line + // was not fully clipped. 
+ indices[0] = 0; + indices[1] = 1; + + int numVertices = 2; + + if (clip_mask[0]) { + indices[0] = numVertices; + AddInterpolatedVertex(t0, 0, 1, numVertices); + } + + if (clip_mask[1]) { + indices[1] = numVertices; + AddInterpolatedVertex(t1, 1, 0, numVertices); + } + } + void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2) { if (stats.thisFrame.numDrawnObjects < g_Config.drawStart || stats.thisFrame.numDrawnObjects >= g_Config.drawEnd ) @@ -247,6 +315,75 @@ namespace Clipper } } + void CopyVertex(OutputVertexData *dst, OutputVertexData *src, float dx, float dy, unsigned int sOffset) + { + dst->screenPosition.x = src->screenPosition.x + dx; + dst->screenPosition.y = src->screenPosition.y + dy; + dst->screenPosition.z = src->screenPosition.z; + + for (int i = 0; i < 3; ++i) + dst->normal[i] = src->normal[i]; + + for (int i = 0; i < 4; ++i) + dst->color[0][i] = src->color[0][i]; + + // todo - s offset + for (int i = 0; i < 8; ++i) + dst->texCoords[i] = src->texCoords[i]; + } + + void ProcessLine(OutputVertexData *lineV0, OutputVertexData *lineV1) + { + int indices[4] = { 0, 1, SKIP_FLAG, SKIP_FLAG }; + + Vertices[0] = lineV0; + Vertices[1] = lineV1; + + ClipLine(indices); + + if(indices[0] != SKIP_FLAG) + { + OutputVertexData *v0 = Vertices[indices[0]]; + OutputVertexData *v1 = Vertices[indices[1]]; + + PerspectiveDivide(v0); + PerspectiveDivide(v1); + + float dx = v1->screenPosition.x - v0->screenPosition.x; + float dy = v1->screenPosition.y - v0->screenPosition.y; + + float screenDx = 0; + float screenDy = 0; + + if(abs(dx) > abs(dy)) + { + if(dx > 0) + screenDy = bpmem.lineptwidth.linesize / -12.0f; + else + screenDy = bpmem.lineptwidth.linesize / 12.0f; + } + else + { + if(dy > 0) + screenDx = bpmem.lineptwidth.linesize / 12.0f; + else + screenDx = bpmem.lineptwidth.linesize / -12.0f; + } + + OutputVertexData triangle[3]; + + CopyVertex(&triangle[0], v0, screenDx, screenDy, 0); + CopyVertex(&triangle[1], v1, screenDx, screenDy, 
0); + CopyVertex(&triangle[2], v1, -screenDx, -screenDy, bpmem.lineptwidth.lineoff); + + // ccw winding + Rasterizer::DrawTriangleFrontFace(&triangle[2], &triangle[1], &triangle[0]); + + CopyVertex(&triangle[1], v0, -screenDx, -screenDy, bpmem.lineptwidth.lineoff); + + Rasterizer::DrawTriangleFrontFace(&triangle[0], &triangle[1], &triangle[2]); + } + } bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface) { @@ -260,15 +397,15 @@ namespace Clipper return false; } - float x0 = v0->projectedPosition[0]; - float x1 = v1->projectedPosition[0]; - float x2 = v2->projectedPosition[0]; - float y1 = v1->projectedPosition[1]; - float y0 = v0->projectedPosition[1]; - float y2 = v2->projectedPosition[1]; - float w0 = v0->projectedPosition[3]; - float w1 = v1->projectedPosition[3]; - float w2 = v2->projectedPosition[3]; + float x0 = v0->projectedPosition.x; + float x1 = v1->projectedPosition.x; + float x2 = v2->projectedPosition.x; + float y1 = v1->projectedPosition.y; + float y0 = v0->projectedPosition.y; + float y2 = v2->projectedPosition.y; + float w0 = v0->projectedPosition.w; + float w1 = v1->projectedPosition.w; + float w2 = v2->projectedPosition.w; float normalZDir = (x0*w2 - x2*w0)*y1 + (x2*y0 - x0*y2)*w1 + (y2*w0 - y0*w2)*x1; @@ -291,13 +428,13 @@ namespace Clipper void PerspectiveDivide(OutputVertexData *vertex) { - float *projected = vertex->projectedPosition; - float *screen = vertex->screenPosition; + Vec4 &projected = vertex->projectedPosition; + Vec3 &screen = vertex->screenPosition; - float wInverse = 1.0f/projected[3]; - screen[0] = projected[0] * wInverse * xfregs.viewport.wd + m_ViewOffset[0]; - screen[1] = projected[1] * wInverse * xfregs.viewport.ht + m_ViewOffset[1]; - screen[2] = projected[2] * wInverse + m_ViewOffset[2]; + float wInverse = 1.0f/projected.w; + screen.x = projected.x * wInverse * xfregs.viewport.wd + m_ViewOffset[0]; + screen.y = projected.y * wInverse * xfregs.viewport.ht + m_ViewOffset[1]; + 
screen.z = projected.z * wInverse + m_ViewOffset[2]; } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h index 476b224783..ee9e1d8ebb 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Clipper.h @@ -31,6 +31,7 @@ namespace Clipper void ProcessTriangle(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2); + void ProcessLine(OutputVertexData *v0, OutputVertexData *v1); bool CullTest(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2, bool &backface); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp index 95a73d6721..05cb82ee06 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.cpp @@ -49,36 +49,32 @@ void Init() } } -bool SaveTexture(const char* filename, u32 texmap, int width, int height) -{ - u8 *data = new u8[width * height * 4]; - - GetTextureBGRA(data, texmap, width, height); - - bool result = SaveTGA(filename, width, height, data); - - delete []data; - - return result; -} - -void SaveTexture(const char* filename, u32 texmap) +void SaveTexture(const char* filename, u32 texmap, s32 mip) { FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; u8 subTexmap = texmap & 3; TexImage0& ti0 = texUnit.texImage0[subTexmap]; - SaveTexture(filename, texmap, ti0.width + 1, ti0.height + 1); + int width = ti0.width + 1; + int height = ti0.height + 1; + + u8 *data = new u8[width * height * 4]; + + GetTextureBGRA(data, texmap, mip, width, height); + + bool result = SaveTGA(filename, width, height, data); + + delete []data; } -void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height) +void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height) { u8 sample[4]; for (int y = 0; y < height; y++) for (int x = 0; x < width; x++) { - TextureSampler::Sample((float)x, (float)y, 
0, texmap, sample); + TextureSampler::SampleMip(x << 7, y << 7, mip, false, texmap, sample); // rgba to bgra *(dst++) = sample[2]; *(dst++) = sample[1]; @@ -87,13 +83,32 @@ void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height) } } +s32 GetMaxTextureLod(u32 texmap) +{ + FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; + u8 subTexmap = texmap & 3; + + u8 maxLod = texUnit.texMode1[subTexmap].max_lod; + u8 mip = maxLod >> 4; + u8 fract = maxLod & 0xf; + + if(fract) + ++mip; + + return (s32)mip; +} + void DumpActiveTextures() { for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++) { u32 texmap = bpmem.tevindref.getTexMap(stageNum); - SaveTexture(StringFromFormat("%star%i_ind%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap); + s32 maxLod = GetMaxTextureLod(texmap); + for (s32 mip = 0; mip < maxLod; ++mip) + { + SaveTexture(StringFromFormat("%star%i_ind%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip); + } } for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++) @@ -104,7 +119,11 @@ void DumpActiveTextures() int texmap = order.getTexMap(stageOdd); - SaveTexture(StringFromFormat("%star%i_stage%i_map%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap).c_str(), texmap); + s32 maxLod = GetMaxTextureLod(texmap); + for (s32 mip = 0; mip < maxLod; ++mip) + { + SaveTexture(StringFromFormat("%star%i_stage%i_map%i_mip%i.tga", File::GetUserPath(D_DUMPTEXTURES_IDX), stats.thisFrame.numDrawnObjects, stageNum, texmap, mip).c_str(), texmap, mip); + } } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h index c03d291f66..d40a4dc3e7 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h +++ 
b/Source/Plugins/Plugin_VideoSoftware/Src/DebugUtil.h @@ -22,7 +22,7 @@ namespace DebugUtil { void Init(); - void GetTextureBGRA(u8 *dst, u32 texmap, int width, int height); + void GetTextureBGRA(u8 *dst, u32 texmap, s32 mip, int width, int height); void DumpActiveTextures(); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp index ab70b0fa96..5975e0967b 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/HwRasterizer.cpp @@ -155,7 +155,7 @@ namespace HwRasterizer int width = texImage0.width; int height = texImage0.height; - DebugUtil::GetTextureBGRA(temp, 0, width, height); + DebugUtil::GetTextureBGRA(temp, 0, 0, width, height); glGenTextures(1, (GLuint *)&texture); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h b/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h index befc048f37..a4e9af8dad 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/NativeVertexFormat.h @@ -18,6 +18,8 @@ #ifndef _NATIVEVERTEXFORMAT_H #define _NATIVEVERTEXFORMAT_H +#include "../../Plugin_VideoDX9/Src/Vec3.h" + #ifdef WIN32 #define LOADERDECL __cdecl #else @@ -26,25 +28,33 @@ typedef void (LOADERDECL *TPipelineFunction)(); +struct Vec4 +{ + float x; + float y; + float z; + float w; +}; + struct InputVertexData { u8 posMtx; u8 texMtx[8]; - float position[4]; - float normal[3][3]; + Vec3 position; + Vec3 normal[3]; u8 color[2][4]; float texCoords[8][2]; }; struct OutputVertexData { - float mvPosition[3]; - float projectedPosition[4]; - float screenPosition[3]; - float normal[3][3]; + Vec3 mvPosition; + Vec4 projectedPosition; + Vec3 screenPosition; + Vec3 normal[3]; u8 color[2][4]; - float texCoords[8][3]; + Vec3 texCoords[8]; void Lerp(float t, OutputVertexData *a, OutputVertexData *b) { @@ -52,17 
+62,16 @@ struct OutputVertexData #define LINTERP_INT(T, OUT, IN) (OUT) + (((IN - OUT) * T) >> 8) - for (int i = 0; i < 3; ++i) - mvPosition[i] = LINTERP(t, a->mvPosition[i], b->mvPosition[i]); + mvPosition = LINTERP(t, a->mvPosition, b->mvPosition); - for (int i = 0; i < 4; ++i) - projectedPosition[i] = LINTERP(t, a->projectedPosition[i], b->projectedPosition[i]); + projectedPosition.x = LINTERP(t, a->projectedPosition.x, b->projectedPosition.x); + projectedPosition.y = LINTERP(t, a->projectedPosition.y, b->projectedPosition.y); + projectedPosition.z = LINTERP(t, a->projectedPosition.z, b->projectedPosition.z); + projectedPosition.w = LINTERP(t, a->projectedPosition.w, b->projectedPosition.w); for (int i = 0; i < 3; ++i) { - normal[i][0] = LINTERP(t, a->normal[i][0], b->normal[i][0]); - normal[i][1] = LINTERP(t, a->normal[i][1], b->normal[i][1]); - normal[i][2] = LINTERP(t, a->normal[i][2], b->normal[i][2]); + normal[i] = LINTERP(t, a->normal[i], b->normal[i]); } u16 t_int = (u16)(t * 256); @@ -74,9 +83,7 @@ struct OutputVertexData for (int i = 0; i < 8; ++i) { - texCoords[i][0] = LINTERP(t, a->texCoords[i][0], b->texCoords[i][0]); - texCoords[i][1] = LINTERP(t, a->texCoords[i][1], b->texCoords[i][1]); - texCoords[i][2] = LINTERP(t, a->texCoords[i][2], b->texCoords[i][2]); + texCoords[i] = LINTERP(t, a->texCoords[i], b->texCoords[i]); } #undef LINTERP diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp index cf85d494ac..22ecb1c6d5 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp @@ -27,8 +27,20 @@ #include "VideoConfig.h" -#define BLOCK_SIZE 8 +#define BLOCK_SIZE 2 +#define CLAMP(x, a, b) (x>b)?b:(x < a)?a:x + +// returns approximation of log2(f) in s28.4 +// results are close enough to use for LOD +inline s32 FixedLog2(float f) +{ + u32 *x = (u32*)&f; + s32 logInt = ((*x & 0x7F800000) >> 19) - 2032; // integer part + s32 logFract = (*x & 0x007fffff) >> 19; // approximate fractional part + + return logInt + logFract; +} namespace Rasterizer { @@ -43,6 +55,7 @@ s32 scissorRight = 0; s32 scissorBottom = 
0; Tev tev; +RasterBlock rasterBlock; void Init() { @@ -91,53 +104,58 @@ void SetTevReg(int reg, int comp, bool konst, s16 color) tev.SetRegColor(reg, comp, konst, color); } -inline void Draw(s32 x, s32 y) +inline void Draw(s32 x, s32 y, s32 xi, s32 yi) { INCSTAT(stats.thisFrame.rasterizedPixels); - float zFloat = 1.0f + ZSlope.GetValue(x, y); - if(zFloat < 0|| zFloat > 1) - return; + float zFloat = 1.0f + ZSlope.GetValue(x, y); + if (zFloat < 0.0f || zFloat > 1.0f) + return; - u32 z = (u32)(zFloat * 0x00ffffff); + s32 z = (s32)(zFloat * 0x00ffffff); - if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable) - { - // early z - if (!EfbInterface::ZCompare(x, y, z)) - return; - } + if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable) + { + // early z + if (!EfbInterface::ZCompare(x, y, z)) + return; + } - float invW = 1.0f / WSlope.GetValue(x, y); + RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; - tev.Position[0] = x; - tev.Position[1] = y; - tev.Position[2] = z; + float invW = pixel.InvW; - for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) - { - for(int comp = 0; comp < 4; comp++) - tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y); - } + tev.Position[0] = x; + tev.Position[1] = y; + tev.Position[2] = z; - for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) - { - if (xfregs.texMtxInfo[i].projection) - { - float q = TexSlopes[i][2].GetValue(x, y) * invW; - float invQ = invW / q; - tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invQ * (bpmem.texcoords[i].s.scale_minus_1 + 1); - tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invQ * (bpmem.texcoords[i].t.scale_minus_1 + 1); - tev.Lod[i] = 0; - } - else - { - tev.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * invW * (bpmem.texcoords[i].s.scale_minus_1 + 1); - tev.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * invW * (bpmem.texcoords[i].t.scale_minus_1 + 1); - tev.Lod[i] = 0; - } - } + // colors + for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) + { + for(int comp = 0; comp < 4; 
comp++) + tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(x, y); + } + // tex coords + for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) + { + // multiply by 128 because TEV stores UVs as s17.7 + tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128); + tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128); + } + + for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++) + { + tev.IndirectLod[i] = rasterBlock.IndirectLod[i]; + tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i]; + } + + for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++) + { + tev.TextureLod[i] = rasterBlock.TextureLod[i]; + tev.TextureLinear[i] = rasterBlock.TextureLinear[i]; + } + tev.Draw(); } @@ -155,6 +173,109 @@ void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX1 slope->y0 = Y1; } +inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord) +{ + FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; + u8 subTexmap = texmap & 3; + + // LOD calculation requires data from the texture mode for bias, etc. 
+ // it does not seem to use the actual texture size + TexMode0& tm0 = texUnit.texMode0[subTexmap]; + TexMode1& tm1 = texUnit.texMode1[subTexmap]; + + float sDelta, tDelta; + if (tm0.diag_lod) + { + float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord]; + float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord]; + + sDelta = abs(uv0[0] - uv1[0]); + tDelta = abs(uv0[1] - uv1[1]); + } + else + { + float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord]; + float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord]; + float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord]; + + sDelta = max(abs(uv0[0] - uv1[0]), abs(uv0[0] - uv2[0])); + tDelta = max(abs(uv0[1] - uv1[1]), abs(uv0[1] - uv2[1])); + } + + // get LOD in s28.4 + lod = FixedLog2(max(sDelta, tDelta)); + + // bias is s2.5 + int bias = tm0.lod_bias; + bias >>= 1; + lod += bias; + + linear = (lod >= 0 && (tm0.min_filter & 4) || lod < 0 && tm0.mag_filter); + + // order of checks matters + // should be: + // if lod > max then max + // else if lod < min then min + lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod); +} + +void BuildBlock(s32 blockX, s32 blockY) +{ + for (s32 yi = 0; yi < BLOCK_SIZE; yi++) + { + for (s32 xi = 0; xi < BLOCK_SIZE; xi++) + { + RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; + + s32 x = xi + blockX; + s32 y = yi + blockY; + + float invW = 1.0f / WSlope.GetValue(x, y); + pixel.InvW = invW; + + // tex coords + for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) + { + float projection; + if (xfregs.texMtxInfo[i].projection) + { + float q = TexSlopes[i][2].GetValue(x, y) * invW; + projection = invW / q; + } + else + projection = invW; + + pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection; + pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection; + } + } + } + + u32 indref = bpmem.tevindref.hex; + for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++) + { + u32 texmap = indref & 3; + indref >>= 3; + u32 texcoord = indref & 3; + indref >>= 3; + + CalculateLOD(rasterBlock.IndirectLod[i], 
rasterBlock.IndirectLinear[i], texmap, texcoord); + } + + for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++) + { + int stageOdd = i&1; + TwoTevStageOrders &order = bpmem.tevorders[i >> 1]; + if(order.getEnable(stageOdd)) + { + u32 texmap = order.getTexMap(stageOdd); + u32 texcoord = order.getTexCoord(stageOdd); + + CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord); + } + } +} + void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2) { INCSTAT(stats.thisFrame.numTrianglesDrawn); @@ -217,7 +338,7 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer float fltdy12 = flty1 - v1->screenPosition[1]; float fltdy31 = v2->screenPosition[1] - flty1; - float w[3] = { 1.0f / v0->projectedPosition[3], 1.0f / v1->projectedPosition[3], 1.0f / v2->projectedPosition[3] }; + float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w }; InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1); InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31, fltx1, flty1); @@ -281,14 +402,16 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer // Skip block when outside an edge if(a == 0x0 || b == 0x0 || c == 0x0) continue; + BuildBlock(x, y); + // Accept whole block when totally covered if(a == 0xF && b == 0xF && c == 0xF) { for(s32 iy = 0; iy < BLOCK_SIZE; iy++) { - for(s32 ix = x; ix < x + BLOCK_SIZE; ix++) + for(s32 ix = 0; ix < BLOCK_SIZE; ix++) { - Draw(ix, iy + y); + Draw(x + ix, y + iy, ix, iy); } } } @@ -298,17 +421,17 @@ void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVer s32 CY2 = C2 + DX23 * y0 - DY23 * x0; s32 CY3 = C3 + DX31 * y0 - DY31 * x0; - for(s32 iy = y; iy < y + BLOCK_SIZE; iy++) + for(s32 iy = 0; iy < BLOCK_SIZE; iy++) { s32 CX1 = CY1; s32 CX2 = CY2; 
s32 CX3 = CY3; - for(s32 ix = x; ix < x + BLOCK_SIZE; ix++) + for(s32 ix = 0; ix < BLOCK_SIZE; ix++) { if(CX1 > 0 && CX2 > 0 && CX3 > 0) { - Draw(ix, iy); + Draw(x + ix, y + iy, ix, iy); } CX1 -= FDY12; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h index 33c152703e..403b0459ba 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.h @@ -39,6 +39,21 @@ namespace Rasterizer float y0; float GetValue(s32 x, s32 y) { return f0 + (dfdx * (x - x0)) + (dfdy * (y - y0)); } }; + + struct RasterBlockPixel + { + float InvW; + float Uv[8][2]; + }; + + struct RasterBlock + { + RasterBlockPixel Pixel[2][2]; + s32 IndirectLod[4]; + bool IndirectLinear[4]; + s32 TextureLod[16]; + bool TextureLinear[16]; + }; } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp index de28989972..6bc92dc071 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SetupUnit.cpp @@ -134,10 +134,38 @@ void SetupUnit::SetupTriFan() } void SetupUnit::SetupLine() -{} +{ + if (m_VertexCounter < 1) + { + m_VertexCounter++; + m_VertWritePointer = m_VertPointer[m_VertexCounter]; + return; + } + + Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]); + + m_VertexCounter = 0; + m_VertWritePointer = m_VertPointer[0]; +} void SetupUnit::SetupLineStrip() -{} +{ + if (m_VertexCounter < 1) + { + m_VertexCounter++; + m_VertWritePointer = m_VertPointer[m_VertexCounter]; + return; + } + + m_VertexCounter++; + + Clipper::ProcessLine(m_VertPointer[0], m_VertPointer[1]); + + m_VertWritePointer = m_VertPointer[0]; + + m_VertPointer[0] = m_VertPointer[1]; + m_VertPointer[1] = &m_Vertices[m_VertexCounter & 1]; +} void SetupUnit::SetupPoint() {} diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp 
index 83c095c972..680806e85a 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp @@ -439,34 +439,33 @@ static bool AlphaTest(int alpha) return true; } -inline float WrapIndirectCoord(float coord, int wrapMode) +inline s32 WrapIndirectCoord(s32 coord, int wrapMode) { switch (wrapMode) { case ITW_OFF: return coord; case ITW_256: - return fmod(coord, 256); - case ITW_128: - return fmod(coord, 128); + return (coord % (256 << 7)); + case ITW_128: + return (coord % (128 << 7)); case ITW_64: - return fmod(coord, 64); + return (coord % (64 << 7)); case ITW_32: - return fmod(coord, 32); + return (coord % (32 << 7)); case ITW_16: - return fmod(coord, 16); + return (coord % (16 << 7)); case ITW_0: return 0; } return 0; } -void Tev::Indirect(unsigned int stageNum, float s, float t) +void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) { TevStageIndirect &indirect = bpmem.tevind[stageNum]; u8 *indmap = IndirectTex[indirect.bt]; - - float indcoord[3]; + s32 indcoord[3]; // alpha bump select switch (indirect.bs) { @@ -494,32 +493,32 @@ void Tev::Indirect(unsigned int stageNum, float s, float t) // format switch(indirect.fmt) { case ITF_8: - indcoord[0] = (float)indmap[ALP_C] + bias[0]; - indcoord[1] = (float)indmap[BLU_C] + bias[1]; - indcoord[2] = (float)indmap[GRN_C] + bias[2]; + indcoord[0] = indmap[ALP_C] + bias[0]; + indcoord[1] = indmap[BLU_C] + bias[1]; + indcoord[2] = indmap[GRN_C] + bias[2]; AlphaBump = AlphaBump & 0xf8; break; case ITF_5: - indcoord[0] = (float)(indmap[ALP_C] & 0x1f) + bias[0]; - indcoord[1] = (float)(indmap[BLU_C] & 0x1f) + bias[1]; - indcoord[2] = (float)(indmap[GRN_C] & 0x1f) + bias[2]; + indcoord[0] = (indmap[ALP_C] & 0x1f) + bias[0]; + indcoord[1] = (indmap[BLU_C] & 0x1f) + bias[1]; + indcoord[2] = (indmap[GRN_C] & 0x1f) + bias[2]; AlphaBump = AlphaBump & 0xe0; break; case ITF_4: - indcoord[0] = (float)(indmap[ALP_C] & 0x0f) + bias[0]; - indcoord[1] = (float)(indmap[BLU_C] & 0x0f) + 
bias[1]; - indcoord[2] = (float)(indmap[GRN_C] & 0x0f) + bias[2]; + indcoord[0] = (indmap[ALP_C] & 0x0f) + bias[0]; + indcoord[1] = (indmap[BLU_C] & 0x0f) + bias[1]; + indcoord[2] = (indmap[GRN_C] & 0x0f) + bias[2]; AlphaBump = AlphaBump & 0xf0; break; case ITF_3: - indcoord[0] = (float)(indmap[ALP_C] & 0x07) + bias[0]; - indcoord[1] = (float)(indmap[BLU_C] & 0x07) + bias[1]; - indcoord[2] = (float)(indmap[GRN_C] & 0x07) + bias[2]; + indcoord[0] = (indmap[ALP_C] & 0x07) + bias[0]; + indcoord[1] = (indmap[BLU_C] & 0x07) + bias[1]; + indcoord[2] = (indmap[GRN_C] & 0x07) + bias[2]; AlphaBump = AlphaBump & 0xf8; break; } - float indtevtrans[2] = { 0,0 }; + s64 indtevtrans[2] = { 0,0 }; // matrix multiply int indmtxid = indirect.mid & 3; @@ -529,39 +528,40 @@ void Tev::Indirect(unsigned int stageNum, float s, float t) int scale = ((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4); - float fscale = 0.0f; + + int shift; switch (indirect.mid & 12) { - case 0: - fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f; + case 0: + shift = 3 + (17 - scale); indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]; indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]; break; case 4: // s matrix - fscale = powf(2.0f, (float)(scale - 17)) / 256; + shift = 8 + (17 - scale); indtevtrans[0] = s * indcoord[0]; indtevtrans[1] = t * indcoord[0]; break; case 8: // t matrix - fscale = powf(2.0f, (float)(scale - 17)) / 256; + shift = 8 + (17 - scale); indtevtrans[0] = s * indcoord[1]; indtevtrans[1] = t * indcoord[1]; break; } - indtevtrans[0] *= fscale; - indtevtrans[1] *= fscale; + indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift; + indtevtrans[1] = shift >= 0 ? 
indtevtrans[1] >> shift : indtevtrans[1] << -shift; } - if (indirect.fb_addprev) + if (indirect.fb_addprev) { - TexCoord[0] += WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]; - TexCoord[1] += WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]; + TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]); + TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]); } else { - TexCoord[0] = WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]; - TexCoord[1] = WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]; + TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]); + TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]); } } @@ -580,10 +580,12 @@ void Tev::Draw() u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum); u32 texmap = bpmem.tevindref.getTexMap(stageNum); - float scaleS = bpmem.texscale[stageNum2].getScaleS(stageOdd); - float scaleT = bpmem.texscale[stageNum2].getScaleT(stageOdd); + const TEXSCALE& texscale = bpmem.texscale[stageNum2]; + s32 scaleS = stageOdd ? texscale.ss1:texscale.ss0; + s32 scaleT = stageOdd ? 
texscale.ts1:texscale.ts0; - TextureSampler::Sample(Uv[texcoordSel][0] * scaleS, Uv[texcoordSel][1] * scaleT, Lod[texcoordSel], texmap, IndirectTex[stageNum]); + TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT, + IndirectLod[stageNum], IndirectLinear[stageNum], texmap, IndirectTex[stageNum]); #ifdef _DEBUG if (g_Config.bDumpTevStages) @@ -608,14 +610,14 @@ void Tev::Draw() int texcoordSel = order.getTexCoord(stageOdd); int texmap = order.getTexMap(stageOdd); - Indirect(stageNum, Uv[texcoordSel][0], Uv[texcoordSel][1]); + Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t); // sample texture if (order.getEnable(stageOdd)) { u8 texel[4]; - TextureSampler::Sample(TexCoord[0], TexCoord[1], Lod[texcoordSel], texmap, texel); + TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum], texmap, texel); int swaptable = ac.tswap * 2; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h index 0419df8086..caaa88cfc9 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.h @@ -21,7 +21,20 @@ #include "BPMemLoader.h" class Tev -{ +{ + struct InputRegType { + unsigned a : 8; + unsigned b : 8; + unsigned c : 8; + signed d : 11; + }; + + struct TextureCoordinateType + { + signed s : 24; + signed t : 24; + }; + // color order: RGBA s16 Reg[4][4]; s16 KonstantColors[4][4]; @@ -32,7 +45,7 @@ class Tev s16 Zero16[4]; u8 AlphaBump; u8 IndirectTex[4][4]; - float TexCoord[2]; + TextureCoordinateType TexCoord; s16 *m_ColorInputLUT[16][3]; s16 *m_AlphaInputLUT[8]; // values must point to RGBA color @@ -49,20 +62,16 @@ class Tev void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac); void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac); - void Indirect(unsigned int stageNum, float s, float t); - - struct InputRegType { - unsigned a : 8; - unsigned b : 8; - unsigned c : 8; - signed d : 11; - }; + void 
Indirect(unsigned int stageNum, s32 s, s32 t); public: - s32 Position[3]; + s32 Position[3]; u8 Color[2][4]; - float Uv[8][2]; - float Lod[8]; + TextureCoordinateType Uv[8]; + s32 IndirectLod[4]; + bool IndirectLinear[4]; + s32 TextureLod[16]; + bool TextureLinear[16]; void Init(); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp index e7001a537b..44878e262b 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.cpp @@ -23,29 +23,11 @@ #include +#define ALLOW_MIPMAP 1 + namespace TextureSampler { -inline int iround(float x) -{ - int t; - -#if defined(_WIN32) && !defined(_M_X64) - __asm - { - fld x - fistp t - } -#else - t = (int)x; - if((x - t) >= 0.5) - return t + 1; -#endif - - return t; -} - - inline void WrapCoord(int &coord, int wrapMode, int imageSize) { switch (wrapMode) @@ -85,9 +67,53 @@ inline void AddTexel(u8 *inTexel, u32 *outTexel, u32 fract) outTexel[3] += inTexel[3] * fract; } -void Sample(float s, float t, float lod, u8 texmap, u8 *sample) +void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample) { - FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; + int baseMip = 0; + bool mipLinear = false; + +#if (ALLOW_MIPMAP) + FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; + TexMode0& tm0 = texUnit.texMode0[texmap & 3]; + + s32 lodFract = lod & 0xf; + + if (lod > 0 && tm0.min_filter & 3) + { + // use mipmap + baseMip = lod >> 4; + mipLinear = (lodFract && tm0.min_filter & 2); + + // if using nearest mip filter and lodFract >= 0.5 round up to next mip + baseMip += (lodFract >> 3) & (tm0.min_filter & 1); + } + + if (mipLinear) + { + u8 sampledTex[4]; + u32 texel[4]; + + SampleMip(s, t, baseMip, linear, texmap, sampledTex); + SetTexel(sampledTex, texel, (16 - lodFract)); + + SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex); + AddTexel(sampledTex, texel, lodFract); + + 
sample[0] = (u8)(texel[0] >> 4); + sample[1] = (u8)(texel[1] >> 4); + sample[2] = (u8)(texel[2] >> 4); + sample[3] = (u8)(texel[3] >> 4); + } + else +#endif + { + SampleMip(s, t, baseMip, linear, texmap, sample); + } +} + +void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample) +{ + FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; u8 subTexmap = texmap & 3; TexMode0& tm0 = texUnit.texMode0[subTexmap]; @@ -97,59 +123,85 @@ void Sample(float s, float t, float lod, u8 texmap, u8 *sample) u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5; u8 *imageSrc = g_VideoInitialize.pGetMemoryPointer(imageBase); - bool linear = false; - if ((lod > 0 && tm0.min_filter > 4) || (lod <= 0 && tm0.mag_filter)) - linear = true; + int imageWidth = ti0.width; + int imageHeight = ti0.height; + + int tlutAddress = texTlut.tmem_offset << 9; + + // reduce sample location and texture size to mip level + // move texture pointer to mip location + if (mip) + { + int mipWidth = imageWidth + 1; + int mipHeight = imageHeight + 1; + + int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format); + int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format); + int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format); + + imageWidth >>= mip; + imageHeight >>= mip; + s >>= mip; + t >>= mip; + + while (mip) + { + mipWidth = max(mipWidth, fmtWidth); + mipHeight = max(mipHeight, fmtHeight); + u32 size = (mipWidth * mipHeight * fmtDepth) >> 1; + + imageSrc += size; + mipWidth >>= 1; + mipHeight >>= 1; + mip--; + } + } + + // integer part of sample location + int imageS = s >> 7; + int imageT = t >> 7; if (linear) { - s32 s256 = s32((s - 0.5f) * 256); - s32 t256 = s32((t- 0.5f) * 256); - - int imageS = s256 >> 8; - int imageSPlus1 = imageS + 1; - u32 fractS = s256 & 0xff; - fractS += fractS >> 7; - - int imageT = t256 >> 8; + // linear sampling + int imageSPlus1 = imageS + 1; + int fractS = s & 0x7f; + int imageTPlus1 = imageT + 1; - u32 fractT = t256 & 0xff; - fractT += 
fractT >> 7; + int fractT = t & 0x7f; u8 sampledTex[4]; u32 texel[4]; - WrapCoord(imageS, tm0.wrap_s, ti0.width); - WrapCoord(imageT, tm0.wrap_t, ti0.height); - WrapCoord(imageSPlus1, tm0.wrap_s, ti0.width); - WrapCoord(imageTPlus1, tm0.wrap_t, ti0.height); + WrapCoord(imageS, tm0.wrap_s, imageWidth); + WrapCoord(imageT, tm0.wrap_t, imageHeight); + WrapCoord(imageSPlus1, tm0.wrap_s, imageWidth); + WrapCoord(imageTPlus1, tm0.wrap_t, imageHeight); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format); - SetTexel(sampledTex, texel, (256 - fractS) * (256 - fractT)); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format); - AddTexel(sampledTex, texel, (fractS) * (256 - fractT)); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format); - AddTexel(sampledTex, texel, (256 - fractS) * (fractT)); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); + AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); - TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format); + TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); AddTexel(sampledTex, texel, (fractS) * (fractT)); - sample[0] = (u8)(texel[0] >> 16); - sample[1] = (u8)(texel[1] >> 16); - 
sample[2] = (u8)(texel[2] >> 16); - sample[3] = (u8)(texel[3] >> 16); + sample[0] = (u8)(texel[0] >> 14); + sample[1] = (u8)(texel[1] >> 14); + sample[2] = (u8)(texel[2] >> 14); + sample[3] = (u8)(texel[3] >> 14); } else { - int imageS = int(s); - int imageT = int(t); + // nearest neighbor sampling + WrapCoord(imageS, tm0.wrap_s, imageWidth); + WrapCoord(imageT, tm0.wrap_t, imageHeight); - WrapCoord(imageS, tm0.wrap_s, ti0.width); - WrapCoord(imageT, tm0.wrap_t, ti0.height); - - TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, ti0.width, ti0.format, texTlut.tmem_offset << 9, texTlut.tlut_format); + TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlutAddress, texTlut.tlut_format); } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h index 27d786068d..b456769c92 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/TextureSampler.h @@ -23,7 +23,9 @@ namespace TextureSampler { - void Sample(float s, float t, float lod, u8 texmap, u8 *sample); + void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8 *sample); + + void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8 *sample); } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp index b955d233e9..c614af8627 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/TransformUnit.cpp @@ -22,6 +22,7 @@ #include "TransformUnit.h" #include "XFMemLoader.h" #include "CPMemLoader.h" +#include "BPMemLoader.h" #include "NativeVertexFormat.h" #include "../../Plugin_VideoDX9/Src/Vec3.h" @@ -30,48 +31,48 @@ namespace TransformUnit { -void MultiplyVec2Mat24(const float *vec, const float *mat, float *result) +void MultiplyVec2Mat24(const Vec3 &vec, const float *mat, Vec3 &result) { - result[0] = 
mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3]; - result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7]; + result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3]; + result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7]; } -void MultiplyVec2Mat34(const float *vec, const float *mat, float *result) +void MultiplyVec2Mat34(const Vec3 &vec, const float *mat, Vec3 &result) { - result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] + mat[3]; - result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] + mat[7]; - result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] + mat[11]; + result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] + mat[3]; + result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] + mat[7]; + result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] + mat[11]; } -void MultiplyVec3Mat33(const float *vec, const float *mat, float *result) +void MultiplyVec3Mat33(const Vec3 &vec, const float *mat, Vec3 &result) { - result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2]; - result[1] = mat[3] * vec[0] + mat[4] * vec[1] + mat[5] * vec[2]; - result[2] = mat[6] * vec[0] + mat[7] * vec[1] + mat[8] * vec[2]; + result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z; + result.y = mat[3] * vec.x + mat[4] * vec.y + mat[5] * vec.z; + result.z = mat[6] * vec.x + mat[7] * vec.y + mat[8] * vec.z; } -void MultiplyVec3Mat34(const float *vec, const float *mat, float *result) +void MultiplyVec3Mat34(const Vec3 &vec, const float *mat, Vec3 &result) { - result[0] = mat[0] * vec[0] + mat[1] * vec[1] + mat[2] * vec[2] + mat[3]; - result[1] = mat[4] * vec[0] + mat[5] * vec[1] + mat[6] * vec[2] + mat[7]; - result[2] = mat[8] * vec[0] + mat[9] * vec[1] + mat[10] * vec[2] + mat[11]; + result.x = mat[0] * vec.x + mat[1] * vec.y + mat[2] * vec.z + mat[3]; + result.y = mat[4] * vec.x + mat[5] * vec.y + mat[6] * vec.z + mat[7]; + result.z = mat[8] * vec.x + mat[9] * vec.y + mat[10] * vec.z + mat[11]; } -void MultipleVec3Perspective(const float *vec, const 
float *proj, float *result) +void MultipleVec3Perspective(const Vec3 &vec, const float *proj, Vec4 &result) { - result[0] = proj[0] * vec[0] + proj[1] * vec[2]; - result[1] = proj[2] * vec[1] + proj[3] * vec[2]; - //result[2] = (proj[4] * vec[2] + proj[5]); - result[2] = (proj[4] * vec[2] + proj[5]) * (1.0f - (float)1e-7); - result[3] = -vec[2]; + result.x = proj[0] * vec.x + proj[1] * vec.z; + result.y = proj[2] * vec.y + proj[3] * vec.z; + //result.z = (proj[4] * vec.z + proj[5]); + result.z = (proj[4] * vec.z + proj[5]) * (1.0f - (float)1e-7); + result.w = -vec.z; } -void MultipleVec3Ortho(const float *vec, const float *proj, float *result) +void MultipleVec3Ortho(const Vec3 &vec, const float *proj, Vec4 &result) { - result[0] = proj[0] * vec[0] + proj[1]; - result[1] = proj[2] * vec[1] + proj[3]; - result[2] = proj[4] * vec[2] + proj[5]; - result[3] = 1; + result.x = proj[0] * vec.x + proj[1]; + result.y = proj[2] * vec.y + proj[3]; + result.z = proj[4] * vec.z + proj[5]; + result.w = 1; } void TransformPosition(const InputVertexData *src, OutputVertexData *dst) @@ -98,55 +99,53 @@ void TransformNormal(const InputVertexData *src, bool nbt, OutputVertexData *dst MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]); MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]); MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]); - Vec3 *norm0 = (Vec3*)dst->normal[0]; - norm0->normalize(); + dst->normal[0].normalize(); } else { MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]); - Vec3 *norm0 = (Vec3*)dst->normal[0]; - norm0->normalize(); + dst->normal[0].normalize(); } } inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bool specialCase, const InputVertexData *srcVertex, OutputVertexData *dstVertex) { - const float *src; + const Vec3 *src; switch (texinfo.sourcerow) { case XF_SRCGEOM_INROW: - src = srcVertex->position; + src = &srcVertex->position; break; case XF_SRCNORMAL_INROW: - src = srcVertex->normal[0]; + src = 
&srcVertex->normal[0]; break; case XF_SRCBINORMAL_T_INROW: - src = srcVertex->normal[1]; + src = &srcVertex->normal[1]; break; case XF_SRCBINORMAL_B_INROW: - src = srcVertex->normal[2]; + src = &srcVertex->normal[2]; break; default: _assert_(texinfo.sourcerow >= XF_SRCTEX0_INROW && texinfo.sourcerow <= XF_SRCTEX7_INROW); - src = srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW]; + src = (Vec3*)srcVertex->texCoords[texinfo.sourcerow - XF_SRCTEX0_INROW]; break; } const float *mat = (const float*)&xfregs.posMatrices[srcVertex->texMtx[coordNum] * 4]; - float *dst = dstVertex->texCoords[coordNum]; + Vec3 *dst = &dstVertex->texCoords[coordNum]; if (texinfo.inputform == XF_TEXINPUT_AB11) { - MultiplyVec2Mat34(src, mat, dst); + MultiplyVec2Mat34(*src, mat, *dst); } else { - MultiplyVec3Mat34(src, mat, dst); + MultiplyVec3Mat34(*src, mat, *dst); } if (xfregs.dualTexTrans) { - float tempCoord[3]; + Vec3 tempCoord; // normalize const PostMtxInfo &postInfo = xfregs.postMtxInfo[coordNum]; @@ -157,12 +156,12 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo // no normalization // q of input is 1 // q of output is unknown - tempCoord[0] = dst[0]; - tempCoord[1] = dst[1]; + tempCoord.x = dst->x; + tempCoord.y = dst->y; - dst[0] = postMat[0] * tempCoord[0] + postMat[1] * tempCoord[1] + postMat[2] + postMat[3]; - dst[1] = postMat[4] * tempCoord[0] + postMat[5] * tempCoord[1] + postMat[6] + postMat[7]; - dst[2] = 0.0f; + dst->x = postMat[0] * tempCoord.x + postMat[1] * tempCoord.y + postMat[2] + postMat[3]; + dst->y = postMat[4] * tempCoord.x + postMat[5] * tempCoord.y + postMat[6] + postMat[7]; + dst->z = 1.0f; } else { @@ -170,18 +169,14 @@ inline void TransformTexCoordRegular(const TexMtxInfo &texinfo, int coordNum, bo { float length = sqrtf(dst[0] * dst[0] + dst[1] * dst[1] + dst[2] * dst[2]); float invL = 1.0f / length; - tempCoord[0] = invL * dst[0]; - tempCoord[1] = invL * dst[1]; - tempCoord[2] = invL * dst[2]; + tempCoord = *dst * 
invL; } else { - tempCoord[0] = dst[0]; - tempCoord[1] = dst[1]; - tempCoord[2] = dst[2]; + tempCoord = *dst; } - MultiplyVec3Mat34(tempCoord, postMat, dst); + MultiplyVec3Mat34(tempCoord, postMat, *dst); } } } @@ -220,13 +215,8 @@ inline float SafeDivide(float n, float d) return (d==0)?(n>0?1:0):n/d; } -void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol) +void LightColor(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, Vec3 &lightCol) { - // must be the size of 3 32bit floats for the light pointer to be valid - _assert_(sizeof(Vec3) == 12); - - const Vec3 *pos = (const Vec3*)vertexPos; - const Vec3 *norm0 = (const Vec3*)normal; const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum]; if (!(chan.attnfunc & 1)) { @@ -237,15 +227,15 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const break; case LIGHTDIF_SIGN: { - Vec3 ldir = (light->pos - *pos).normalized(); - float diffuse = ldir * (*norm0); + Vec3 ldir = (light->pos - pos).normalized(); + float diffuse = ldir * normal; AddScaledIntegerColor(light->color, diffuse, lightCol); } break; case LIGHTDIF_CLAMP: { - Vec3 ldir = (light->pos - *pos).normalized(); - float diffuse = max(0.0f, ldir * (*norm0)); + Vec3 ldir = (light->pos - pos).normalized(); + float diffuse = max(0.0f, ldir * normal); AddScaledIntegerColor(light->color, diffuse, lightCol); } break; @@ -254,7 +244,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const } else { // spec and spot // not sure about divide by zero checks - Vec3 ldir = light->pos - *pos; + Vec3 ldir = light->pos - pos; float attn; if (chan.attnfunc == 3) { // spot @@ -269,7 +259,7 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const } else if (chan.attnfunc == 1) { // specular // donko - what is going on here? 655.36 is a guess but seems about right. 
- attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0; + attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0; ldir.set(1.0f, attn, attn * attn); float cosAtt = max(0.0f, light->cosatt * ldir); @@ -283,14 +273,14 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const break; case LIGHTDIF_SIGN: { - float difAttn = ldir * (*norm0); + float difAttn = ldir * normal; AddScaledIntegerColor(light->color, attn * difAttn, lightCol); } break; case LIGHTDIF_CLAMP: { - float difAttn = max(0.0f, ldir * (*norm0)); + float difAttn = max(0.0f, ldir * normal); AddScaledIntegerColor(light->color, attn * difAttn, lightCol); } break; @@ -299,13 +289,8 @@ void LightColor(const float *vertexPos, const float *normal, u8 lightNum, const } } -void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const LitChannel &chan, float &lightCol) +void LightAlpha(const Vec3 &pos, const Vec3 &normal, u8 lightNum, const LitChannel &chan, float &lightCol) { - // must be the size of 3 32bit floats for the light pointer to be valid - _assert_(sizeof(Vec3) == 12); - - const Vec3 *pos = (const Vec3*)vertexPos; - const Vec3 *norm0 = (const Vec3*)normal; const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*lightNum]; if (!(chan.attnfunc & 1)) { @@ -316,15 +301,15 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const break; case LIGHTDIF_SIGN: { - Vec3 ldir = (light->pos - *pos).normalized(); - float diffuse = ldir * (*norm0); + Vec3 ldir = (light->pos - pos).normalized(); + float diffuse = ldir * normal; lightCol += light->color[0] * diffuse; } break; case LIGHTDIF_CLAMP: { - Vec3 ldir = (light->pos - *pos).normalized(); - float diffuse = max(0.0f, ldir * (*norm0)); + Vec3 ldir = (light->pos - pos).normalized(); + float diffuse = max(0.0f, ldir * normal); lightCol += light->color[0] * diffuse; } break; @@ -332,7 +317,7 @@ void LightAlpha(const float *vertexPos, 
const float *normal, u8 lightNum, const } } else { // spec and spot - Vec3 ldir = light->pos - *pos; + Vec3 ldir = light->pos - pos; float attn; if (chan.attnfunc == 3) { // spot @@ -347,7 +332,7 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const } else if (chan.attnfunc == 1) { // specular // donko - what is going on here? 655.36 is a guess but seems about right. - attn = (light->pos * (*norm0)) > -655.36 ? max(0.0f, (light->dir * (*norm0))) : 0; + attn = (light->pos * normal) > -655.36 ? max(0.0f, (light->dir * normal)) : 0; ldir.set(1.0f, attn, attn * attn); float cosAtt = light->cosatt * ldir; @@ -361,14 +346,14 @@ void LightAlpha(const float *vertexPos, const float *normal, u8 lightNum, const break; case LIGHTDIF_SIGN: { - float difAttn = ldir * (*norm0); + float difAttn = ldir * normal; lightCol += light->color[0] * attn * difAttn; } break; case LIGHTDIF_CLAMP: { - float difAttn = max(0.0f, ldir * (*norm0)); + float difAttn = max(0.0f, ldir * normal); lightCol += light->color[0] * attn * difAttn; } break; @@ -472,14 +457,11 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s break; case XF_TEXGEN_EMBOSS_MAP: { - const Vec3 *pos = (const Vec3*)dst->mvPosition; - const Vec3 *norm1 = (const Vec3*)dst->normal[1]; - const Vec3 *norm2 = (const Vec3*)dst->normal[2]; const LightPointer *light = (const LightPointer*)&xfregs.lights[0x10*texinfo.embosslightshift]; - Vec3 ldir = (light->pos - *pos).normalized(); - float d1 = ldir * (*norm1); - float d2 = ldir * (*norm2); + Vec3 ldir = (light->pos - dst->mvPosition).normalized(); + float d1 = ldir * dst->normal[1]; + float d2 = ldir * dst->normal[2]; dst->texCoords[coordNum][0] = dst->texCoords[texinfo.embosssourceshift][0] + d1; dst->texCoords[coordNum][1] = dst->texCoords[texinfo.embosssourceshift][1] + d2; @@ -503,6 +485,9 @@ void TransformTexCoord(const InputVertexData *src, OutputVertexData *dst, bool s default: ERROR_LOG(VIDEO, "Bad tex gen type %i", 
texinfo.texgentype); } + + dst->texCoords[coordNum][0] *= (bpmem.texcoords[coordNum].s.scale_minus_1 + 1); + dst->texCoords[coordNum][1] *= (bpmem.texcoords[coordNum].t.scale_minus_1 + 1); } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp index d3d3f421c9..f50220aaad 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/VertexFormatConverter.cpp @@ -24,32 +24,32 @@ namespace VertexFormatConverter { void LoadNormal1_Byte(InputVertexData *dst, u8 *src) { - dst->normal[0][0] = (float)(s8)src[0] / 128; - dst->normal[0][1] = (float)(s8)src[1] / 128; - dst->normal[0][2] = (float)(s8)src[2] / 128; + dst->normal[0].x = (float)(s8)src[0] / 128; + dst->normal[0].y = (float)(s8)src[1] / 128; + dst->normal[0].z = (float)(s8)src[2] / 128; } void LoadNormal1_Short(InputVertexData *dst, u8 *src) { - dst->normal[0][0] = (float)((s16*)src)[0] / 32768; - dst->normal[0][1] = (float)((s16*)src)[1] / 32768; - dst->normal[0][2] = (float)((s16*)src)[2] / 32768; + dst->normal[0].x = (float)((s16*)src)[0] / 32768; + dst->normal[0].y = (float)((s16*)src)[1] / 32768; + dst->normal[0].z = (float)((s16*)src)[2] / 32768; } void LoadNormal1_Float(InputVertexData *dst, u8 *src) { - dst->normal[0][0] = ((float*)src)[0]; - dst->normal[0][1] = ((float*)src)[1]; - dst->normal[0][2] = ((float*)src)[2]; + dst->normal[0].x = ((float*)src)[0]; + dst->normal[0].y = ((float*)src)[1]; + dst->normal[0].z = ((float*)src)[2]; } void LoadNormal3_Byte(InputVertexData *dst, u8 *src) { for (int i = 0, j = 0; i < 3; i++, j+=3) { - dst->normal[i][0] = (float)(s8)src[j + 0] / 128; - dst->normal[i][1] = (float)(s8)src[j + 1] / 128; - dst->normal[i][2] = (float)(s8)src[j + 2] / 128; + dst->normal[i].x = (float)(s8)src[j + 0] / 128; + dst->normal[i].y = (float)(s8)src[j + 1] / 128; + dst->normal[i].z = (float)(s8)src[j + 2] / 128; } } @@ -57,9 +57,9 
@@ namespace VertexFormatConverter { for (int i = 0, j = 0; i < 3; i++, j+=3) { - dst->normal[i][0] = (float)((s16*)src)[j + 0] / 32768; - dst->normal[i][1] = (float)((s16*)src)[j + 1] / 32768; - dst->normal[i][2] = (float)((s16*)src)[j + 2] / 32768; + dst->normal[i].x = (float)((s16*)src)[j + 0] / 32768; + dst->normal[i].y = (float)((s16*)src)[j + 1] / 32768; + dst->normal[i].z = (float)((s16*)src)[j + 2] / 32768; } } @@ -67,9 +67,9 @@ namespace VertexFormatConverter { for (int i = 0, j = 0; i < 3; i++, j+=3) { - dst->normal[i][0] = ((float*)src)[j + 0]; - dst->normal[i][1] = ((float*)src)[j + 1]; - dst->normal[i][2] = ((float*)src)[j + 2]; + dst->normal[i].x = ((float*)src)[j + 0]; + dst->normal[i].y = ((float*)src)[j + 1]; + dst->normal[i].z = ((float*)src)[j + 2]; } } }