diff --git a/Source/Core/VideoCommon/Src/OpenCL/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/OpenCL/TextureDecoder.cpp index 322d5dea64..e463d9d1f0 100644 --- a/Source/Core/VideoCommon/Src/OpenCL/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/OpenCL/TextureDecoder.cpp @@ -134,6 +134,7 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei default: return PC_TEX_FMT_NONE; }*/ + return PC_TEX_FMT_NONE; switch(texformat) { case GX_TF_I8: @@ -143,13 +144,13 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei printf("width %d, height %d\n", width, height); // Create the input and output arrays in device memory for our calculation // - cl_mem _dst = clCreateBuffer(OpenCL::g_context, CL_MEM_WRITE_ONLY, TexDecoder_GetTextureSizeInBytes(width, height, texformat), NULL, NULL); + cl_mem _dst = clCreateBuffer(OpenCL::g_context, CL_MEM_WRITE_ONLY, sizeof(unsigned char) * width * height, NULL, NULL); if (!dst) { printf("Error: Failed to allocate device memory!\n"); exit(1); } - cl_mem _src = clCreateBuffer(OpenCL::g_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, TexDecoder_GetTextureSizeInBytes(width, height, texformat), (void*)src, NULL); + cl_mem _src = clCreateBuffer(OpenCL::g_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(unsigned char) * width * height, (void*)src, NULL); if (!src) { printf("Error: Failed to allocate device memory!\n"); @@ -174,8 +175,9 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei if (err != CL_SUCCESS) { printf("Error: Failed to retrieve kernel work group info! %d\n", err); - exit(1); + local = 64; } + // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device // @@ -193,7 +195,7 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei // Read back the results from the device to verify the output // - err = clEnqueueReadBuffer( OpenCL::g_cmdq, _dst, CL_TRUE, 0, TexDecoder_GetTextureSizeInBytes(width, height, texformat), dst, 0, NULL, NULL ); + err = clEnqueueReadBuffer( OpenCL::g_cmdq, _dst, CL_TRUE, 0, sizeof(unsigned char) * width * height, dst, 0, NULL, NULL ); if (err != CL_SUCCESS) { printf("Error: Failed to read output array! %d\n", err); diff --git a/Source/Core/VideoCommon/Src/XFBConvert.cpp b/Source/Core/VideoCommon/Src/XFBConvert.cpp index fc6fcdd99c..b5cb4f5ab8 100644 --- a/Source/Core/VideoCommon/Src/XFBConvert.cpp +++ b/Source/Core/VideoCommon/Src/XFBConvert.cpp @@ -87,15 +87,14 @@ const char *__ConvertToXFB = "__kernel void ConvertToXFB(__global unsigned int { \n \ const unsigned char *src = _pEFB;\n \ int id = get_global_id(0);\n \ - src += id * 8; \n \ + int srcOffset = id * 8; \n \ \n \ - int y1 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16; \n \ - int u1 = ((-(9699 * src[0]) - (19071 * src[1]) + (28770 * src[2])) >> 16) + 128;\n \ - src += 4;\n \ + int y1 = (((16843 * src[srcOffset]) + (33030 * src[srcOffset + 1]) + (6423 * src[srcOffset + 2])) >> 16) + 16; \n \ + int u1 = ((-(9699 * src[srcOffset]) - (19071 * src[srcOffset + 1]) + (28770 * src[srcOffset + 2])) >> 16) + 128;\n \ + srcOffset += 4;\n \ \n \ - int y2 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16;\n \ - int v2 = (((28770 * src[0]) - (24117 * src[1]) - (4653 * src[2])) >> 16) + 128;\n \ - src += 4;\n \ + int y2 = (((16843 * src[srcOffset]) + (33030 * src[srcOffset + 1]) + (6423 * src[srcOffset + 2])) >> 16) + 16;\n \ + int v2 = (((28770 * src[srcOffset]) - (24117 * src[srcOffset + 1]) - (4653 * src[srcOffset + 2])) >> 16) + 128;\n \ \n \ dst[id] = (v2 << 24) | (y2 << 16) | (u1 << 8) | (y1); \n \ } \n "; @@ -178,8 +177,9 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height) if (err != CL_SUCCESS) { printf("Error: Failed to retrieve kernel work group info! %d\n", err); - exit(1); + local = 32; } + // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device // @@ -283,8 +283,9 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height) if (err != CL_SUCCESS) { printf("Error: Failed to retrieve kernel work group info! %d\n", err); - exit(1); + local = 64; } + // Execute the kernel over the entire range of our 1d input data set // using the maximum number of work group items for this device //