(RGL PS3) Faster glClear

This commit is contained in:
twinaphex 2013-03-30 03:21:54 +01:00
parent 8ac2ad16c8
commit b4fc13b0cc
2 changed files with 388 additions and 473 deletions

View File

@ -177,10 +177,7 @@ GLboolean rglPlatformNeedsConversion (const rglAttributeState* as, GLuint index)
// and since we will load them right after enter this function, there are LHS.
GLboolean rglPlatformRequiresSlowPath (void *data, const GLenum indexType, uint32_t indexCount);
void rglPlatformRasterGetIntegerv( GLenum pname, GLint* params );
void rglValidateClipPlanes (void);
void rglInvalidateAttributes (void);
GLuint rglValidateAttributes (const void* indices, GLboolean *isMain);
GLuint rglValidateAttributesSlow (void *data, GLboolean *isMain);
//----------------------------------------
// Raster/.../PlatformTexture.c

View File

@ -1305,127 +1305,52 @@ GLboolean rglPlatformBufferObjectUnmap (void *data)
PLATFORM FRAMEBUFFER
============================================================ */
// set render targets
static void rglValidateFramebuffer (void)
{
RGLdevice *LDevice = _CurrentDevice;
rglGcmDevice *gcmDevice = ( rglGcmDevice * )LDevice->platformDevice;
RGLcontext* LContext = _CurrentContext;
rglGcmDriver *gcmDriver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
// reset buffer data
gcmDriver->rtValid = GL_FALSE;
// get buffer parameters
// This may come from a framebuffer_object or the default framebuffer.
//
gcmDriver->rt = gcmDevice->rt;
if (LContext->framebuffer)
{
rglPlatformFramebuffer* framebuffer = (rglPlatformFramebuffer *)rglGetFramebuffer(LContext, LContext->framebuffer);
if (framebuffer->needValidate)
framebuffer->validate( LContext );
gcmDriver->rt = framebuffer->rt;
}
gcmDriver->rtValid = GL_TRUE;
// update GPU configuration
rglGcmFifoGlSetRenderTarget( &gcmDriver->rt );
LContext->needValidate &= ~RGL_VALIDATE_FRAMEBUFFER;
LContext->needValidate |= RGL_VALIDATE_VIEWPORT;
}
GLAPI void APIENTRY glClear( GLbitfield mask )
{
RGLcontext* LContext = _CurrentContext;
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
if ( LContext->needValidate & RGL_VALIDATE_FRAMEBUFFER )
rglValidateFramebuffer();
{
// set render targets
RGLdevice *LDevice = _CurrentDevice;
rglGcmDevice *gcmDevice = ( rglGcmDevice * )LDevice->platformDevice;
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
// reset buffer data
driver->rtValid = GL_FALSE;
// get buffer parameters
// This may come from a framebuffer_object or the default framebuffer.
//
driver->rt = gcmDevice->rt;
if (LContext->framebuffer)
{
rglPlatformFramebuffer* framebuffer = (rglPlatformFramebuffer *)rglGetFramebuffer(LContext, LContext->framebuffer);
if (framebuffer->needValidate)
framebuffer->validate( LContext );
driver->rt = framebuffer->rt;
}
driver->rtValid = GL_TRUE;
// update GPU configuration
rglGcmFifoGlSetRenderTarget( &driver->rt );
LContext->needValidate &= ~RGL_VALIDATE_FRAMEBUFFER;
LContext->needValidate |= RGL_VALIDATE_VIEWPORT;
}
if (!driver->rtValid)
return;
GLbitfield newmask = 0;
if ((mask & GL_COLOR_BUFFER_BIT) && driver->rt.colorBufferCount)
newmask |= RGLGCM_COLOR_BUFFER_BIT;
GCM_FUNC( cellGcmSetClearColor, 0 );
if (!newmask)
return;
GCM_FUNC( cellGcmSetClearSurface, CELL_GCM_CLEAR_R | CELL_GCM_CLEAR_G |
CELL_GCM_CLEAR_B | CELL_GCM_CLEAR_A );
GLbitfield clearMask = newmask;
if (driver->rt.colorFormat != RGLGCM_ARGB8)
clearMask &= ~RGLGCM_COLOR_BUFFER_BIT;
// always use quad clear for colors with MRT
// There is one global clear mask for all render targets. This doesn't
// work nicely for all render target combinations, e.g. only the first
// and last targets enabled. Quad clear works because color mask is
// per target.
//
// TODO: Clear could be used if the enabled render targets are
// contiguous from 0, i.e. {0,1}, {0,1,2}, {0,1,2,3}. If this is done,
// parallel changes need to made in rglValidateWriteMask because we
// bypass calling rglGcmFifoGlColorMask there and the mask used by nv_glClear
// is not updated.
if ( driver->rt.colorBufferCount > 1 )
clearMask &= ~RGLGCM_COLOR_BUFFER_BIT;
if ( clearMask )
{
GCM_FUNC( cellGcmSetClearColor, 0 );
if (rglGcmState_i.renderTarget.colorFormat)
GCM_FUNC( cellGcmSetClearSurface, CELL_GCM_CLEAR_R | CELL_GCM_CLEAR_G |
CELL_GCM_CLEAR_B | CELL_GCM_CLEAR_A );
newmask &= ~clearMask;
}
if ( newmask )
{
// draw a quad to erase everything.
// disable/set up a lot of states
//
static float rglClearVertexBuffer[4*3] __attribute__(( aligned( RGL_ALIGN_FAST_TRANSFER ) ) ) =
{
-1.f, -1.f, 0.f,
-1.f, 1.f, 0.f,
1.f, -1.f, 0.f,
1.f, 1.f, 0.f,
};
GLuint bufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo,
0, sizeof(rglClearVertexBuffer));
__builtin_memcpy(gmmIdToAddress(bufferId), rglClearVertexBuffer, sizeof(rglClearVertexBuffer));
GCM_FUNC( cellGcmSetVertexDataArray, 0, 1, 3 * sizeof(GLfloat), 3,
CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, gmmIdToOffset(bufferId) );
RGLBIT_TRUE( LContext->attribs->DirtyMask, 0 );
for (int i = 1; i < RGL_MAX_VERTEX_ATTRIBS; ++i)
{
GCM_FUNC( cellGcmSetVertexDataArray, i, 0, 0, 0, CELL_GCM_VERTEX_F, CELL_GCM_LOCATION_LOCAL, 0);
RGLBIT_TRUE( LContext->attribs->DirtyMask, i );
}
int clearcolor = 0;
GCM_FUNC( cellGcmSetVertexData4f, RGL_ATTRIB_PRIMARY_COLOR_INDEX, ( GLfloat* )&clearcolor );
LContext->needValidate |= RGL_VALIDATE_FRAGMENT_PROGRAM;
gmmFree( bufferId );
}
GCM_FUNC_NO_ARGS( cellGcmSetInvalidateVertexCache );
rglGcmFifoFlush( &rglGcmState_i.fifo );
@ -1641,366 +1566,6 @@ void rglDumpFifo (char *name);
#undef RGLGCM_REMAP_MODES
static inline void rglValidateStates (GLuint mask)
{
RGLcontext* LContext = _CurrentContext;
LContext->needValidate &= mask;
GLuint needValidate = LContext->needValidate;
if (RGL_UNLIKELY( needValidate & RGL_VALIDATE_FRAMEBUFFER))
{
rglValidateFramebuffer();
needValidate = LContext->needValidate;
}
if (RGL_UNLIKELY( needValidate & RGL_VALIDATE_TEXTURES_USED))
{
long unitInUseCount = LContext->BoundFragmentProgram->samplerCount;
const GLuint* unitsInUse = LContext->BoundFragmentProgram->samplerUnits;
for ( long i = 0; i < unitInUseCount; ++i )
{
long unit = unitsInUse[i];
rglTexture* texture = LContext->TextureImageUnits[unit].currentTexture;
if (!texture)
continue;
// Validate resources of a texture and set it to a unit
if (RGL_UNLIKELY( texture->revalidate))
rglPlatformValidateTextureResources(texture); // this updates the isComplete bit.
GLboolean isCompleteCache = texture->isComplete;
if (RGL_LIKELY(isCompleteCache))
{
// Set a texture to a gcm texture unit
rglGcmTexture *platformTexture = ( rglGcmTexture * )texture->platformTexture;
const GLuint imageOffset = gmmIdToOffset(platformTexture->gpuAddressId) +
platformTexture->gpuAddressIdOffset;
platformTexture->gcmTexture.offset = imageOffset;
// set up the texture unit with the info for the current texture
// bind texture , control 1,3,format and remap
GCM_FUNC_SAFE( cellGcmSetTexture, unit, &platformTexture->gcmTexture );
CellGcmContextData *gcm_context = (CellGcmContextData*)&rglGcmState_i.fifo;
cellGcmReserveMethodSizeInline(gcm_context, 11);
uint32_t *current = gcm_context->current;
current[0] = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET(unit, 8);
current[1] = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(platformTexture->gcmTexture.offset);
current[2] = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(platformTexture->gcmTexture.location,
platformTexture->gcmTexture.cubemap,
platformTexture->gcmTexture.dimension,
platformTexture->gcmTexture.format,
platformTexture->gcmTexture.mipmap);
current[3] = CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS( platformTexture->gcmMethods.address.wrapS,
platformTexture->gcmMethods.address.wrapT,
platformTexture->gcmMethods.address.wrapR,
platformTexture->gcmMethods.address.unsignedRemap,
platformTexture->gcmMethods.address.zfunc,
platformTexture->gcmMethods.address.gamma,
0);
current[4] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0(CELL_GCM_TRUE,
platformTexture->gcmMethods.control0.minLOD,
platformTexture->gcmMethods.control0.maxLOD,
platformTexture->gcmMethods.control0.maxAniso);
current[5] = platformTexture->gcmTexture.remap;
current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER(
(platformTexture->gcmMethods.filter.bias & 0x1fff),
platformTexture->gcmMethods.filter.min,
platformTexture->gcmMethods.filter.mag,
platformTexture->gcmMethods.filter.conv);
current[7] = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(
platformTexture->gcmTexture.height,
platformTexture->gcmTexture.width);
current[8] = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(
platformTexture->gcmMethods.borderColor);
current[9] = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3(unit,1);
current[10] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(
platformTexture->gcmTexture.pitch,
platformTexture->gcmTexture.depth);
gcm_context->current = &current[11];
}
else
{
// Validate incomplete texture by remapping
//RGL_REPORT_EXTRA( RGL_REPORT_TEXTURE_INCOMPLETE, "Texture %d bound to unit %d(%s) is incomplete.", texture->name, unit, rglGetGLEnumName( texture->target ) );
GLuint remap = CELL_GCM_REMAP_MODE(
CELL_GCM_TEXTURE_REMAP_ORDER_XYXY,
CELL_GCM_TEXTURE_REMAP_FROM_A,
CELL_GCM_TEXTURE_REMAP_FROM_R,
CELL_GCM_TEXTURE_REMAP_FROM_G,
CELL_GCM_TEXTURE_REMAP_FROM_B,
CELL_GCM_TEXTURE_REMAP_ONE,
CELL_GCM_TEXTURE_REMAP_ZERO,
CELL_GCM_TEXTURE_REMAP_ZERO,
CELL_GCM_TEXTURE_REMAP_ZERO );
// disable control 0
GCM_FUNC( cellGcmSetTextureControl, unit, CELL_GCM_FALSE, 0, 0, 0 );
// set texture remap only
GCM_FUNC( cellGcmSetTextureRemap, unit, remap );
}
}
}
bool validate_vertex_consts = false;
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_VERTEX_PROGRAM))
{
const _CGprogram *program = (const _CGprogram*)LContext->BoundVertexProgram;
__dcbt(program->ucode);
__dcbt(((uint8_t*)program->ucode)+128);
__dcbt(((uint8_t*)program->ucode)+256);
__dcbt(((uint8_t*)program->ucode)+384);
CellCgbVertexProgramConfiguration conf;
conf.instructionSlot = program->header.vertexProgram.instructionSlot;
conf.instructionCount = program->header.instructionCount;
conf.registerCount = program->header.vertexProgram.registerCount;
conf.attributeInputMask = program->header.attributeInputMask;
rglGcmFifo *fifo = (rglGcmFifo*)&rglGcmState_i.fifo;
GLuint spaceInWords = 7 + 5 * conf.instructionCount;
// Push a CG program onto the current command buffer
// make sure there is space for the pushbuffer + any nops we need to add for alignment
if ( fifo->current + spaceInWords + 1024 > fifo->end )
rglOutOfSpaceCallback( fifo, spaceInWords );
GCM_FUNC( cellGcmSetVertexProgramLoad, &conf, program->ucode );
GCM_FUNC( cellGcmSetUserClipPlaneControl, 0, 0, 0, 0, 0, 0 );
rglGcmInterpolantState *s = &rglGcmState_i.state.interpolant;
s->vertexProgramAttribMask = program->header.vertexProgram.attributeOutputMask;
GCM_FUNC( cellGcmSetVertexAttribOutputMask, (( s->vertexProgramAttribMask) &
s->fragmentProgramAttribMask) );
int count = program->defaultValuesIndexCount;
for ( int i = 0;i < count;i++ )
{
const CgParameterEntry *parameterEntry = program->parametersEntries + program->defaultValuesIndices[i].entryIndex;
GLboolean cond = (( parameterEntry->flags & CGPF_REFERENCED )
&& ( parameterEntry->flags & CGPV_MASK ) == CGPV_CONSTANT );
if (!cond)
continue;
const float *value = program->defaultValues +
program->defaultValuesIndices[i].defaultValueIndex;
const CgParameterResource *parameterResource = rglGetParameterResource( program, parameterEntry );
GLboolean cond2 = parameterResource->resource != (unsigned short) - 1;
if (!cond2)
continue;
switch ( parameterResource->type )
{
case CG_FLOAT:
case CG_FLOAT1:
case CG_FLOAT2:
case CG_FLOAT3:
case CG_FLOAT4:
case CG_HALF:
case CG_HALF1:
case CG_HALF2:
case CG_HALF3:
case CG_HALF4:
case CG_INT:
case CG_INT1:
case CG_INT2:
case CG_INT3:
case CG_INT4:
case CG_BOOL:
case CG_BOOL1:
case CG_BOOL2:
case CG_BOOL3:
case CG_BOOL4:
case CG_FIXED:
case CG_FIXED1:
case CG_FIXED2:
case CG_FIXED3:
case CG_FIXED4:
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 1, value ); // GCM_PORT_TESTED [Cedric]
break;
case CG_FLOAT4x4:
case CG_HALF4x4:
case CG_INT4x4:
case CG_BOOL4x4:
case CG_FIXED4x4:
// set 4 consts
{
GLfloat v2[16];
v2[0] = value[0];
v2[1] = value[4];
v2[2] = value[8];
v2[3] = value[12];
v2[4] = value[1];
v2[5] = value[5];
v2[6] = value[9];
v2[7] = value[13];
v2[8] = value[2];
v2[9] = value[6];
v2[10] = value[10];
v2[11] = value[14];
v2[12] = value[3];
v2[13] = value[7];
v2[14] = value[11];
v2[15] = value[15];
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 4, v2 ); // GCM_PORT_TESTED [Cedric]
}
break;
case CG_FLOAT3x3:
case CG_HALF3x3:
case CG_INT3x3:
case CG_BOOL3x3:
case CG_FIXED3x3:
// set 3 consts
{
GLfloat v2[12];
v2[0] = value[0];v2[1] = value[3];v2[2] = value[6];v2[3] = 0;
v2[4] = value[1];v2[5] = value[4];v2[6] = value[7];v2[7] = 0;
v2[8] = value[2];v2[9] = value[5];v2[10] = value[8];v2[11] = 0;
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 3, v2 );
}
break;
}
}
// Set all uniforms.
if(!(LContext->needValidate & RGL_VALIDATE_VERTEX_CONSTANTS) && LContext->BoundVertexProgram->parentContext)
validate_vertex_consts = true;
}
if (RGL_LIKELY(needValidate & RGL_VALIDATE_VERTEX_CONSTANTS) || validate_vertex_consts)
{
_CGprogram *cgprog = LContext->BoundVertexProgram;
rglGcmFifo *fifo = (rglGcmFifo*)&rglGcmState_i.fifo;
GLuint spaceInWords = cgprog->constantPushBufferWordSize + 4 + 32;
// Push a CG program onto the current command buffer
// make sure there is space for the pushbuffer + any nops we need to add for alignment
if ( fifo->current + spaceInWords + 1024 > fifo->end )
rglOutOfSpaceCallback( fifo, spaceInWords );
// first add nops to get us the next alligned position in the fifo
// [YLIN] Use VMX register to copy
uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2;
uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf;
GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word );
memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size);
rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize;
}
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_FRAGMENT_PROGRAM))
{
// Set up the current fragment program on hardware
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
_CGprogram *program = LContext->BoundFragmentProgram;
// params are set directly in the GPU memory, so there is nothing to be done here.
CellCgbFragmentProgramConfiguration conf;
conf.offset = gmmIdToOffset(program->loadProgramId) + program->loadProgramOffset;
rglGcmInterpolantState *s = &rglGcmState_i.state.interpolant;
s->fragmentProgramAttribMask |= program->header.attributeInputMask | CELL_GCM_ATTRIB_OUTPUT_MASK_POINTSIZE;
conf.attributeInputMask = ( s->vertexProgramAttribMask) &
s->fragmentProgramAttribMask;
conf.texCoordsInputMask = program->header.fragmentProgram.texcoordInputMask;
conf.texCoords2D = program->header.fragmentProgram.texcoord2d;
conf.texCoordsCentroid = program->header.fragmentProgram.texcoordCentroid;
int fragmentControl = ( 1 << 15 ) | ( 1 << 10 );
fragmentControl |= program->header.fragmentProgram.flags & CGF_DEPTHREPLACE ? 0xE : 0x0;
fragmentControl |= program->header.fragmentProgram.flags & CGF_OUTPUTFROMH0 ? 0x00 : 0x40;
fragmentControl |= program->header.fragmentProgram.flags & CGF_PIXELKILL ? 0x80 : 0x00;
conf.fragmentControl = fragmentControl;
conf.registerCount = program->header.fragmentProgram.registerCount < 2 ? 2 : program->header.fragmentProgram.registerCount;
conf.fragmentControl &= ~CELL_GCM_MASK_SET_SHADER_CONTROL_CONTROL_TXP;
/* TODO - look into this */
conf.fragmentControl |= 0 << CELL_GCM_SHIFT_SET_SHADER_CONTROL_CONTROL_TXP;
GCM_FUNC( cellGcmSetFragmentProgramLoad, &conf );
GCM_FUNC( cellGcmSetZMinMaxControl, ( program->header.fragmentProgram.flags & CGF_DEPTHREPLACE ) ? RGLGCM_FALSE : RGLGCM_TRUE, RGLGCM_FALSE, RGLGCM_FALSE );
driver->fpLoadProgramId = program->loadProgramId;
driver->fpLoadProgramOffset = program->loadProgramOffset;
}
if ( RGL_LIKELY(( needValidate & ~( RGL_VALIDATE_TEXTURES_USED |
RGL_VALIDATE_VERTEX_PROGRAM |
RGL_VALIDATE_VERTEX_CONSTANTS |
RGL_VALIDATE_FRAGMENT_PROGRAM ) ) == 0 ) )
{
LContext->needValidate = 0;
return;
}
if ( RGL_UNLIKELY( needValidate & RGL_VALIDATE_VIEWPORT ) )
{
rglGcmViewportState *v = &rglGcmState_i.state.viewport;
v->x = LContext->ViewPort.X;
v->y = LContext->ViewPort.Y;
v->w = LContext->ViewPort.XSize;
v->h = LContext->ViewPort.YSize;
rglGcmFifoGlViewport(v, LContext->DepthNear, LContext->DepthFar);
}
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_BLENDING ))
{
if ((LContext->Blending))
{
GCM_FUNC( cellGcmSetBlendEnable, LContext->Blending );
rglGcmBlendState *blend = &rglGcmState_i.state.blend;
GLuint hwColor;
blend->r = LContext->BlendColor.R;
blend->g = LContext->BlendColor.G;
blend->b = LContext->BlendColor.B;
blend->a = LContext->BlendColor.A;
if (rglGcmState_i.renderTarget.colorFormat == RGLGCM_ARGB8)
{
RGLGCM_CALC_COLOR_LE_ARGB8( &hwColor, blend->r, blend->g, blend->b, blend->a );
GCM_FUNC( cellGcmSetBlendColor, hwColor, hwColor );
}
GCM_FUNC( cellGcmSetBlendEquation, (rglGcmEnum)LContext->BlendEquationRGB,
(rglGcmEnum)LContext->BlendEquationAlpha );
GCM_FUNC( cellGcmSetBlendFunc, (rglGcmEnum)LContext->BlendFactorSrcRGB,(rglGcmEnum)LContext->BlendFactorDestRGB,(rglGcmEnum)LContext->BlendFactorSrcAlpha,(rglGcmEnum)LContext->BlendFactorDestAlpha);
}
}
if ( RGL_UNLIKELY( needValidate & RGL_VALIDATE_SHADER_SRGB_REMAP ) )
{
GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, LContext->ShaderSRGBRemap ? CELL_GCM_TRUE : CELL_GCM_FALSE);
LContext->needValidate &= ~RGL_VALIDATE_SHADER_SRGB_REMAP;
}
LContext->needValidate = 0;
}
const uint32_t c_rounded_size_ofrglDrawParams = (sizeof(rglDrawParams)+0x7f)&~0x7f;
static uint8_t s_dparams_buff[ c_rounded_size_ofrglDrawParams ] __attribute__((aligned(128)));
@ -2027,7 +1592,360 @@ GLAPI void APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count)
dparams->vertexCount = count;
if ( LContext->needValidate )
rglValidateStates( RGL_VALIDATE_ALL );
{
GLuint mask = RGL_VALIDATE_ALL;
LContext->needValidate &= mask;
GLuint needValidate = LContext->needValidate;
if (RGL_UNLIKELY( needValidate & RGL_VALIDATE_TEXTURES_USED))
{
long unitInUseCount = LContext->BoundFragmentProgram->samplerCount;
const GLuint* unitsInUse = LContext->BoundFragmentProgram->samplerUnits;
for ( long i = 0; i < unitInUseCount; ++i )
{
long unit = unitsInUse[i];
rglTexture* texture = LContext->TextureImageUnits[unit].currentTexture;
if (!texture)
continue;
// Validate resources of a texture and set it to a unit
if (RGL_UNLIKELY( texture->revalidate))
rglPlatformValidateTextureResources(texture); // this updates the isComplete bit.
GLboolean isCompleteCache = texture->isComplete;
if (RGL_LIKELY(isCompleteCache))
{
// Set a texture to a gcm texture unit
rglGcmTexture *platformTexture = ( rglGcmTexture * )texture->platformTexture;
const GLuint imageOffset = gmmIdToOffset(platformTexture->gpuAddressId) +
platformTexture->gpuAddressIdOffset;
platformTexture->gcmTexture.offset = imageOffset;
// set up the texture unit with the info for the current texture
// bind texture , control 1,3,format and remap
GCM_FUNC_SAFE( cellGcmSetTexture, unit, &platformTexture->gcmTexture );
CellGcmContextData *gcm_context = (CellGcmContextData*)&rglGcmState_i.fifo;
cellGcmReserveMethodSizeInline(gcm_context, 11);
uint32_t *current = gcm_context->current;
current[0] = CELL_GCM_METHOD_HEADER_TEXTURE_OFFSET(unit, 8);
current[1] = CELL_GCM_METHOD_DATA_TEXTURE_OFFSET(platformTexture->gcmTexture.offset);
current[2] = CELL_GCM_METHOD_DATA_TEXTURE_FORMAT(platformTexture->gcmTexture.location,
platformTexture->gcmTexture.cubemap,
platformTexture->gcmTexture.dimension,
platformTexture->gcmTexture.format,
platformTexture->gcmTexture.mipmap);
current[3] = CELL_GCM_METHOD_DATA_TEXTURE_ADDRESS( platformTexture->gcmMethods.address.wrapS,
platformTexture->gcmMethods.address.wrapT,
platformTexture->gcmMethods.address.wrapR,
platformTexture->gcmMethods.address.unsignedRemap,
platformTexture->gcmMethods.address.zfunc,
platformTexture->gcmMethods.address.gamma,
0);
current[4] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL0(CELL_GCM_TRUE,
platformTexture->gcmMethods.control0.minLOD,
platformTexture->gcmMethods.control0.maxLOD,
platformTexture->gcmMethods.control0.maxAniso);
current[5] = platformTexture->gcmTexture.remap;
current[6] = CELL_GCM_METHOD_DATA_TEXTURE_FILTER(
(platformTexture->gcmMethods.filter.bias & 0x1fff),
platformTexture->gcmMethods.filter.min,
platformTexture->gcmMethods.filter.mag,
platformTexture->gcmMethods.filter.conv);
current[7] = CELL_GCM_METHOD_DATA_TEXTURE_IMAGE_RECT(
platformTexture->gcmTexture.height,
platformTexture->gcmTexture.width);
current[8] = CELL_GCM_METHOD_DATA_TEXTURE_BORDER_COLOR(
platformTexture->gcmMethods.borderColor);
current[9] = CELL_GCM_METHOD_HEADER_TEXTURE_CONTROL3(unit,1);
current[10] = CELL_GCM_METHOD_DATA_TEXTURE_CONTROL3(
platformTexture->gcmTexture.pitch,
platformTexture->gcmTexture.depth);
gcm_context->current = &current[11];
}
else
{
// Validate incomplete texture by remapping
//RGL_REPORT_EXTRA( RGL_REPORT_TEXTURE_INCOMPLETE, "Texture %d bound to unit %d(%s) is incomplete.", texture->name, unit, rglGetGLEnumName( texture->target ) );
GLuint remap = CELL_GCM_REMAP_MODE(
CELL_GCM_TEXTURE_REMAP_ORDER_XYXY,
CELL_GCM_TEXTURE_REMAP_FROM_A,
CELL_GCM_TEXTURE_REMAP_FROM_R,
CELL_GCM_TEXTURE_REMAP_FROM_G,
CELL_GCM_TEXTURE_REMAP_FROM_B,
CELL_GCM_TEXTURE_REMAP_ONE,
CELL_GCM_TEXTURE_REMAP_ZERO,
CELL_GCM_TEXTURE_REMAP_ZERO,
CELL_GCM_TEXTURE_REMAP_ZERO );
// disable control 0
GCM_FUNC( cellGcmSetTextureControl, unit, CELL_GCM_FALSE, 0, 0, 0 );
// set texture remap only
GCM_FUNC( cellGcmSetTextureRemap, unit, remap );
}
}
}
bool validate_vertex_consts = false;
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_VERTEX_PROGRAM))
{
const _CGprogram *program = (const _CGprogram*)LContext->BoundVertexProgram;
__dcbt(program->ucode);
__dcbt(((uint8_t*)program->ucode)+128);
__dcbt(((uint8_t*)program->ucode)+256);
__dcbt(((uint8_t*)program->ucode)+384);
CellCgbVertexProgramConfiguration conf;
conf.instructionSlot = program->header.vertexProgram.instructionSlot;
conf.instructionCount = program->header.instructionCount;
conf.registerCount = program->header.vertexProgram.registerCount;
conf.attributeInputMask = program->header.attributeInputMask;
rglGcmFifo *fifo = (rglGcmFifo*)&rglGcmState_i.fifo;
GLuint spaceInWords = 7 + 5 * conf.instructionCount;
// Push a CG program onto the current command buffer
// make sure there is space for the pushbuffer + any nops we need to add for alignment
if ( fifo->current + spaceInWords + 1024 > fifo->end )
rglOutOfSpaceCallback( fifo, spaceInWords );
GCM_FUNC( cellGcmSetVertexProgramLoad, &conf, program->ucode );
GCM_FUNC( cellGcmSetUserClipPlaneControl, 0, 0, 0, 0, 0, 0 );
rglGcmInterpolantState *s = &rglGcmState_i.state.interpolant;
s->vertexProgramAttribMask = program->header.vertexProgram.attributeOutputMask;
GCM_FUNC( cellGcmSetVertexAttribOutputMask, (( s->vertexProgramAttribMask) &
s->fragmentProgramAttribMask) );
int count = program->defaultValuesIndexCount;
for ( int i = 0;i < count;i++ )
{
const CgParameterEntry *parameterEntry = program->parametersEntries + program->defaultValuesIndices[i].entryIndex;
GLboolean cond = (( parameterEntry->flags & CGPF_REFERENCED )
&& ( parameterEntry->flags & CGPV_MASK ) == CGPV_CONSTANT );
if (!cond)
continue;
const float *value = program->defaultValues +
program->defaultValuesIndices[i].defaultValueIndex;
const CgParameterResource *parameterResource = rglGetParameterResource( program, parameterEntry );
GLboolean cond2 = parameterResource->resource != (unsigned short) - 1;
if (!cond2)
continue;
switch ( parameterResource->type )
{
case CG_FLOAT:
case CG_FLOAT1:
case CG_FLOAT2:
case CG_FLOAT3:
case CG_FLOAT4:
case CG_HALF:
case CG_HALF1:
case CG_HALF2:
case CG_HALF3:
case CG_HALF4:
case CG_INT:
case CG_INT1:
case CG_INT2:
case CG_INT3:
case CG_INT4:
case CG_BOOL:
case CG_BOOL1:
case CG_BOOL2:
case CG_BOOL3:
case CG_BOOL4:
case CG_FIXED:
case CG_FIXED1:
case CG_FIXED2:
case CG_FIXED3:
case CG_FIXED4:
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 1, value ); // GCM_PORT_TESTED [Cedric]
break;
case CG_FLOAT4x4:
case CG_HALF4x4:
case CG_INT4x4:
case CG_BOOL4x4:
case CG_FIXED4x4:
// set 4 consts
{
GLfloat v2[16];
v2[0] = value[0];
v2[1] = value[4];
v2[2] = value[8];
v2[3] = value[12];
v2[4] = value[1];
v2[5] = value[5];
v2[6] = value[9];
v2[7] = value[13];
v2[8] = value[2];
v2[9] = value[6];
v2[10] = value[10];
v2[11] = value[14];
v2[12] = value[3];
v2[13] = value[7];
v2[14] = value[11];
v2[15] = value[15];
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 4, v2 ); // GCM_PORT_TESTED [Cedric]
}
break;
case CG_FLOAT3x3:
case CG_HALF3x3:
case CG_INT3x3:
case CG_BOOL3x3:
case CG_FIXED3x3:
// set 3 consts
{
GLfloat v2[12];
v2[0] = value[0];v2[1] = value[3];v2[2] = value[6];v2[3] = 0;
v2[4] = value[1];v2[5] = value[4];v2[6] = value[7];v2[7] = 0;
v2[8] = value[2];v2[9] = value[5];v2[10] = value[8];v2[11] = 0;
GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 3, v2 );
}
break;
}
}
// Set all uniforms.
if(!(LContext->needValidate & RGL_VALIDATE_VERTEX_CONSTANTS) && LContext->BoundVertexProgram->parentContext)
validate_vertex_consts = true;
}
if (RGL_LIKELY(needValidate & RGL_VALIDATE_VERTEX_CONSTANTS) || validate_vertex_consts)
{
_CGprogram *cgprog = LContext->BoundVertexProgram;
rglGcmFifo *fifo = (rglGcmFifo*)&rglGcmState_i.fifo;
GLuint spaceInWords = cgprog->constantPushBufferWordSize + 4 + 32;
// Push a CG program onto the current command buffer
// make sure there is space for the pushbuffer + any nops we need to add for alignment
if ( fifo->current + spaceInWords + 1024 > fifo->end )
rglOutOfSpaceCallback( fifo, spaceInWords );
// first add nops to get us the next alligned position in the fifo
// [YLIN] Use VMX register to copy
uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2;
uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf;
GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word );
memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size);
rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize;
}
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_FRAGMENT_PROGRAM))
{
// Set up the current fragment program on hardware
rglGcmDriver *driver = (rglGcmDriver*)_CurrentDevice->rasterDriver;
_CGprogram *program = LContext->BoundFragmentProgram;
// params are set directly in the GPU memory, so there is nothing to be done here.
CellCgbFragmentProgramConfiguration conf;
conf.offset = gmmIdToOffset(program->loadProgramId) + program->loadProgramOffset;
rglGcmInterpolantState *s = &rglGcmState_i.state.interpolant;
s->fragmentProgramAttribMask |= program->header.attributeInputMask | CELL_GCM_ATTRIB_OUTPUT_MASK_POINTSIZE;
conf.attributeInputMask = ( s->vertexProgramAttribMask) &
s->fragmentProgramAttribMask;
conf.texCoordsInputMask = program->header.fragmentProgram.texcoordInputMask;
conf.texCoords2D = program->header.fragmentProgram.texcoord2d;
conf.texCoordsCentroid = program->header.fragmentProgram.texcoordCentroid;
int fragmentControl = ( 1 << 15 ) | ( 1 << 10 );
fragmentControl |= program->header.fragmentProgram.flags & CGF_DEPTHREPLACE ? 0xE : 0x0;
fragmentControl |= program->header.fragmentProgram.flags & CGF_OUTPUTFROMH0 ? 0x00 : 0x40;
fragmentControl |= program->header.fragmentProgram.flags & CGF_PIXELKILL ? 0x80 : 0x00;
conf.fragmentControl = fragmentControl;
conf.registerCount = program->header.fragmentProgram.registerCount < 2 ? 2 : program->header.fragmentProgram.registerCount;
conf.fragmentControl &= ~CELL_GCM_MASK_SET_SHADER_CONTROL_CONTROL_TXP;
/* TODO - look into this */
conf.fragmentControl |= 0 << CELL_GCM_SHIFT_SET_SHADER_CONTROL_CONTROL_TXP;
GCM_FUNC( cellGcmSetFragmentProgramLoad, &conf );
GCM_FUNC( cellGcmSetZMinMaxControl, ( program->header.fragmentProgram.flags & CGF_DEPTHREPLACE ) ? RGLGCM_FALSE : RGLGCM_TRUE, RGLGCM_FALSE, RGLGCM_FALSE );
driver->fpLoadProgramId = program->loadProgramId;
driver->fpLoadProgramOffset = program->loadProgramOffset;
}
if ( RGL_LIKELY(( needValidate & ~( RGL_VALIDATE_TEXTURES_USED |
RGL_VALIDATE_VERTEX_PROGRAM |
RGL_VALIDATE_VERTEX_CONSTANTS |
RGL_VALIDATE_FRAGMENT_PROGRAM ) ) == 0 ) )
{
LContext->needValidate = 0;
goto beginning;
}
if ( RGL_UNLIKELY( needValidate & RGL_VALIDATE_VIEWPORT ) )
{
rglGcmViewportState *v = &rglGcmState_i.state.viewport;
v->x = LContext->ViewPort.X;
v->y = LContext->ViewPort.Y;
v->w = LContext->ViewPort.XSize;
v->h = LContext->ViewPort.YSize;
rglGcmFifoGlViewport(v, LContext->DepthNear, LContext->DepthFar);
}
if (RGL_UNLIKELY(needValidate & RGL_VALIDATE_BLENDING ))
{
if ((LContext->Blending))
{
GCM_FUNC( cellGcmSetBlendEnable, LContext->Blending );
rglGcmBlendState *blend = &rglGcmState_i.state.blend;
GLuint hwColor;
blend->r = LContext->BlendColor.R;
blend->g = LContext->BlendColor.G;
blend->b = LContext->BlendColor.B;
blend->a = LContext->BlendColor.A;
if (rglGcmState_i.renderTarget.colorFormat == RGLGCM_ARGB8)
{
RGLGCM_CALC_COLOR_LE_ARGB8( &hwColor, blend->r, blend->g, blend->b, blend->a );
GCM_FUNC( cellGcmSetBlendColor, hwColor, hwColor );
}
GCM_FUNC( cellGcmSetBlendEquation, (rglGcmEnum)LContext->BlendEquationRGB,
(rglGcmEnum)LContext->BlendEquationAlpha );
GCM_FUNC( cellGcmSetBlendFunc, (rglGcmEnum)LContext->BlendFactorSrcRGB,(rglGcmEnum)LContext->BlendFactorDestRGB,(rglGcmEnum)LContext->BlendFactorSrcAlpha,(rglGcmEnum)LContext->BlendFactorDestAlpha);
}
}
#if 0
if ( RGL_UNLIKELY( needValidate & RGL_VALIDATE_SHADER_SRGB_REMAP ) )
{
GCM_FUNC( cellGcmSetFragmentProgramGammaEnable, LContext->ShaderSRGBRemap ? CELL_GCM_TRUE : CELL_GCM_FALSE);
LContext->needValidate &= ~RGL_VALIDATE_SHADER_SRGB_REMAP;
}
#endif
LContext->needValidate = 0;
}
beginning:
GLboolean slowPath = rglPlatformRequiresSlowPath( dparams, 0, 0);
(void)slowPath;