(RGL PS3) Cleanups

2025-04-17 20:43:10 +00:00 · 2012-11-21 01:15:07 +01:00 · 2012-11-21 01:15:07 +01:00 · 897167789f
commit 897167789f
parent 9746cb8f97
4 changed files with 111 additions and 262 deletions
--- a/console/rgl/src/ps3/include/rgl-inline.h
+++ b/console/rgl/src/ps3/include/rgl-inline.h
@ -85,30 +85,25 @@ static inline void rglGcmFifoGlViewport( GLint x, GLint y, GLsizei width, GLsize
      clipY0 = y;
      clipY1 = y + height;
   }
+
   if ( clipX0 < 0 )
-   {
      clipX0 = 0;
-   }
   if ( clipY0 < 0 )
-   {
      clipY0 = 0;
-   }
+
   if ( clipX1 >= RGLGCM_MAX_RT_DIMENSION )
-   {
      clipX1 = RGLGCM_MAX_RT_DIMENSION;
-   }
+
   if ( clipY1 >= RGLGCM_MAX_RT_DIMENSION )
-   {
      clipY1 = RGLGCM_MAX_RT_DIMENSION;
-   }
+
   if (( clipX1 <= clipX0 ) || ( clipY1 <= clipY0 ) )
-   {
      clipX0 = clipY0 = clipX1 = clipY1 = 0;
-   }

   // update viewport info
   vp->xScale = width * 0.5f;
   vp->xCenter = ( GLfloat )( x + vp->xScale + RGLGCM_SUBPIXEL_ADJUST );
+
   if ( rt->yInverted )
   {
      vp->yScale = height * -0.5f;
@ -226,7 +221,7 @@ static inline void rglGcmFifoGlDrawArrays( rglGcmEnum mode, GLint first, GLsizei

 static inline GLuint rglGcmMapMinTextureFilter( GLenum filter )
 {
-   switch ( filter )
+   switch (filter)
   {
      case GL_NEAREST:
         return CELL_GCM_TEXTURE_NEAREST;
@ -268,44 +263,6 @@ static inline GLuint rglGcmMapMagTextureFilter( GLenum filter )
   return filter;
 }

-static inline GLuint rglGcmMapAniso( GLuint maxAniso )
-{
-
-   if ( maxAniso >= 16 )
-      return CELL_GCM_TEXTURE_MAX_ANISO_16;
-   if ( maxAniso == 1 )
-      return CELL_GCM_TEXTURE_MAX_ANISO_1;
-
-   switch ( maxAniso / 2 )
-   {
-      case 1:
-         return CELL_GCM_TEXTURE_MAX_ANISO_2;
-         break;
-      case 2:
-         return CELL_GCM_TEXTURE_MAX_ANISO_4;
-         break;
-      case 3:
-         return CELL_GCM_TEXTURE_MAX_ANISO_6;
-         break;
-      case 4:
-         return CELL_GCM_TEXTURE_MAX_ANISO_8;
-         break;
-      case 5:
-         return CELL_GCM_TEXTURE_MAX_ANISO_10;
-         break;
-      case 6:
-         return CELL_GCM_TEXTURE_MAX_ANISO_12;
-         break;
-      case 7:
-         return CELL_GCM_TEXTURE_MAX_ANISO_16;
-         break;
-      default:
-         return 0;
-         break;
-   }
-   return 0;
-}
-
 static inline GLuint rglGcmMapWrapMode( GLuint mode )
 {
   switch ( mode )
@ -568,69 +525,6 @@ static inline void rglGcmFifoGlInvalidateTextureCache( void )
   GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE );
 }

-/* writes the supplied new semaphore value once the gpu has completed all
- ** currently pending work.
- **
- ** note:
- **  - we do not enforce pairing of Acquire/Release, so you can (ab)use it to
- **    write synchronized signal values...
- */
-
-static inline void rglGcmFifoGlReleaseSemaphore( rglGcmEnum target, GLuint semaphoreId, GLuint newSemphoreValue )
-{
-   rglGcmSemaphoreMemory *semaphores = rglGcmState_i.semaphores;
-
-   switch ( target )
-   {
-      case RGLGCM_SEMAPHORE_USING_GPU:
-         // let the backend(rop/fb) write the release value
-         // -- guarantees all reads/writes have completed
-         GCM_FUNC( cellGcmSetWriteBackEndLabel, semaphoreId, newSemphoreValue );
-         break;
-      case RGLGCM_SEMAPHORE_USING_GPU_NO_WRITE_FLUSH:
-         // write the semaphore value once host/vb/ib/tex are no longer referencing
-         // any data prior to the method.
-         // -- does _NOT_ guarantee that read/writes on the render target surfaces
-         //    have completed (iow: cpu read on the color buffer will be undefined)
-
-         GCM_FUNC( cellGcmSetWriteTextureLabel, semaphoreId, newSemphoreValue );
-         break;
-      case RGLGCM_SEMAPHORE_USING_CPU:
-         semaphores->userSemaphores[semaphoreId].val = newSemphoreValue;
-         break;
-      default:
-         break;
-   }
-}
-
-/* lets the gpu/cpu wait until the specific semaphore is equal to the requested
- ** semaphore value.
- **
- ** note:
- **  - we do not enforce pairing of Acquire/Release, so you can (ab)use it to
- **  - What about aquire timeouts (after a few seconds) ?
- */
-void static inline rglGcmFifoGlAcquireSemaphore( rglGcmEnum target, GLuint semaphoreId, GLuint reqSemphoreValue )
-{
-   rglGcmSemaphoreMemory *semaphores = rglGcmState_i.semaphores;
-
-   // pick location
-   switch ( target )
-   {
-      case RGLGCM_SEMAPHORE_USING_GPU:
-         // let the frontend aquire the semaphore...
-         GCM_FUNC( cellGcmSetWaitLabel, semaphoreId, reqSemphoreValue );
-         break;
-      case RGLGCM_SEMAPHORE_USING_CPU:
-         // lame polling for now...
-         for ( ;semaphores->userSemaphores[semaphoreId].val != reqSemphoreValue; )
-            sys_timer_usleep(10);
-         break;
-      default:
-         break;
-   }
-}
-
 // Fast conversion for values between 0.0 and 65535.0
 GLuint inline static RGLGCM_QUICK_FLOAT2UINT( const GLfloat f )
 {
@ -902,7 +796,7 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl
   GCM_FUNC( cellGcmSetBlendEquation, mode, modeAlpha );
 }

-   void static inline rglGcmFifoGlVertexAttribPointer
+static inline void rglGcmFifoGlVertexAttribPointer
 (
 GLuint          index,
 GLint           size,
@ -939,13 +833,9 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl
   {
      case RGLGCM_UNSIGNED_BYTE:
         if (normalized)
-         {
            gcmType = CELL_GCM_VERTEX_UB;
-         }
         else
-         {
            gcmType = CELL_GCM_VERTEX_UB256;
-         }
         break;

      case RGLGCM_SHORT:
@ -969,12 +859,7 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl
         break;
   }

-   uint8_t location = CELL_GCM_LOCATION_LOCAL;
-
-   if ( isMain )
-      location = CELL_GCM_LOCATION_MAIN;
-
-   GCM_FUNC( cellGcmSetVertexDataArray, index, frequency, stride, size, gcmType, location, offset );
+   GCM_FUNC( cellGcmSetVertexDataArray, index, frequency, stride, size, gcmType, CELL_GCM_LOCATION_LOCAL, offset );
 }

 // set the vertex attribute to the specified value.
@ -1104,10 +989,22 @@ static inline void rglFifoGlProgramParameterfvVP( const _CGprogram *program, con
            // set 4 consts
            {
               GLfloat v2[16];
-               v2[0] = value[0];v2[1] = value[4];v2[2] = value[8];v2[3] = value[12];
-               v2[4] = value[1];v2[5] = value[5];v2[6] = value[9];v2[7] = value[13];
-               v2[8] = value[2];v2[9] = value[6];v2[10] = value[10];v2[11] = value[14];
-               v2[12] = value[3];v2[13] = value[7];v2[14] = value[11];v2[15] = value[15];
+               v2[0] = value[0];
+               v2[1] = value[4];
+               v2[2] = value[8];
+               v2[3] = value[12];
+               v2[4] = value[1];
+               v2[5] = value[5];
+               v2[6] = value[9];
+               v2[7] = value[13];
+               v2[8] = value[2];
+               v2[9] = value[6];
+               v2[10] = value[10];
+               v2[11] = value[14];
+               v2[12] = value[3];
+               v2[13] = value[7];
+               v2[14] = value[11];
+               v2[15] = value[15];
               GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 4, v2 ); // GCM_PORT_TESTED [Cedric]
            }
            break;
@ -1130,21 +1027,6 @@ static inline void rglFifoGlProgramParameterfvVP( const _CGprogram *program, con
   }
 }

-// Push a CG program onto the current command buffer
-static inline void rglGcmPushProgramPushBuffer( _CGprogram * cgprog )
-{
-   // make sure there is space for the pushbuffer + any nops we need to add for alignment  
-   rglGcmFifoWaitForFreeSpace( &rglGcmState_i.fifo,  cgprog->constantPushBufferWordSize + 4 + 32); 
-   // first add nops to get us the next alligned position in the fifo 
-   // [YLIN] Use VMX register to copy
-   uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2;
-   uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf;
-   GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word );
-   memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size);
-   rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize;
-
-}
-
 // Look up the memory location of a buffer object (VBO, PBO)
 static inline GLuint rglGcmGetBufferObjectOrigin( GLuint buffer )
 {
--- a/console/rgl/src/ps3/include/rgl-typedefs.h
+++ b/console/rgl/src/ps3/include/rgl-typedefs.h
@ -19,7 +19,7 @@ typedef struct  _tagMODESTRUC
   GLushort  wVertSyncStart;
   GLushort  wVertSyncEnd;
   GLushort  wVertBlankEnd;
-   GLuint      dwDotClock;      // In 10K Hertz
+   GLuint    dwDotClock;      // In 10K Hertz
   GLushort  wHSyncPolarity;
   GLushort  wVSyncPolarity;
 } MODESTRUC;
--- a/console/rgl/src/ps3/rgl_ps3.cpp
+++ b/console/rgl/src/ps3/rgl_ps3.cpp
@ -2375,38 +2375,6 @@ void rglGcmDestroyRM( rglGcmResource* gcmResource )
   return;
 }

-void rglGcmGraphicsHandler( const uint32_t head )
-{
-   // GCM will call this Graphics Handler if there is a channel error which 
-   // can be caused by bad fifo commands, and GPU error, or GPU memory access. 
-
-   printf( "========================================\n" );
-   printf( " RGL [rglGcmGraphicsHandler]  \n" );
-   printf( " GCM triggers this because of RSX error \n" );
-   printf( "  due to invalid Fifo Commands, \n" );
-   printf( "  invalid GPU state, or invalid memory access\n" );
-   printf( "========================================\n" );
-
-   // print out the previous 10 words from the current position;
-   rglGcmState_i.fifo.updateLastGetRead(); 
-
-   // Dumping current fifo state 
-   printf(" Current RGL FIFO info \n" ); 
-   printf(" Fifo Begin %p End %p Current %p and Get %p \n", 
-         rglGcmState_i.fifo.begin, 
-         rglGcmState_i.fifo.end,
-         rglGcmState_i.fifo.current,
-         rglGcmState_i.fifo.lastGetRead ); 
-
-   printf(" Last 10 words of the RGL Fifo from the ppu put/current position \n" );  
-   rglPrintFifoFromPut( 10 ); 
-
-   printf(" Last 10 words of the RGL Fifo from the gpu get position \n" );  
-   rglPrintFifoFromGet( 10 ); 
-}
-
-extern GLboolean _psglDisableCompression;
-
 int rglGcmInitRM( rglGcmResource *gcmResource, unsigned int hostMemorySize, int inSysMem, unsigned int dmaPushBufferSize )
 {
   memset( gcmResource, 0, sizeof( rglGcmResource ) );
@ -2435,10 +2403,6 @@ int rglGcmInitRM( rglGcmResource *gcmResource, unsigned int hostMemorySize, int
      return GL_FALSE;
   }

-   cellGcmSetDebugOutputLevel( CELL_GCM_DEBUG_LEVEL2 );
-   // set the rglGcm graphics error callback
-   cellGcmSetGraphicsHandler( &rglGcmGraphicsHandler );
-
   // Get Gpu configuration
   CellGcmConfig config;
   cellGcmGetConfiguration( &config );
@ -2751,7 +2715,7 @@ static void rglGcmAllocateTiledSurface(
   //  certain dimension combinations, but this is simple and may conserve
   //  tiled region usage over some alternatives.
   GLuint padSize = RGLGCM_TILED_BUFFER_ALIGNMENT; // 64KB
-   
+
   while (( padSize % ( tiledPitch*8 ) ) != 0 )
      padSize += RGLGCM_TILED_BUFFER_ALIGNMENT;

--- a/console/rgl/src/ps3/rgl_ps3_raster.cpp
+++ b/console/rgl/src/ps3/rgl_ps3_raster.cpp
@ -1106,14 +1106,14 @@ void rglSetDefaultValuesVP( _CGprogram *program )
         const float *itemDefaultValues = program->defaultValues + program->defaultValuesIndices[i].defaultValueIndex;
         int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1;
         if ( parameterEntry->flags & CGP_CONTIGUOUS )
-            memcpy( rtParameter->pushBufferPointer, itemDefaultValues, arrayCount * registerStride *4*sizeof( float ) );
+            __builtin_memcpy( rtParameter->pushBufferPointer, itemDefaultValues, arrayCount * registerStride *4*sizeof( float ) );
         else
         {
            unsigned int *pushBufferPointer = (( unsigned int * )rtParameter->pushBufferPointer );
            for ( int j = 0;j < arrayCount;j++ )
            {
               unsigned int *pushBufferAddress = isArray ? ( *( unsigned int** )pushBufferPointer ) : pushBufferPointer;
-               memcpy( pushBufferAddress, itemDefaultValues, registerStride*4*sizeof( float ) );
+               __builtin_memcpy( pushBufferAddress, itemDefaultValues, registerStride*4*sizeof( float ) );
               pushBufferPointer += isArray ? 1 : 3 + registerStride * 4;
               itemDefaultValues += 4 * registerStride;
            }
@ -1163,7 +1163,7 @@ void rglSetDefaultValuesFP( _CGprogram *program )
               dst[2] = SWAP_IF_BIG_ENDIAN( itemDefaultValues[2] );
               dst[3] = SWAP_IF_BIG_ENDIAN( itemDefaultValues[3] );
            }
-            memcpy(( void* )hostMemoryCopy, ( void* )itemDefaultValues, sizeof( float )*4 );
+            __builtin_memcpy(( void* )hostMemoryCopy, ( void* )itemDefaultValues, sizeof( float )*4 );
            hostMemoryCopy += 4;
            itemDefaultValues += 4;
            resource++; //skip the register of the next item
@ -1255,9 +1255,7 @@ void rglPlatformBufferObjectSetData( rglBufferObject* bufferObject, GLintptr off
   rglGcmBufferObject *rglBuffer = ( rglGcmBufferObject * )bufferObject->platformBufferObject;

   if ( size == bufferObject->size && tryImmediateCopy )
-   {
-      memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size );
-   }
+      __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size );
   else
      if ( size >= bufferObject->size )
      {
@ -1276,16 +1274,14 @@ void rglPlatformBufferObjectSetData( rglBufferObject* bufferObject, GLintptr off
               rglSetError( GL_OUT_OF_MEMORY );
               return;
            default:
-               memcpy( gmmIdToAddress( rglBuffer->bufferId ), data, size );
+               __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ), data, size );
               break;
         }
      }
      else
      {
         if ( tryImmediateCopy )
-         {
-            memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size );
-         }
+            __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size );
         else
         {
            // partial buffer write
@ -1440,7 +1436,7 @@ void rglFBClear( GLbitfield mask )
      GLuint bufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo, 
            CELL_GCM_LOCATION_LOCAL, 0, sizeof(rglClearVertexBuffer));

-      memcpy( gmmIdToAddress(bufferId), rglClearVertexBuffer, sizeof( rglClearVertexBuffer ) );
+      __builtin_memcpy(gmmIdToAddress(bufferId), rglClearVertexBuffer, sizeof(rglClearVertexBuffer));
      rglGcmFifoGlVertexAttribPointer( 0, 3, RGLGCM_FLOAT, RGLGCM_FALSE, 3*sizeof( GLfloat ), 1, 0, gmmIdToOffset(bufferId) );
      RGLBIT_TRUE( LContext->attribs->DirtyMask, 0 );

@ -1867,9 +1863,9 @@ GLuint rglValidateAttributesSlow( rglDrawParams *dparams, GLboolean *isMain )
   rglBitfield needsUpdateMask = ( as->DirtyMask | ( as->EnabledMask & ~as->HasVBOMask ) );

   // for any remaining attributes that need updating, do it now.
-   if ( needsUpdateMask )
+   if(needsUpdateMask)
   {
-      for ( GLuint i = 0; i < RGL_MAX_VERTEX_ATTRIBS; ++i )
+      for(GLuint i = 0; i < RGL_MAX_VERTEX_ATTRIBS; ++i)
      {
         // skip this attribute if not needing update
         if ( ! RGLBIT_GET( needsUpdateMask, i ) ) continue;
@ -1888,9 +1884,9 @@ GLuint rglValidateAttributesSlow( rglDrawParams *dparams, GLboolean *isMain )
               GLuint offset = ( dparams->firstVertex / freq ) * stride;

               char * b = ( char * )xferBuffer + dparams->attribXferOffset[i];
-               memcpy( b + offset,
-                     ( char * )attrib->clientData + offset,
-                     dparams->attribXferSize[i] - offset );
+               __builtin_memcpy(b + offset,
+                     ( char*)attrib->clientData + offset,
+                     dparams->attribXferSize[i] - offset);

               // draw directly from bounce buffer
               *isMain = gmmIdIsMain(xferId);
@ -2353,7 +2349,7 @@ void rglPlatformUploadTexture( rglTexture* texture )

   // create surface descriptors for image transfer
   rglGcmSurface src = {
-source:		RGLGCM_SURFACE_SOURCE_TEMPORARY,
+            source:		RGLGCM_SURFACE_SOURCE_TEMPORARY,
            width:		0,		// replaced per image
            height:		0,		// replaced per image
            bpp:		pixelBytes,
@ -2366,7 +2362,7 @@ source:		RGLGCM_SURFACE_SOURCE_TEMPORARY,
   };

   rglGcmSurface dst = {
-source:		RGLGCM_SURFACE_SOURCE_TEXTURE,
+            source:		RGLGCM_SURFACE_SOURCE_TEXTURE,
            width:		0,		// replaced per image
            height:		0,		// replaced per image
            bpp:		pixelBytes,
@ -2380,74 +2376,70 @@ source:		RGLGCM_SURFACE_SOURCE_TEXTURE,

   // use a bounce buffer to transfer to GPU
   GLuint bounceBufferId = GMM_ERROR;
+
+   // check if upload is needed for this image
+   rglImage *image = texture->image;
+
+   if ( image->dataState == RGL_IMAGE_DATASTATE_HOST )
   {
+      // determine image offset from base address
+      // TODO: compute all offsets at once for efficiency
+      //  This is the offset in bytes for this face/image from the
+      //  texture base address.
+      const GLuint dataOffset = rglGetGcmImageOffset( layout, 0, 0 );
+
+      // set source pixel buffer
+      src.ppuData = image->data;
+
+      // lazy allocation of bounce buffer
+      if ( bounceBufferId == GMM_ERROR && layout->baseDepth == 1 )
+         bounceBufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo,
+               CELL_GCM_LOCATION_LOCAL, 0, gcmTexture->gpuSize);
+
+      if ( bounceBufferId != GMM_ERROR )
      {
-         // check if upload is needed for this image
-         rglImage *image = texture->image;
-         if ( image->dataState == RGL_IMAGE_DATASTATE_HOST )
-         {
-            // determine image offset from base address
-            // TODO: compute all offsets at once for efficiency
-            //  This is the offset in bytes for this face/image from the
-            //  texture base address.
-            const GLuint dataOffset = rglGetGcmImageOffset( layout, 0, 0 );
+         // copy image to bounce buffer
+         src.dataId = bounceBufferId;
+         src.dataIdOffset = dataOffset;

-            // set source pixel buffer
-            src.ppuData = image->data;
+         // NPOT DXT
+         __builtin_memcpy( gmmIdToAddress( src.dataId ) + dataOffset, 
+               image->data, image->storageSize );
+      }

-            // lazy allocation of bounce buffer
-            if ( bounceBufferId == GMM_ERROR && layout->baseDepth == 1 )
-               bounceBufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo,
-                     CELL_GCM_LOCATION_LOCAL, 0, gcmTexture->gpuSize);
+      // use surface copy functions
+      src.width = image->width;
+      src.height = image->height;
+      src.pitch = pixelBytes * src.width;

-            if ( bounceBufferId != GMM_ERROR )
-            {
-               // copy image to bounce buffer
-               src.dataId = bounceBufferId;
-               src.dataIdOffset = dataOffset;
+      dst.width = src.width;
+      dst.height = image->height;
+      dst.dataId = gcmTexture->gpuAddressId;
+      dst.dataIdOffset = gcmTexture->gpuAddressIdOffset + dataOffset;

-               // NPOT DXT
-               memcpy( gmmIdToAddress( src.dataId ) + dataOffset, 
-                     image->data, 
-                     image->storageSize );
-            }
+      GLuint offsetHeight = 0;

-            {
-               // use surface copy functions
-               src.width = image->width;
-               src.height = image->height;
-               src.pitch = pixelBytes * src.width;
+      if(dst.pitch)
+      {
+         // linear (not swizzled)
+         //  The tiled linear format requires that render
+         //  targets be aligned to 8*pitch from the start of
+         //  the tiled region.
+         offsetHeight = ( dataOffset / dst.pitch ) % 8;
+         dst.height += offsetHeight;
+         dst.dataIdOffset -= offsetHeight * dst.pitch;
+      }

-               dst.width = src.width;
-               dst.height = image->height;
-               dst.dataId = gcmTexture->gpuAddressId;
-               dst.dataIdOffset = gcmTexture->gpuAddressIdOffset + dataOffset;
+      rglGcmCopySurface(
+            &src, 0, 0,
+            &dst, 0, offsetHeight,
+            src.width, src.height,
+            GL_TRUE );	// don't bypass GPU pipeline

-               GLuint offsetHeight = 0;
-               if ( dst.pitch )
-               {
-                  // linear (not swizzled)
-                  //  The tiled linear format requires that render
-                  //  targets be aligned to 8*pitch from the start of
-                  //  the tiled region.
-                  offsetHeight = ( dataOffset / dst.pitch ) % 8;
-                  dst.height += offsetHeight;
-                  dst.dataIdOffset -= offsetHeight * dst.pitch;
-               }
-
-               rglGcmCopySurface(
-                     &src, 0, 0,
-                     &dst, 0, offsetHeight,
-                     src.width, src.height,
-                     GL_TRUE );	// don't bypass GPU pipeline
-            }
-
-            // free CPU copy of data
-            rglImageFreeCPUStorage( image );
-            image->dataState |= RGL_IMAGE_DATASTATE_GPU;
-         } // newer data on host
-      } // loop over levels
-   } // loop over faces
+      // free CPU copy of data
+      rglImageFreeCPUStorage( image );
+      image->dataState |= RGL_IMAGE_DATASTATE_GPU;
+   } // newer data on host

   if ( bounceBufferId != GMM_ERROR )
      gmmFree( bounceBufferId );
@ -2469,10 +2461,8 @@ static inline void rglGcmUpdateGcmTexture( rglTexture * texture, rglGcmTextureLa
         platformTexture->gcmTexture.format, platformTexture->gcmTexture.remap );

   // This is just to cover the conversion from swizzled to linear
-   if ( layout->pitch )
-   {
+   if(layout->pitch)
      platformTexture->gcmTexture.format += 0x20; // see class doc definitions for SZ_NR vs LN_NR...
-   }

   platformTexture->gcmTexture.width = layout->baseWidth;
   platformTexture->gcmTexture.height = layout->baseHeight;
@ -2541,7 +2531,7 @@ void rglGcmUpdateMethods( rglTexture * texture )

   // -----------------------------------------------------------------------
   // set the SET_TEXTURE_CONTROL0 params
-   platformTexture->gcmMethods.control0.maxAniso = rglGcmMapAniso( maxAniso );
+   platformTexture->gcmMethods.control0.maxAniso = CELL_GCM_TEXTURE_MAX_ANISO_1;
   const GLfloat minLOD = MAX( texture->minLod, texture->baseLevel );
   const GLfloat maxLOD = MIN( texture->maxLod, texture->maxLevel );
   platformTexture->gcmMethods.control0.minLOD = ( GLuint )( MAX( minLOD, 0 ) * 256.0f );
@ -3046,8 +3036,21 @@ void rglValidateVertexProgram()
 void rglValidateVertexConstants()
 {
   RGLcontext*	LContext = _CurrentContext;
+   _CGprogram *cgprog = LContext->BoundVertexProgram;

-   rglGcmPushProgramPushBuffer( LContext->BoundVertexProgram );
+   // Push a CG program onto the current command buffer
+
+   // make sure there is space for the pushbuffer + any nops we need to add for alignment  
+   rglGcmFifoWaitForFreeSpace( &rglGcmState_i.fifo,  cgprog->constantPushBufferWordSize + 4 + 32); 
+
+   // first add nops to get us to the next aligned position in the fifo
+   // [YLIN] Use VMX register to copy
+   uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2;
+   uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf;
+
+   GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word );
+   memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size);
+   rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize;
 }

 /*============================================================