/* */ #define DEBUG #include #include #include #include #include "debug.h" #include "pad.h" //#define GS_TRASHES_CACHE // define this if any Gs... function trashes the dcache #define PACKET_MAX (1024*24) #define OT_LENGTH 1 #define OT_SHIFT (14-OT_LENGTH) #define PAL #ifdef PAL #define SCREEN_WIDTH 320 #define SCREEN_HEIGHT 256 #define SCREEN_MODE MODE_PAL #else #define SCREEN_WIDTH 256 #define SCREEN_HEIGHT 240 #define SCREEN_MODE MODE_NTSC #endif #define FIRE_WIDTH (SCREEN_WIDTH>>1) #define FIRE_HEIGHT ((SCREEN_HEIGHT-16)>>2) #define IMG_WIDTH (FIRE_WIDTH<<1) #define IMG_HEIGHT (FIRE_HEIGHT<<1) #define FADE_AT 2 #define FADE_BY 2 //#define MAKE_RGB(s,r,g,b) (((s&1)<<15)|((b&31)<<10)|((g&31)<<5)|(r&31)) /* ** ----- typedefs ----- */ /* Actual working variables */ static PACKET PacketArea[2][PACKET_MAX]; static GsOT Wot[2]; static GsOT_TAG WTags[2][1<> 1, (SCREEN_HEIGHT - 16 - IMG_HEIGHT + 20) + (WhichBuff ? SCREEN_HEIGHT : 0), IMG_WIDTH, IMG_HEIGHT - 20 ); LoadImage( &r, (u_long*) TheImage ); } static void initFlameBottom( void ) { register int d = 0; register int i; register u_short *wp = &Working[FIRE_HEIGHT-1][0]; for ( i = 0; i < FIRE_WIDTH; ++i ) { if ( (rand() & 1)) { d = (rand() & 1) ? 255 : 0; } wp[0] = d; wp[FIRE_WIDTH] = d; ++wp; } } static u_short *copyPalette( void ) { register int i; register u_short *p1, *p2; p1 = Palette; p2 = (u_short*) 0x1F800000; for ( i = 0; i < 256; ++i ) { *p2 = *p1; ++p1; ++p2; } return (u_short*) 0x1F800004; } /* The main loop does 3 memory accesses rather than the original 5; the other two ** are cached in registers, saving us some 30 HSyncs. The palette values are ** stored in the DCACHE giving a further saving of ~35 HSyncs. ** ** I think that with creative use of the dcache and by processing the array diagonally ** from bottom left to top right, i.e. in the order: ** ** 10 ** 6 9 ** 3 5 8 ** 1 2 4 7 ... etc. ** ** at least one more main memory access would be removed, but at the expense of much ** greater complexity. ** ** But the greatest spped increase of all comes from compiling with GCC rather than ** CodeWarrior. GCC's code runs 2-4 times faster than CodeWarrior's! ** ** The algorithm calculates the value for a pixel by averaging 5 surrounding pixels: ** ** 1 ** 2 X 3 X is the pixel being calculated, 1-5 are the pixels used for averaging. ** 4 5 */ //#define UNOPTIMISED // define this to use the unoptimised version of the routine static void updateFlame( void ) { #ifdef UNOPTIMISED int x, y; u_short cl1; for (y = FIRE_HEIGHT-1; y >= 2; y--) { for (x = 1; x <= FIRE_WIDTH; x++) { cl1 = Current[y - 1][x + 0]; // [1] cl1 += Current[y + 0][x - 1]; // [2] cl1 += Current[y + 0][x + 1]; // [3] cl1 += Current[y + 1][x - 1]; // [4] cl1 += Current[y + 1][x + 1]; // [5] cl1 /= 5; if (cl1 > FADE_AT) { cl1 -= FADE_BY; } Working[y-1][x] = cl1; TheImage[(y-1)<<1][(x<<1)] = Palette[cl1]; TheImage[(y-1)<<1][(x<<1)+1] = Palette[cl1]; TheImage[((y-1)<<1)+1][(x<<1)] = Palette[cl1]; TheImage[((y-1)<<1)+1][(x<<1)+1]= Palette[cl1]; } } #else register u_int cl1; register u_short *cp1; // pointer to Current array register u_short *wp; // pointer to Working array register u_short *ip1; // pointer to image register u_short *pp; // pointer to palette register u_short pv, cv1, cv2; // palette value; cached value 1 & 2 int x, y; cp1 = &Current[FIRE_HEIGHT-2][1]; wp = &Working[FIRE_HEIGHT-2][1]; ip1 = &TheImage[(FIRE_HEIGHT-2)<<1][1]; #ifdef GS_TRASHES_CACHE pp = copyPalette(); #else pp = (u_short*) 0x1F800000; #endif for ( y = FIRE_HEIGHT - 1; y >= 2; --y ) { cl1 = cp1[FIRE_WIDTH - 1]; // [2] cv1 = cp1[FIRE_WIDTH + FIRE_WIDTH - 1]; // [4] cv2 = cp1[FIRE_WIDTH + FIRE_WIDTH]; // [4]+1 for ( x = 1; x < FIRE_WIDTH; ++x ) { cl1 += cp1[0]; // [1] cl1 += cp1[FIRE_WIDTH + 1]; // [3] cl1 += cv1; // [4] cv1 = cv2; cv2 = cp1[FIRE_WIDTH + FIRE_WIDTH + 1]; // [5] cl1 += cv2; cl1 = Div5Table[cl1]; *wp = cl1; pv = pp[cl1]; ip1[0] = pv; ip1[1] = pv; ip1[IMG_WIDTH] = pv; ip1[IMG_WIDTH+1] = pv; ip1 += 2; ++cp1; ++wp; } cp1 -= FIRE_WIDTH + FIRE_WIDTH - 1; wp -= FIRE_WIDTH + FIRE_WIDTH - 1; ip1 -= IMG_WIDTH + IMG_WIDTH + IMG_WIDTH - 2; } #endif } static void flame( void ) { initFlameBottom(); updateFlame(); } static void main_proc( void ) { register u_short (*tmp)[FIRE_WIDTH]; register GsOT *wp; WhichBuff = GsGetActiveBuff(); while ( ( PadRead() & PAD1abort) != PAD1abort ) { wp = &Wot[WhichBuff]; dbgIncFrame(); GsSetWorkBase( (PACKET*) PacketArea[WhichBuff] ); GsClearOt( 0, 0, wp ); GsSortClear( 0, 0, 4, wp ); /* ... */ flame(); /* ... */ GsDrawOt( wp ); drawCurrentFlame(); dbgDisplay(); FntFlush(-1); /* ** double buffer - swap screens */ DrawSync(0); // wait for drawing to stop VSync(0); // wait for vertical retrace GsSwapDispBuff(); // swap buffers tmp = Current; Current = Working; Working = tmp; WhichBuff ^= 1; } } static void term( void ) { dbgTerm(); } void main( void ) { init(); #ifndef GS_TRASHES_CACHE copyPalette(); #endif main_proc(); term(); }