 /*
  * <LIC_AMD_STD>
  * Copyright (C) 2005 Advanced Micro Devices, Inc.  All Rights Reserved.
  * </LIC_AMD_STD>
  *
  * <CTL_AMD_STD>
  * </CTL_AMD_STD>
  *
  * <DOC_AMD_STD>
  * Cimarron graphics processor routines.  These routines program the graphics
  * hardware using the graphics command buffer.
  * </DOC_AMD_STD>
  *
  */

/*---------------------*/
/* CIMARRON GP GLOBALS */
/*---------------------*/

CIMARRON_STATIC unsigned long gp3_bpp         = 0;
CIMARRON_STATIC unsigned long gp3_ch3_bpp     = 0;
CIMARRON_STATIC unsigned long gp3_pat_origin  = 0;
CIMARRON_STATIC unsigned long gp3_buffer_lead = 0;
CIMARRON_STATIC unsigned long gp3_cmd_header;
CIMARRON_STATIC unsigned long gp3_cmd_top;
CIMARRON_STATIC unsigned long gp3_cmd_bottom;
CIMARRON_STATIC unsigned long gp3_cmd_current;
CIMARRON_STATIC unsigned long gp3_cmd_next;
CIMARRON_STATIC unsigned long gp3_blt_mode;
CIMARRON_STATIC unsigned long gp3_vec_mode;
CIMARRON_STATIC unsigned long gp3_raster_mode;
CIMARRON_STATIC unsigned long gp3_pix_shift;
CIMARRON_STATIC unsigned long gp3_ch3_pat;
CIMARRON_STATIC unsigned long gp3_blt;
CIMARRON_STATIC unsigned long gp3_blt_flags;
CIMARRON_STATIC unsigned long gp3_src_stride;
CIMARRON_STATIC unsigned long gp3_dst_stride;
CIMARRON_STATIC unsigned long gp3_src_format;
CIMARRON_STATIC unsigned long gp3_src_pix_shift;
CIMARRON_STATIC unsigned long gp3_pat_format;
CIMARRON_STATIC unsigned long gp3_pat_pix_shift;
CIMARRON_STATIC unsigned long gp3_fb_base;
CIMARRON_STATIC unsigned long gp3_vector_pattern_color;
CIMARRON_STATIC unsigned long gp3_scratch_base;
CIMARRON_STATIC unsigned long gp3_base_register;
CIMARRON_STATIC unsigned long gp3_vec_pat;

/*---------------------------------------------------------------------------
 * gp_set_limit_on_buffer_lead
 *
 * This routine is used to specify the maximum number of bytes in the command
 * buffer by which software can lead the graphics processor.  When declaring
 * a BLT with the CIMGP_BLTFLAGS_LIMITBUFFER flag set, Cimarron will wait until
 * the command buffer read and write pointers differ by no more than 'lead'
 * bytes.  This can be useful to limit the time lag possible when creating a
 * command buffer full of simple BLT commands.
 *-------------------------------------------------------------------------*/

void gp_set_limit_on_buffer_lead (unsigned long lead)
{	
	gp3_buffer_lead = lead;
}

/*---------------------------------------------------------------------------
 * gp_set_command_buffer_base
 *
 * This routine is used to program the command buffer region in physical
 * memory.  The command buffer start address must be 1MB aligned. start and
 * stop refer to endpoints within the associated 16MB region.  Command buffers
 * larger than 16MB are not supported.
 *-------------------------------------------------------------------------*/

void gp_set_command_buffer_base (unsigned long address, unsigned long start,
	unsigned long stop)
{	
	Q_WORD msr_value;

	/* WAIT FOR IDLE */
	/* Obviously, we cannot change the command buffer pointer while the GP */
	/* is currently fetching commands.                                     */

	gp_wait_until_idle();

    /* WRITE THE COMMAND BUFFER BASE */

	msr_read64 (MSR_DEVICE_GEODELX_GP, MSR_GEODELINK_CONFIG, &msr_value);
	msr_value.low &= 0xF000FFFF;
	msr_value.low |= (address >> 4) & 0x0FFF0000;
	msr_write64 (MSR_DEVICE_GEODELX_GP, MSR_GEODELINK_CONFIG, &msr_value);

    /* WRITE THE BASE OFFSETS */
	/* We also reset the write and read pointers.  The hardware will */
	/* automatically update the write pointer when the read pointer  */
	/* is updated to prevent the hardware from getting confused when */
	/* initializing a new command buffer.                            */

	WRITE_GP32 (GP3_CMD_TOP, start);
	WRITE_GP32 (GP3_CMD_BOT, stop);
	WRITE_GP32 (GP3_CMD_READ, start);

    /* SAVE THE BASE ADDRESSES */
	/* These are used to determine the appropriate wrap point. */

	gp3_cmd_current = gp3_cmd_top = start;
	gp3_cmd_bottom  = stop;
}

/*---------------------------------------------------------------------------
 * gp_set_frame_buffer_base
 *
 * This routine is used to program the base address of the frame buffer in
 * physical memory.  The frame buffer address must be 16MB aligned.  Cimarron
 * tracks the base address because the maximum frame buffer size may exceed
 * 16MB. Any primitive will thus program the corresponding 16MB region into all
 * base offset registers as well as program the offset into the 16MB region.
 * The size parameter is provided to allow Cimarron to claim the last 1MB of
 * space to be used as a scratch area for workarounds or expanded functionality.
 *-------------------------------------------------------------------------*/

void gp_set_frame_buffer_base (unsigned long address, unsigned long size)
{	
    gp3_scratch_base = size - GP3_SCRATCH_BUFFER_SIZE;
	gp3_fb_base = address >> 24;
    gp3_base_register = (gp3_fb_base << 24) | (gp3_fb_base << 14) | (gp3_fb_base << 4);
    WRITE_GP32 (GP3_BASE_OFFSET, gp3_base_register);
}

/*---------------------------------------------------------------------------
 * gp_set_bpp
 *
 * This routine sets the output BPP of the GP.  The BPP used by the GP does
 * not have to match the display BPP, but that is usually the case.  The
 * supported BPP values are as follows:
 *
 *    8  - palettized 8BPP
 *    12 - 4:4:4:4
 *    15 - 1:5:5:5
 *    16 - 0:5:6:5
 *    32 - 8:8:8:8
 *-------------------------------------------------------------------------*/

void gp_set_bpp (int bpp)
{
	/* STORE BPP */
	/* The bpp is not updated until the next call to gp_set_raster_mode. */
	/* This allows the gp_set_bpp call to happen outside of a BLT.  It   */
	/* also implies that no registers need be written in this routine.   */
	
	switch(bpp)
	{
		case 8:	
			gp3_bpp       = GP3_RM_BPPFMT_332;
			gp3_ch3_bpp   = GP3_CH3_SRC_3_3_2;
			gp3_pix_shift = 0;
			break;
		case 12:
			gp3_bpp       = GP3_RM_BPPFMT_4444;
			gp3_ch3_bpp   = GP3_CH3_SRC_4_4_4_4;
			gp3_pix_shift = 1;
			break;
		case 15:
			gp3_bpp       = GP3_RM_BPPFMT_1555;
			gp3_ch3_bpp   = GP3_CH3_SRC_1_5_5_5;
			gp3_pix_shift = 1;
			break;
		case 16:
			gp3_bpp       = GP3_RM_BPPFMT_565;
			gp3_ch3_bpp   = GP3_CH3_SRC_0_5_6_5;
			gp3_pix_shift = 1;
			break;
        case 24:
        case 32:
			gp3_bpp       = GP3_RM_BPPFMT_8888;
			gp3_ch3_bpp   = GP3_CH3_SRC_8_8_8_8;
			gp3_pix_shift = 2;
			break;
		default:
			gp3_bpp       = GP3_RM_BPPFMT_332;
			gp3_ch3_bpp   = GP3_CH3_SRC_3_3_2;
			gp3_pix_shift = 0;
			break;
	}
}

/*---------------------------------------------------------------------------
 * gp_declare_blt
 *
 * This routine is used to prepare for a 2D BLT.  Its primary function
 * is to verify that enough room is available in the command buffer
 * to hold a BLT command.  This command can be called multiple times if
 * necessary.  For example, if a function calls this routine on entry, but
 * later realizes that a LUT load command must be executed before the BLT,
 * the application could call gp_set_color_pattern and then call
 * gp_declare_blt to declare the BLT.  This is possible because the hardware
 * buffer pointer is not updated until a new BLT is actually executed.  An
 * application must take care not to call any routines that perform a buffer
 * command, (such as gp_set_color_pattern) between gp_declare_blt and the
 * routines used to program the BLT parameters.  In addition to checking for
 * available space, this routine also performs the following actions:
 *    - Sets the wrap bit if this BLT will pass close to the end of the buffer.
 *    - Writes the command header.
 *
 * The available flags are defined as follows:
 *   0x01 - Preserve the LUT
 *   0x02 - Preserve the color pattern.
 *   0x04 - Enable prefetch.
 *-------------------------------------------------------------------------*/

void gp_declare_blt (unsigned long flags)
{	
	unsigned long temp;

	gp3_blt = 1;
	gp3_blt_flags = flags;

	/* SET ADDRESS OF NEXT COMMAND */
	/* A summary of the command buffer logic is as follows:           */
	/*  - If after a basic BLT we will not have room for the largest  */
	/*    command (a full line of host source data), we set the wrap  */
	/*    bit.  This will waste up to a whopping 8K of command buffer */
	/*    space, but it simplifies the logic for all commands.        */
	/* -  If we are wrapping, we have extra logic to ensure that we   */
	/*    don't skip over the current GP read pointer.                */

    gp3_cmd_next = gp3_cmd_current + GP3_BLT_COMMAND_SIZE;

	/* CHECK WRAP CONDITION */

	if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
	{
		gp3_cmd_next   = gp3_cmd_top;
		gp3_cmd_header = GP3_BLT_HDR_TYPE | GP3_BLT_HDR_WRAP;

		/* WAIT FOR HARDWARE */
		/* When wrapping, we must take steps to ensure that we do not    */
		/* wrap over the current hardware read pointer.  We do this by   */
		/* verifying that the hardware is not between us and the end of  */
		/* the command buffer.  We also have a special case to make sure */
		/* that the hardware is not currently reading the top of the     */
		/* command buffer.                                               */

		GP3_WAIT_WRAP(temp);
	}
	else
	{
		gp3_cmd_header = GP3_BLT_HDR_TYPE;

		/* WAIT FOR AVAILABLE SPACE */

		GP3_WAIT_PRIMITIVE(temp);
	}

    if (flags & CIMGP_BLTFLAGS_LIMITBUFFER)
    {
        while (1)
        {
            temp = READ_GP32 (GP3_CMD_READ);
            if (((gp3_cmd_current >= temp) && ((gp3_cmd_current - temp) <= gp3_buffer_lead)) ||
                ((gp3_cmd_current <  temp) && ((gp3_cmd_current + (gp3_cmd_bottom - temp)) <= gp3_buffer_lead)))
            {
                break;
            }
        }
    }

	/* SET THE CURRENT BUFFER POINTER */
	/* We initialize a pointer to the current buffer base to avoid an */
	/* extra addition for every buffer write.                         */

	cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

    /* SET THE HAZARD BIT */

    if (flags & CIMGP_BLTFLAGS_HAZARD)
        gp3_cmd_header |= GP3_BLT_HDR_HAZARD_ENABLE;
}

/*---------------------------------------------------------------------------
 * gp_declare_vector
 *
 * This routine is used to prepare for a 2D vector.  It has no other function
 * except to verify that enough room is available in the command buffer
 * to hold a vector command.  The same rules that apply to BLTs apply to
 * vectors. (See the documentation for gp_declare_blt).
 *-------------------------------------------------------------------------*/

void gp_declare_vector (unsigned long flags)
{
	unsigned long temp;

	gp3_blt = 0;
    gp3_blt_flags = flags;
	
	/* SET ADDRESS OF NEXT COMMAND                            */
	/* The logic to force a wrap during a vector is identical */
	/* to the BLT logic.                                      */

    /* ALLOCATE SPACE FOR AN ADDITIONAL VECTOR TO CLEAR THE BYTE ENABLES */

    gp3_cmd_next = gp3_cmd_current + GP3_VECTOR_COMMAND_SIZE + GP3_VECTOR_COMMAND_SIZE + 32;

	/* CHECK WRAP CONDITION */

	if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
	{
		gp3_cmd_next   = gp3_cmd_top;
		gp3_cmd_header = GP3_VEC_HDR_TYPE | GP3_VEC_HDR_WRAP;

		/* CHECK WRAP CONDITION */

		GP3_WAIT_WRAP(temp);
	}
	else
	{
		gp3_cmd_header = GP3_VEC_HDR_TYPE;

		/* WAIT FOR AVAILABLE SPACE */

		GP3_WAIT_PRIMITIVE(temp);

        gp3_cmd_next -= GP3_VECTOR_COMMAND_SIZE + 32;
	}

    if (flags & CIMGP_BLTFLAGS_LIMITBUFFER)
    {
        while (1)
        {
            temp = READ_GP32 (GP3_CMD_READ);
            if (((gp3_cmd_current >= temp) && ((gp3_cmd_current - temp) <= gp3_buffer_lead)) ||
                ((gp3_cmd_current <  temp) && ((gp3_cmd_current + (gp3_cmd_bottom - temp)) <= gp3_buffer_lead)))
            {
                break;
            }
        }
    }

	cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

    /* SET THE HAZARD BIT */

    if (flags & CIMGP_BLTFLAGS_HAZARD)
        gp3_cmd_header |= GP3_VEC_HDR_HAZARD_ENABLE;
}

/*---------------------------------------------------------------------------
 * gp_write_parameters
 *
 * This routine is called to write all recent parameters to the hardware.
 * This routine is necessary for any implementation that performs the setup
 * for a BLT separate from the actual BLT.  An example would be a driver
 * that prepares for multiple pattern fills by programming the ROP,
 * pattern color and destination stride.  The driver might then perform
 * repeated pattern fills with minimal effort.
 *-------------------------------------------------------------------------*/

void gp_write_parameters (void)
{
	/* WRITE THE COMMAND HEADER */
	/* Command header is at offset 0 for BLTs and vectors */
	
	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	
	/* INCREMENT THE CURRENT WRITE POINTER */

	gp3_cmd_current = gp3_cmd_next;

	/* UPDATE THE GP WRITE POINTER */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_current);
}

/*---------------------------------------------------------------------------
 * gp_set_raster_operation
 *
 * This is generally the first routine called when programming a BLT.  This
 * routine performs the following functions:
 *   - Sets the initial value of the GP3_RASTER_MODE register in the buffer.
 *   - Clears any 8x8 pattern if the ROP does not involve pattern data.
 *-------------------------------------------------------------------------*/

void gp_set_raster_operation (unsigned char ROP)
{
	gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE;

	/* WRITE THE RASTER MODE REGISTER                                   */
	/* This register is in the same location in BLT and vector commands */

	gp3_raster_mode = gp3_bpp | (unsigned long)ROP;
	WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode);

	/* CHECK IF DESTINATION IS REQUIRED */

	if ((ROP & 0x55) ^ ((ROP >> 1) & 0x55))
	{
		gp3_blt_mode = GP3_BM_DST_REQ;
		gp3_vec_mode = GP3_VM_DST_REQ;
	}
	else
	{
		gp3_blt_mode = gp3_vec_mode = 0;
	}
}

/*----------------------------------------------------------------------------
 * gp_set_alpha_operation
 *
 * BLTs are generally one of two types, a ROPed BLT or a BLT composited using
 * alpha blending.  For the latter, this routine is used to configure the
 * mathematical function used to create the blended output.  This routine
 * should generally be called first when programming a BLT.  The available
 * parameters mirror the hardware and are described as follows:
 *
 * alpha_operation =
 *   0 - alpha * A
 *   1 - (1 - alpha) * B
 *   2 - A + (1 - alpha)*B
 *   3 - alpha*A + (1 - alpha)*B
 *
 * alpha_type =
 *   0 - alpha component of channel A
 *   1 - alpha component of channel B
 *   2 - Constant alpha
 *   3 - Constant 1
 *   4 - The color components of channel A
 *   5 - The color components of channel B
 *   6 - Alpha comes from the alpha channel of the source before the source undergoes
 *       color conversion.
 *
 * channel =
 *   0 - Channel A = source, channel B = destination
 *   1 - Channel B = source, channel A = destination
 *
 * apply_alpha =
 *   1 - Apply alpha blend to only the RGB portion of the pixel.  This must be
 *       set when the source or destination format do not include an alpha channel.
 *   2 - Apply alpha blend only to the alpha portion of the pixel.  This implies
 *       that both destination and source include an alpha channel.
 *   3 - Apply alpha blend to both the RGB and alpha portions of the pixel.
 *
 * Alpha-blended vectors are not currently supported.
 *-------------------------------------------------------------------------*/

void gp_set_alpha_operation (int alpha_operation, int alpha_type, int channel,
	int apply_alpha, unsigned char alpha)
{
	gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE;
	
	/* THE AVAILABLE ALPHA DEFINITIONS FOLLOW THE HARDWARE       */
	/* This allows us to avoid giant switch structures, but it   */
	/* also implies that there is no mechanism to detect invalid */
	/* parameters.                                               */

	gp3_raster_mode = gp3_bpp | (unsigned long)alpha |
		((unsigned long)apply_alpha     << 22) |
		((unsigned long)alpha_operation << 20) |
		((unsigned long)alpha_type      << 17) |
		((unsigned long)channel         << 16);
	
	WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode);

	/* CHECK IF DESTINATION IS REQUIRED */

	if ( (alpha_operation == CIMGP_ALPHA_TIMES_A &&
          channel == CIMGP_CHANNEL_A_SOURCE &&
          alpha_type != CIMGP_CHANNEL_B_ALPHA &&
          alpha_type != CIMGP_ALPHA_FROM_RGB_B)  ||
         (alpha_operation == CIMGP_BETA_TIMES_B &&
          channel == CIMGP_CHANNEL_A_DEST &&
          alpha_type != CIMGP_CHANNEL_A_ALPHA &&
          alpha_type != CIMGP_ALPHA_FROM_RGB_A))
    {
        gp3_blt_mode = 0;
    }
    else
        gp3_blt_mode = GP3_BM_DST_REQ;
}

/*---------------------------------------------------------------------------
 * gp_set_solid_pattern
 *
 * This routine is called to program the hardware for a solid pattern.  It need
 * not be called for any other reason.  As a side effect, this routine will
 * clear any 8x8 pattern data.
 *-------------------------------------------------------------------------*/

void gp_set_solid_pattern (unsigned long color)
{
	/* CHANNEL 3 IS NOT NEEDED FOR SOLID PATTERNS */

	gp3_ch3_pat = 0;

	/* SET SOLID PATTERN IN COMMAND BUFFER */
	/* We are assuming that only one pattern type is ever set for a */
	/* BLT.  We are also assuming that gp_set_raster_operation will */
	/* be called before this routine.  With these assumptions, we   */
	/* will thus never have to change the raster mode register for  */
	/* solid patterns.                                              */

	if (gp3_blt)
	{
		gp3_cmd_header |= GP3_BLT_HDR_PAT_CLR0_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_PAT_COLOR_0, color);
	}
	else
	{
		gp3_cmd_header |= GP3_VEC_HDR_PAT_CLR0_ENABLE;

		WRITE_COMMAND32 (GP3_VECTOR_PAT_COLOR_0, color);
	}
}

/*---------------------------------------------------------------------------
 * gp_set_mono_pattern
 *
 * This routine is called to program the hardware for a monochrome pattern.
 * As a side effect, this routine will clear any 8x8 pattern data.
 *-------------------------------------------------------------------------*/

void gp_set_mono_pattern (unsigned long bgcolor, unsigned long fgcolor,
	unsigned long data0, unsigned long data1, int transparent, int x, int y)
{
	/* CHANNEL 3 IS NOT NEEDED FOR MONOCHROME PATTERNS */

	gp3_ch3_pat = 0;

	/* UPDATE RASTER MODE REGISTER */

	if (transparent) gp3_raster_mode |= GP3_RM_PAT_MONO | GP3_RM_PAT_TRANS;
	else             gp3_raster_mode |= GP3_RM_PAT_MONO;
		
	gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode);	

	/* SET MONOCHROME PATTERN DATA AND COLORS */

	if (gp3_blt)
	{
		gp3_cmd_header |= (GP3_BLT_HDR_PAT_CLR0_ENABLE | GP3_BLT_HDR_PAT_CLR1_ENABLE |
			GP3_BLT_HDR_PAT_DATA0_ENABLE | GP3_BLT_HDR_PAT_DATA1_ENABLE);

		WRITE_COMMAND32 (GP3_BLT_PAT_COLOR_0, bgcolor);
		WRITE_COMMAND32 (GP3_BLT_PAT_COLOR_1, fgcolor);
		WRITE_COMMAND32 (GP3_BLT_PAT_DATA_0,  data0);
		WRITE_COMMAND32 (GP3_BLT_PAT_DATA_1,  data1);
	}
	else
	{
		gp3_cmd_header |= (GP3_VEC_HDR_PAT_CLR0_ENABLE | GP3_VEC_HDR_PAT_CLR1_ENABLE |
			GP3_VEC_HDR_PAT_DATA0_ENABLE | GP3_VEC_HDR_PAT_DATA1_ENABLE);

		WRITE_COMMAND32 (GP3_VECTOR_PAT_COLOR_0, bgcolor);
		WRITE_COMMAND32 (GP3_VECTOR_PAT_COLOR_1, fgcolor);
		WRITE_COMMAND32 (GP3_VECTOR_PAT_DATA_0,  data0);
		WRITE_COMMAND32 (GP3_VECTOR_PAT_DATA_1,  data1);
	}

	/* SAVE PATTERN ORIGIN */

	gp3_pat_origin = ((unsigned long)y << 29) | (((unsigned long)x & 7) << 26);
}

/*---------------------------------------------------------------------------
 * gp_set_pattern_origin
 *
 * This routine overrides the pattern origins set in gp_set_mono_pattern or
 * gp_set_color_pattern.  It is generally used to override the original
 * pattern origin due to a change in clipping.
 *-------------------------------------------------------------------------*/

void gp_set_pattern_origin (int x, int y)
{
	/* SAVE PATTERN ORIGIN */

	gp3_pat_origin = ((unsigned long)y << 29) | (((unsigned long)x & 7) << 26);
}

/*---------------------------------------------------------------------------
 * gp_set_color_pattern
 *
 * This routine is called to program a 8x8 color pattern into the LUT hardware.
 * Unlike the other pattern routines, this routine must be called before
 * any gp_declare_xxxx routines.  The pattern that is programmed into the
 * hardware will stay persistent for all subsequent primitives until one of
 * the following conditions happens.
 *   - Another pattern type is programmed.
 *   - A color-conversion BLT rotation BLT.
 *-------------------------------------------------------------------------*/

void gp_set_color_pattern (unsigned long *pattern, int format, int x, int y)
{
	unsigned long size_dwords, temp;
	
	gp3_ch3_pat = 1;

	/* SAVE COLOR PATTERN SOURCE INFO */
	/* Color patterns can be in a format different than the primary display. */
	/* 4BPP patterns are not supported.                                      */
	
	gp3_pat_pix_shift = (unsigned long)((format >> 2) & 3);
	gp3_pat_format = (((unsigned long)format & 0xF)  << 24) |
		             (((unsigned long)format & 0x10) << 17) |
					 GP3_CH3_COLOR_PAT_ENABLE               |
					 GP3_CH3_C3EN;

	size_dwords = (64 << gp3_pat_pix_shift) >> 2;

	/* CHECK FOR WRAP AFTER LUT LOAD                 */
	/* Primitive size is 12 plus the amount of data. */

	gp3_cmd_next = gp3_cmd_current + (size_dwords << 2) + 12;
	
	if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
	{
		gp3_cmd_next   = gp3_cmd_top;
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_WRAP | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR HARDWARE           */
		/* Same logic as BLT wrapping. */

		GP3_WAIT_WRAP(temp);
	}
	else
	{
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR AVAILABLE SPACE */

		GP3_WAIT_PRIMITIVE(temp);
	}

	/* SAVE CURRENT BUFFER POINTER */

	cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

	/* PREPARE FOR COMMAND BUFFER DATA WRITES                 */
	/* Pattern data is contiguous DWORDs at LUT address 0x100 */

	WRITE_COMMAND32 (0, gp3_cmd_header);
	WRITE_COMMAND32 (4, 0x100);
	WRITE_COMMAND32 (8, size_dwords | GP3_LUT_DATA_TYPE);	

	/* WRITE ALL DATA */

	WRITE_COMMAND_STRING32 (12, pattern, 0, size_dwords);

	/* START OPERATION */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* SAVE PATTERN ORIGIN */

	gp3_pat_origin = ((unsigned long)y << 29) | (((unsigned long)x & 7) << 26);
}

/*---------------------------------------------------------------------------
 * gp_set_mono_source
 *
 * This routine is called to program the colors for monochrome source data.
 *-------------------------------------------------------------------------*/

void gp_set_mono_source (unsigned long bgcolor, unsigned long fgcolor,
	int transparent)
{
	/* UPDATE RASTER MODE REGISTER IF TRANSPARENT */

	if (transparent)
	{
		gp3_cmd_header  |= GP3_BLT_HDR_RASTER_ENABLE;
		gp3_raster_mode |= GP3_RM_SRC_TRANS;

		WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode);	
	}

	/* SET MONOCHROME SOURCE COLORS */
	/* Note that this routine only sets the colors.  The actual */
	/* source type is determined by the final output routine    */
	/* (gp_mono_bitmap_xxx, gp_color_bitmap_xxx, etc.)          */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_FG_ENABLE | GP3_BLT_HDR_SRC_BG_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_SRC_COLOR_FG, fgcolor);
	WRITE_COMMAND32 (GP3_BLT_SRC_COLOR_BG, bgcolor);
}

/*---------------------------------------------------------------------------
 * gp_set_solid_source
 *
 * This routine is called to program a solid source color.  A solid source
 * color is used primarily for vectors or antialiased text.
 *-------------------------------------------------------------------------*/

void gp_set_solid_source (unsigned long color)
{
	/* SET SOLID SOURCE COLOR */
	/* The solid source register is in the same place for both BLTs and */
	/* vectors.                                                         */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_FG_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_SRC_COLOR_FG, color);
}

/*---------------------------------------------------------------------------
 * gp_set_source_transparency
 *
 * This routine sets the source transparency and mask to be used in future
 * rendering operations.  Transparency is cleared by gp_set_raster_operation,
 * so this routine should never be called first.
 *-------------------------------------------------------------------------*/

void gp_set_source_transparency (unsigned long color, unsigned long mask)
{
	gp3_raster_mode |= GP3_RM_SRC_TRANS;
	gp3_cmd_header  |= GP3_BLT_HDR_RASTER_ENABLE | GP3_BLT_HDR_SRC_FG_ENABLE |
		GP3_BLT_HDR_SRC_BG_ENABLE;
	
	WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode);	
	WRITE_COMMAND32 (GP3_BLT_SRC_COLOR_FG, color);
	WRITE_COMMAND32 (GP3_BLT_SRC_COLOR_BG, mask);
}

/*---------------------------------------------------------------------------
 * gp_program_lut
 *
 * This routine is called to program the hardware LUT with color-conversion
 * information.  This routine should be called before any gp_declare_xxxx
 * routines.
 *
 * colors   - Pointer to an array of DWORDs for color expansion.
 *
 * full_lut - Selector between 4BPP and 8BPP expansion. The hardware is
 *            initialized with 16 dwords for 4BPP expansion and 256 dwords
 *            for 8BPP expansion.
 *-------------------------------------------------------------------------*/

void gp_program_lut (unsigned long *colors, int full_lut)
{
	unsigned long size_dwords, temp;
	
	/* SIZE IS EITHER 16 DWORDS (4BPP) or 256 DWORDS (8BPP) */

	if (full_lut) size_dwords = 256;
	else          size_dwords = 16;

	/* CHECK FOR WRAP AFTER LUT LOAD                 */
	/* Primitive size is 12 plus the amount of data. */

	gp3_cmd_next = gp3_cmd_current + (size_dwords << 2) + 12;
	
	if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
	{
		gp3_cmd_next   = gp3_cmd_top;
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_WRAP | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR HARDWARE           */
		/* Same logic as BLT wrapping. */

		GP3_WAIT_WRAP(temp);
	}
	else
	{
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR AVAILABLE SPACE */

		GP3_WAIT_PRIMITIVE(temp);
	}

	/* SAVE CURRENT BUFFER POINTER */

	cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

	/* PREPARE FOR COMMAND BUFFER DATA WRITES             */
	/* Pattern data is contiguous DWORDs at LUT address 0 */

	WRITE_COMMAND32 (0, gp3_cmd_header);
	WRITE_COMMAND32 (4, 0);
	WRITE_COMMAND32 (8, (size_dwords | GP3_LUT_DATA_TYPE));	

	/* WRITE ALL DATA */

	WRITE_COMMAND_STRING32 (12, colors, 0, size_dwords);

	/* START OPERATION */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_set_vector_pattern
 *
 * This routine is called to program the hardware LUT with a vector pattern.
 * A vector pattern is programmed as a 32-bit mask that specifies a
 * transparency pattern.  A length parameter is used to specify patterns
 * smaller than 32.  Note that vectors in Geode LX do not continue across corners.
 * The beginning of each vector will always begin with bit 0 of the vector
 * pattern.  It is the responsibility of the caller to update the pattern
 * if an alternate behavior is desired.
 *
 * This routine faces the same restrictions of all routines that program
 * the LUT, in that it must be called before any gp_declare_xxxx routines,
 * it cannot be combined with an 8x8 color pattern, color conversion or
 * rotation.
 *-------------------------------------------------------------------------*/

void gp_set_vector_pattern (unsigned long pattern, unsigned long color, int length)
{
	unsigned long temp, mask;
	
	gp3_ch3_pat = 1;
	gp3_vector_pattern_color = color;

	/* CREATE SUITABLE PATTERN MASK */
	/* The GP requires a minimum of four pixels in a vector pattern.  We */
	/* can get around this restriction by doubling any patterns smaller  */
	/* than 4 pixels.                                                    */

	while (length < 4)
	{
		mask = 0xFFFFFFFF >> (32 - length);
		pattern = (pattern << length) | (pattern & mask);
		length <<= 1;
	}
	mask = 0xFFFFFFFF >> (32 - length);

    gp3_vec_pat = pattern;

	/* CHECK FOR WRAP AFTER LUT LOAD */

	gp3_cmd_next = gp3_cmd_current + GP3_VECTOR_PATTERN_COMMAND_SIZE;
	
	if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
	{
		gp3_cmd_next   = gp3_cmd_top;
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_WRAP | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR HARDWARE           */
		/* Same logic as BLT wrapping. */

		GP3_WAIT_WRAP(temp);
	}
	else
	{
		gp3_cmd_header = GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE;

		/* WAIT FOR AVAILABLE SPACE */

		GP3_WAIT_PRIMITIVE(temp);
	}

	/* SAVE CURRENT BUFFER POINTER */

	cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

	/* PREPARE FOR COMMAND BUFFER DATA WRITES      */
	/* Pattern data is 2 DWORDs at 0x100 and 0x101 */

	WRITE_COMMAND32 (0, gp3_cmd_header);
	WRITE_COMMAND32 (4, 0x100);
	WRITE_COMMAND32 (8, (2 | GP3_LUT_DATA_TYPE));	
	WRITE_COMMAND32 (12, pattern);
	WRITE_COMMAND32 (16, mask);

	/* START OPERATION */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_set_strides
 *
 * This routine is called to program the pitch between successive lines of
 * data in the frame buffer.  The strides should be DWORD aligned and less
 * than 64K.  These restrictions are not checked by the API.
 *-------------------------------------------------------------------------*/

void gp_set_strides (unsigned long dst_stride, unsigned long src_stride)
{
	/* SAVE STRIDES */
	/* The source stride may be needed later for channel 3 source data and */
	/* we may need to use these strides in calculations.                   */

	gp3_src_stride = src_stride;
	gp3_dst_stride = dst_stride;

	/* ENABLE STRIDES */
	/* The stride register is in the same place for BLTs and vectors */

	gp3_cmd_header |= GP3_BLT_HDR_STRIDE_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_STRIDE, ((src_stride << 16) | dst_stride));	
}

/*---------------------------------------------------------------------------
 * gp_set_source_format
 *
 * This routine is used to program the format of source data used in subsequent
 * color-conversion or rotation operations.  Note that 4BPP indexed and 8BPP
 * indexed source formats cannot be used when rotating, as the LUT will be
 * destroyed.  These formats also cannot be combined with an 8x8 color pattern.
 * The supported formats mirror the hardware and are described as follows:
 *
 * 0  - 8BPP 3:3:2
 * 1  - 8BPP indexed
 * 4  - 16BPP 4:4:4:4
 * 5  - 16BPP 1:5:5:5
 * 6  - 16BPP 5:6:5
 * 7  - 16BPP YUV
 * 8  - 32BPP 8:8:8:8
 * 13 - 4BPP indexed
 * 20 - 16BPP 4:4:4:4 BGR
 * 21 - 16BPP 1:5:5:5 BGR
 * 22 - 16BPP 0:5:6:5 BGR
 * 24 - 32BPP 8:8:8:8 BGR
 *-------------------------------------------------------------------------*/

void gp_set_source_format (int format)
{
	/* SAVE FORMAT */
	/* We will combine the source format later when doing color conversion. */
	/* We also save the pixel size for host source calculations.            */
	/* Conveniently, the source formats are organized such that the upper   */
	/* two bits of the nibble represent the pixel shift, with a pixel shift */
	/* of 3 being a special case for 4BPP data.  Clever, yes?  Even more    */
	/* clever, bit 4 indicates BGR ordering.                                */

	gp3_src_pix_shift = (unsigned long)((format >> 2) & 3);
	gp3_src_format = (((unsigned long)format & 0xF)  << 24) |
		             (((unsigned long)format & 0x10) << 18);
}

/*---------------------------------------------------------------------------
 * gp_pattern_fill
 *
 * This routine is called to perform a simple pattern fill.  The pattern
 * can be solid, monochrome or a preprogrammed 8x8 color pattern.  If
 * the ROP involves source data, that source data will be constant.
 *-------------------------------------------------------------------------*/

void gp_pattern_fill (unsigned long dstoffset, unsigned long width, unsigned long height)
{
    unsigned long base_register;

    base_register = (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK) |
                   ((gp3_fb_base << 24) + (dstoffset & 0xFFC00000));

    /* ENABLE RELEVANT REGISTERS */
	/* Note that we always enable and write the channel 3 mode, if only */
	/* to turn it off.   Cimarron also always writes the base offset    */
	/* register to allow operation with frame buffers larger than 16MB. */

	gp3_cmd_header |= GP3_BLT_HDR_DST_OFF_ENABLE     |
		              GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	/* WRITE THE REGISTERS COMMON TO ALL PATTERN TYPES */
	/* The destination base is the frame buffer base plus whatever */
	/* 4MB segment we happen to be BLTing to.                      */

	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, ((width << 16) | height));
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base_register);
	
	/* CHECK 8X8 COLOR PATTERN CASE */

	if (gp3_ch3_pat)
	{
		/* SET CHANNEL 3 PATTERN ORIGINS */

		gp3_cmd_header |= GP3_BLT_HDR_CH3_OFF_ENABLE;

		/* INITIALIZE CHANNEL 3 PARAMETERS */

		WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, ((width << 16) | height));
		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset & 0x3FFFFF);
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format);
	}
	else
	{
		/* DISABLE CHANNEL 3 AND USE NORMAL PATTERN ORIGINS */

		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, ((dstoffset & 0x3FFFFF) | gp3_pat_origin));
	}
	
	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_screen_to_screen_blt
 *
 * This routine is called to perform a BLT from one location inside video
 * memory to another location inside video memory.  The source and destination
 * formats are assumed to be the current BPP.  Whenever possible, this routine
 * tries to use channel 3 to fetch source data.  The BLT flags can affect this
 * behavior in the following ways:
 *   CIMGP_BLTFLAGS_PRES_COLOR_PAT
 *      A color pattern is being stored in the channel 3 buffer.  It is either
 *      being stored for a later BLT or being combined with the current source
 *      data.  Channel 3 cannot be used to fetch source data or the pattern
 *      will be overwritten.
 *   CIMGP_BLTFLAGS_PRES_LUT
 *      If the first flag is not set, this flag will limit the use of the
 *      channel 3 buffer to 1K.
 *-------------------------------------------------------------------------*/

void gp_screen_to_screen_blt (unsigned long dstoffset, unsigned long srcoffset,
	unsigned long width, unsigned long height, int flags)
{
	unsigned long base;
	unsigned long ch3_flags = 0;
	unsigned long blt_mode  = gp3_blt_mode;
	unsigned long size = ((width << 16) | height);
    unsigned long dstbase, srcbase;

    /* CALCULATE BASE OFFSETS                                             */
    /* We need to set the 4MB aligned base offsets before we add offsets  */
    /* for negative BLTs.                                                 */

    srcbase = srcoffset & 0xFFC00000;
    dstbase = dstoffset & 0xFFC00000;
    srcoffset &= 0x3FFFFF;
    dstoffset &= 0x3FFFFF;

    /* ADJUST OFFSETS BASED ON FLAGS */
	/* We adjust the source and destination offsets to point to the first */
	/* byte of the first pixel of the BLT.  This routine assumes that the */
	/* source and destination regions do not wrap past the end of a 16MB  */
	/* region.                                                            */

	if (flags & CIMGP_NEGXDIR)
	{
		srcoffset += (width << gp3_pix_shift) - 1;
		dstoffset += (width << gp3_pix_shift) - 1;
		blt_mode  |= GP3_BM_NEG_XDIR;
		ch3_flags |= GP3_CH3_NEG_XDIR;
	}
	if (flags & CIMGP_NEGYDIR)
	{
		srcoffset += (height - 1) * gp3_src_stride;
		dstoffset += (height - 1) * gp3_dst_stride;
		blt_mode  |= GP3_BM_NEG_YDIR;
		ch3_flags |= GP3_CH3_NEG_YDIR;
	}
	
	/* BRANCH BASED ON CHANNEL 3                                     */
	/* If a color pattern is not being saved or used, channel 3 will */
	/* be used to fetch source for maximum throughput.   Channel 3   */
    /* is not used if transparency or alpha blending is enabled.     */

	if (!(gp3_blt_flags & CIMGP_BLTFLAGS_PRES_COLOR_PAT) &&
        !(gp3_raster_mode & GP3_RM_SRC_TRANS) &&
        !(flags & CIMGP_NEGYDIR))
	{
		base = ((gp3_fb_base << 24) +  dstbase)        |
		       ((gp3_fb_base << 4)  + (srcbase >> 20)) |
                (gp3_base_register & GP3_BASE_OFFSET_SRCMASK);

		gp3_cmd_header |= GP3_BLT_HDR_DST_OFF_ENABLE     |
						  GP3_BLT_HDR_WIDHI_ENABLE       |
						  GP3_BLT_HDR_CH3_STR_ENABLE     |
						  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
						  GP3_BLT_HDR_CH3_OFF_ENABLE     |
						  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
						  GP3_BLT_HDR_BLT_MODE_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset | gp3_pat_origin));
		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, srcoffset);
		WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
		WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
		WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
			  GP3_CH3_C3EN                                    |
			  GP3_CH3_REPLACE_SOURCE                          |
			  gp3_ch3_bpp                                     |
			  gp3_src_stride                                  |
			  ch3_flags                                       |
			((gp3_blt_flags & CIMGP_ENABLE_PREFETCH)   << 17) |
			((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	}
	else
	{
		/* CALCULATE BASE OFFSET REGISTER */

		base = ((gp3_fb_base << 24) +  dstbase)        |
		       ((gp3_fb_base << 14) + (srcbase >> 10)) |
               (gp3_base_register & GP3_BASE_OFFSET_CH3MASK);

		/* PROGRAM THE NORMAL SOURCE CHANNEL REGISTERS                */
		/* We assume that a color pattern is being ROPed with source  */
		/* data if the pattern type is color and the preserve pattern */
		/* was set.                                                   */

		blt_mode |= GP3_BM_SRC_FB;

		gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
			              GP3_BLT_HDR_DST_OFF_ENABLE     |
						  GP3_BLT_HDR_WIDHI_ENABLE       |
                          GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
						  GP3_BLT_HDR_CH3_STR_ENABLE     |
                          GP3_BLT_HDR_CH3_OFF_ENABLE     |
						  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
						  GP3_BLT_HDR_BLT_MODE_ENABLE;

		if (gp3_ch3_pat)
		{
			WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
			WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset);
			WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format | ch3_flags);
            WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
		}
		else
		{
			WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset | gp3_pat_origin);
			WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
		}
		
		WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, srcoffset);
		WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
		WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
	}
		
	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_BLT_MODE, blt_mode);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_screen_to_screen_convert
 *
 * This routine is called to color-convert a rectangular region of the frame
 * buffer into the current BPP. The format of the source region is programmed
 * by gp_set_source_format.
 *-------------------------------------------------------------------------*/

void gp_screen_to_screen_convert (unsigned long dstoffset, unsigned long srcoffset,
    unsigned long width, unsigned long height, int nibble)
{
	unsigned long size = ((width << 16) | height);
	unsigned long ch3_offset = srcoffset & 0x3FFFFF;
    unsigned long ch3_size, base;

    base =  ((gp3_fb_base << 24) +  (dstoffset & 0xFFC00000))        |
	        ((gp3_fb_base << 4)  + ((srcoffset & 0xFFC00000) >> 20)) |
             (gp3_base_register & GP3_BASE_OFFSET_SRCMASK);

	/* SET NIBBLE FOR 4BPP */
	/* 4BPP is a special case in that it requires subpixel addressing.  The */
	/* user must supply this information via the nibble parameter.  This    */
	/* parameter is ignored for every other source format.                  */

    ch3_size = size;
	if (gp3_src_pix_shift == 3)
		ch3_offset |= ((nibble & 1) << 25);
    else if ((gp3_src_format & GP3_CH3_SRC_MASK) == GP3_CH3_SRC_24BPP_PACKED)
        ch3_size = ((((width * 3) + 3) >> 2) << 16) | height;

	/* SET APPROPRIATE ENABLES */

	gp3_cmd_header |= GP3_BLT_HDR_DST_OFF_ENABLE     |
		              GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	/* WRITE ALL BLT REGISTERS */
	
	WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF) | gp3_pat_origin);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, ch3_size);
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, ch3_offset);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN                                    |
		GP3_CH3_REPLACE_SOURCE                          |
		gp3_src_format                                  |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20) |
	  ((gp3_blt_flags & CIMGP_ENABLE_PREFETCH)   << 17) |
	    gp3_src_stride);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
		
	/* START THE BLT */

    WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_color_bitmap_to_screen_blt
 *
 * This routine is called to BLT data from system memory into the frame buffer.
 * 'srcy' is deliberately omitted to prevent extra calculations for simple
 * applications that have no source indexes.
 *-------------------------------------------------------------------------*/

void gp_color_bitmap_to_screen_blt (unsigned long dstoffset, unsigned long srcx,
    unsigned long width, unsigned long height, unsigned char *data, long pitch)
{
	unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
	unsigned long srcoffset;
		
	/* ASSUME BITMAPS ARE DWORD ALIGNED */
	/* We will offset into the source data in DWORD increments.  We */
	/* set the source index to the remaining byte offset and        */
	/* increment the size of each line to account for the dont-care */
	/* pixel(s).                                                    */

	indent    = srcx << gp3_pix_shift;
	srcoffset = (indent & ~3L);
	indent   &= 3;

	/* PROGRAM THE NORMAL SOURCE CHANNEL REGISTERS                */
	/* We assume that a color pattern is being ROPed with source  */
	/* data if the pattern type is color and the preserve pattern */
	/* was set.                                                   */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	if (gp3_ch3_pat)
	{
		gp3_cmd_header |= GP3_BLT_HDR_CH3_OFF_ENABLE | GP3_BLT_HDR_CH3_WIDHI_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format);
        WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
	}
	else
	{
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, ((dstoffset & 0x3FFFFF) | gp3_pat_origin));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
	}
	
	WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, indent);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET,
		((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_SRC_HOST);
		
	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* CALCULATE THE SIZE OF ONE LINE */

	size = (width << gp3_pix_shift) + indent;
	total_dwords = (size + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (size >> 2);
	byte_count   = (size & 3);

    /* CHECK FOR SMALL BLT CASE */

    if (((total_dwords << 2) * height) <= GP3_BLT_1PASS_SIZE &&
        (gp3_cmd_bottom - gp3_cmd_current) > (GP3_BLT_1PASS_SIZE + 72))
    {
        /* UPDATE THE COMMAND POINTER */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        /* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + ((total_dwords << 2) * height) + 8;

	    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
            	
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

        /* WRITE DWORD COUNT */

		WRITE_COMMAND32 (4, GP3_HOST_SOURCE_TYPE | (total_dwords * height));

        while (height--)
	    {
		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    srcoffset += pitch;
            cim_cmd_ptr += total_dwords << 2;
	    }

        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
    else
    {
	    /* WRITE DATA LINE BY LINE */
	    /* Each line will be created as a separate command buffer entry to allow */
	    /* line-by-line wrapping and to allow simultaneous rendering by the HW.  */
    	
	    while (height--)
	    {
		    /* UPDATE THE COMMAND POINTER */
		    /* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		    /* This is created by adding gp3_cmd_current to the base pointer.        */

		    cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		    /* CHECK IF A WRAP WILL BE NEEDED */

		    gp3_cmd_next = gp3_cmd_current + size_dwords;
		    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		    {
			    gp3_cmd_next  = gp3_cmd_top;
    			
			    /* WAIT FOR HARDWARE */
    			
			    GP3_WAIT_WRAP(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				    GP3_DATA_LOAD_HDR_ENABLE);
		    }
		    else
		    {
			    /* WAIT FOR AVAILABLE SPACE */

			    GP3_WAIT_PRIMITIVE(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		    }

		    /* WRITE DWORD COUNT */
    		
		    WRITE_COMMAND32 (4, GP3_HOST_SOURCE_TYPE | total_dwords);

		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    /* UPDATE POINTERS */

		    srcoffset += pitch;
		    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		    gp3_cmd_current = gp3_cmd_next;
	    }
    }
}

/*---------------------------------------------------------------------------
 * gp_color_convert_blt
 *
 * This routine is called to convert data that is stored in system memory
 * into the current graphics BPP.  The source format is programmed in
 * gp_set_source_format.
 *-------------------------------------------------------------------------*/

void gp_color_convert_blt (unsigned long dstoffset, unsigned long srcx,
    unsigned long width, unsigned long height, unsigned char *data, long pitch)
{
	unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
    unsigned long ch3_size;
	unsigned long ch3_offset, srcoffset;
    unsigned long base;
		
	/* ASSUME BITMAPS ARE DWORD ALIGNED */
	/* We will offset into the source data in DWORD increments.  We  */
	/* set the source index to the remaining byte offset and         */
	/* increment the size of each line to account for the dont-care  */
	/* pixel(s).   For 4BPP source data, we also set the appropriate */
	/* nibble index.                                                 */
	
    /* CALCULATE THE SIZE OF ONE LINE */

    if ((gp3_src_format & GP3_CH3_SRC_MASK) == GP3_CH3_SRC_24BPP_PACKED)
    {
        /* HANDLE 24BPP */
        /* Note that we do not do anything to guarantee that the source data        */
        /* is DWORD aligned.  The logic here is that the source data will be        */
        /* cacheable, in which case Geode LX will not lose any clocks for unaligned */
        /* moves.  Also note that the channel 3 width is programmed as the          */
        /* number of dwords, while the normal width is programmed as the number     */
        /* of pixels.                                                               */

        srcoffset  = srcx * 3;
        ch3_offset = 0;
        temp       = width * 3;
        ch3_size   = (((temp + 3) >> 2) << 16) | height;
    }
    else
    {
        ch3_size = size;

	    if (gp3_src_pix_shift == 3)
	    {
		    /* CALCULATE INDENT AND SOURCE OFFSET */

		    indent     = (srcx >> 1);
		    srcoffset  = (indent & ~3L);
		    indent    &= 3;
		    ch3_offset = indent | ((srcx & 1) << 25);

		    temp = ((width + (srcx & 1) + 1) >> 1) + indent;
	    }
	    else
	    {
		    indent     = (srcx << gp3_src_pix_shift);
		    srcoffset  = (indent & ~3L);
		    indent    &= 3;
		    ch3_offset = indent;

		    temp = (width << gp3_src_pix_shift) + indent;
	    }
    }

	total_dwords = (temp + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (temp >> 2);
	byte_count   = (temp & 3);

    base = ((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

	/* SET APPROPRIATE ENABLES */

	gp3_cmd_header |= GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF) | gp3_pat_origin);
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, ch3_offset);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, ch3_size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN           |
		GP3_CH3_REPLACE_SOURCE |
		GP3_CH3_HST_SRC_ENABLE |
		gp3_src_format         |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

    if (((total_dwords << 2) * height) <= GP3_BLT_1PASS_SIZE &&
        (gp3_cmd_bottom - gp3_cmd_current) > (GP3_BLT_1PASS_SIZE + 72))
    {
        /* UPDATE THE COMMAND POINTER */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        /* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + ((total_dwords << 2) * height) + 8;

	    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
            	
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

        /* WRITE DWORD COUNT */

		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | (total_dwords * height));

        while (height--)
	    {
		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    srcoffset += pitch;
            cim_cmd_ptr += total_dwords << 2;
	    }

        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
    else
    {
	    /* WRITE DATA LINE BY LINE */
	    /* Each line will be created as a separate command buffer entry to allow */
	    /* line-by-line wrapping and to allow simultaneous rendering by the HW.  */
    	
	    while (height--)
	    {
		    /* UPDATE THE COMMAND POINTER */
		    /* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		    /* This is created by adding gp3_cmd_current to the base pointer.        */

		    cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		    /* CHECK IF A WRAP WILL BE NEEDED */

		    gp3_cmd_next = gp3_cmd_current + size_dwords;
		    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		    {
			    gp3_cmd_next  = gp3_cmd_top;
    			
			    /* WAIT FOR HARDWARE */
    			
			    GP3_WAIT_WRAP(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				    GP3_DATA_LOAD_HDR_ENABLE);
		    }
		    else
		    {
			    /* WAIT FOR AVAILABLE SPACE */

			    GP3_WAIT_PRIMITIVE(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		    }

		    /* WRITE DWORD COUNT */
    		
		    WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    /* UPDATE POINTERS */

		    srcoffset += pitch;
		    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		    gp3_cmd_current = gp3_cmd_next;
	    }
    }
}

/*---------------------------------------------------------------------------
 * gp_custom_convert_blt
 *
 * This routine is identical to gp_color_convert_blt, except that the macro
 * to write data to the frame buffer has been replaced with a new macro.  This
 * allows a user to implement custom behavior when sending data, such as manually
 * converting 24BPP to 32BPP, converting 2BPP to 4BPP or premultiplying alpha data.
 *-------------------------------------------------------------------------*/

void gp_custom_convert_blt (unsigned long dstoffset, unsigned long srcx,
    unsigned long width, unsigned long height, unsigned char *data, long pitch)
{
	unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
	unsigned long ch3_offset, srcoffset;
    unsigned long ch3_size, base;
		
	/* ASSUME BITMAPS ARE DWORD ALIGNED */
	/* We will offset into the source data in DWORD increments.  We  */
	/* set the source index to the remaining byte offset and         */
	/* increment the size of each line to account for the dont-care  */
	/* pixel(s).   For 4BPP source data, we also set the appropriate */
	/* nibble index.                                                 */
	
    /* CALCULATE THE SIZE OF ONE LINE */

    if ((gp3_src_format & GP3_CH3_SRC_MASK) == GP3_CH3_SRC_24BPP_PACKED)
    {
        /* HANDLE 24BPP */
        /* Note that we do not do anything to guarantee that the source data      */
        /* is DWORD aligned.  The logic here is that the source data will be      */
        /* cacheable, in which case Geode LX will not lose any clocks for unaligned */
        /* moves.  Also note that the channel 3 width is programmed as the        */
        /* number of dwords, while the normal width is programmed as the number   */
        /* of pixels.                                                             */

        srcoffset  = srcx * 3;
        ch3_offset = 0;
        temp       = width * 3;
        ch3_size   = (((temp + 3) >> 2) << 16) | height;
    }
    else
    {
        ch3_size = size;

	    if (gp3_src_pix_shift == 3)
	    {
		    /* CALCULATE INDENT AND SOURCE OFFSET */

		    indent     = (srcx >> 1);
		    srcoffset  = (indent & ~3L);
		    indent    &= 3;
		    ch3_offset = indent | ((srcx & 1) << 25);

		    temp = ((width + (srcx & 1) + 1) >> 1) + indent;
	    }
	    else
	    {
		    indent     = (srcx << gp3_src_pix_shift);
		    srcoffset  = (indent & ~3L);
		    indent    &= 3;
		    ch3_offset = indent;

		    temp = (width << gp3_src_pix_shift) + indent;
	    }
    }

	total_dwords = (temp + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (temp >> 2);
	byte_count   = (temp & 3);

    base = ((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)) |
           (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

	/* SET APPROPRIATE ENABLES */

	gp3_cmd_header |= GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF) | gp3_pat_origin);
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, ch3_offset);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, ch3_size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN           |
		GP3_CH3_REPLACE_SOURCE |
		GP3_CH3_HST_SRC_ENABLE |
		gp3_src_format         |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

    if (((total_dwords << 2) * height) <= GP3_BLT_1PASS_SIZE &&
        (gp3_cmd_bottom - gp3_cmd_current) > (GP3_BLT_1PASS_SIZE + 72))
    {
        /* UPDATE THE COMMAND POINTER */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        /* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + ((total_dwords << 2) * height) + 8;

	    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
            	
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

        /* WRITE DWORD COUNT */

		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | (total_dwords * height));

        while (height--)
	    {
		    /* WRITE DATA */

		    WRITE_CUSTOM_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_CUSTOM_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    srcoffset += pitch;
            cim_cmd_ptr += total_dwords << 2;
	    }

        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
    else
    {
	    /* WRITE DATA LINE BY LINE */
	    /* Each line will be created as a separate command buffer entry to allow */
	    /* line-by-line wrapping and to allow simultaneous rendering by the HW.  */
    	
	    while (height--)
	    {
		    /* UPDATE THE COMMAND POINTER */
		    /* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		    /* This is created by adding gp3_cmd_current to the base pointer.        */

		    cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		    /* CHECK IF A WRAP WILL BE NEEDED */

		    gp3_cmd_next = gp3_cmd_current + size_dwords;
		    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		    {
			    gp3_cmd_next  = gp3_cmd_top;
    			
			    /* WAIT FOR HARDWARE */
    			
			    GP3_WAIT_WRAP(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				    GP3_DATA_LOAD_HDR_ENABLE);
		    }
		    else
		    {
			    /* WAIT FOR AVAILABLE SPACE */

			    GP3_WAIT_PRIMITIVE(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		    }

		    /* WRITE DWORD COUNT */
    		
		    WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		    /* WRITE DATA */

		    WRITE_CUSTOM_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_CUSTOM_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    /* UPDATE POINTERS */

		    srcoffset += pitch;
		    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		    gp3_cmd_current = gp3_cmd_next;
	    }
    }
}

/*---------------------------------------------------------------------------
 * gp_rotate_blt
 *
 * This routine is called to rotate a rectangular area of video memory.  The
 * data may be color converted during the rotation.  'Degrees' must be a
 * multiple of 90 and indicates a clockwise rotation.  Width and height
 * refer to the width and the height of the source.  The output
 * destinations will be equal to the rotated dimensions.
 *-------------------------------------------------------------------------*/

void gp_rotate_blt (unsigned long dstoffset, unsigned long srcoffset,
	unsigned long width, unsigned long height, int degrees)
{
	unsigned long sizein, sizeout;
	unsigned long ch3_flags;
    unsigned long base;

    base = ((gp3_fb_base << 24) + (dstoffset & 0xFFC00000))         |
	       ((gp3_fb_base << 4)  + ((srcoffset & 0xFFC00000) >> 20)) |
            (gp3_base_register & GP3_BASE_OFFSET_SRCMASK);

    srcoffset &= 0x3FFFFF;
    dstoffset &= 0x3FFFFF;

	/* SET ROTATION PARAMETERS */

	switch (degrees)
	{
		case 90:
			srcoffset += (height - 1) * gp3_src_stride;
			sizein     = ((width  << 16) | height);
			sizeout    = ((height << 16) | width);
			ch3_flags  = GP3_CH3_C3EN           |
				         GP3_CH3_REPLACE_SOURCE |
						 GP3_CH3_ROTATE_ENABLE  |
						 GP3_CH3_NEG_YDIR;	
			break;

		case 180:
			srcoffset += (height - 1) * gp3_src_stride;
			srcoffset += (width << gp3_src_pix_shift) - 1;
			sizein     = sizeout = ((width  << 16) | height);
			ch3_flags  = GP3_CH3_C3EN           |
				         GP3_CH3_REPLACE_SOURCE |
						 GP3_CH3_NEG_YDIR       |
						 GP3_CH3_NEG_XDIR;	
			break;

		case 270:
			srcoffset +=  (width << gp3_src_pix_shift) - 1;
			sizein     = ((width  << 16) | height);
			sizeout    = ((height << 16) | width);
			ch3_flags  = GP3_CH3_C3EN           |
				         GP3_CH3_REPLACE_SOURCE |
						 GP3_CH3_ROTATE_ENABLE  |
						 GP3_CH3_NEG_XDIR;	
			break;

		default:
			sizein     = sizeout = ((width  << 16) | height);
			ch3_flags  = GP3_CH3_C3EN |
				         GP3_CH3_REPLACE_SOURCE;
			break;		
	}

	/* SET APPROPRIATE ENABLES                           */
	/* We override the raster mode setting with a source */
	/* copy ROP.                                         */

	gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
		              GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	/* WRITE ALL BLT REGISTERS */
	
	WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_bpp | 0xCC);
	WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, sizeout);
	WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, sizein);
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, srcoffset);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		ch3_flags                                       |
		gp3_src_format                                  |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20) |
	  ((gp3_blt_flags & CIMGP_ENABLE_PREFETCH)   << 17) |
	    gp3_src_stride);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_mono_bitmap_to_screen_blt
 *
 * This routine expands and BLTs a monchrome bitmap that is stored in system
 * memory into the framebuffer.  'data' points to an array of monochrome data.
 * 'stride' indicates the pitch between successive lines of monochrome data.
 * 'srcx' indicates the x coordinate within each line of blend data
 * corresponding to the first pixel.  A y coordinate for the source is
 * deliberately omitted to avoid extra calculation for simple cases that have
 * no y index.  The calling program must adjust the data pointer accordingly.
 *-------------------------------------------------------------------------*/

void gp_mono_bitmap_to_screen_blt (unsigned long dstoffset, unsigned long srcx,
	unsigned long width, unsigned long height, unsigned char *data, long stride)
{
	unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
	unsigned long srcoffset, src_value;

    /* ASSUME BITMAPS ARE DWORD ALIGNED */
	/* We will offset into the source data in DWORD increments.  We */
	/* set the source index to the remaining byte offset and        */
	/* increment the size of each line to account for the dont-care */
	/* pixel(s).                                                    */

	indent    = (srcx >> 3);
	srcoffset = (indent & ~3L);
	indent   &= 3;
	src_value = (indent | ((srcx & 7) << 26));

    /* PROGRAM THE NORMAL SOURCE CHANNEL REGISTERS                */
	/* We assume that a color pattern is being ROPed with source  */
	/* data if the pattern type is color and the preserve pattern */
	/* was set.                                                   */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
                      GP3_BLT_HDR_RASTER_ENABLE      |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

    if (gp3_ch3_pat)
	{
		gp3_cmd_header |= GP3_BLT_HDR_CH3_OFF_ENABLE | GP3_BLT_HDR_CH3_WIDHI_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format);
        WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
	}
	else
	{
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, ((dstoffset & 0x3FFFFF) | gp3_pat_origin));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
	}
    if (gp3_blt_flags & CIMGP_BLTFLAGS_INVERTMONO)
    {    
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode | GP3_RM_SOURCE_INVERT);
    }
    else
    {
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode & ~GP3_RM_SOURCE_INVERT);
    }
	WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, src_value);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET,
		((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_SRC_HOST | GP3_BM_SRC_MONO);
		
	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* CALCULATE THE SIZE OF ONE LINE */

	size = ((width + (srcx & 7) + 7) >> 3) + indent;
	total_dwords = (size + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (size >> 2);
	byte_count   = (size & 3);

    /* CHECK FOR SMALL BLT CASE */
    /* If the total amount of monochrome data is less than 50K and we have */
    /* room in the command buffer, we will do all data writes in a single  */
    /* data packet.                                                        */

    if (((total_dwords << 2) * height) <= GP3_BLT_1PASS_SIZE &&
        (gp3_cmd_bottom - gp3_cmd_current) > (GP3_BLT_1PASS_SIZE + 72))
    {
        /* UPDATE THE COMMAND POINTER */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        /* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + ((total_dwords << 2) * height) + 8;

	    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
        	        	
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

        /* WRITE DWORD COUNT */
        	
		WRITE_COMMAND32 (4, GP3_HOST_SOURCE_TYPE | (total_dwords * height));

        while (height--)
	    {
		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    srcoffset += stride;
            cim_cmd_ptr += total_dwords << 2;
	    }

        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
    else
    {
	    /* WRITE DATA LINE BY LINE */
	    /* Each line will be created as a separate command buffer entry to allow */
	    /* line-by-line wrapping and to allow simultaneous rendering by the HW.  */
    	
	    while (height--)
	    {
		    /* UPDATE THE COMMAND POINTER */
		    /* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		    /* This is created by adding gp3_cmd_current to the base pointer.        */

		    cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		    /* CHECK IF A WRAP WILL BE NEEDED */

		    gp3_cmd_next = gp3_cmd_current + size_dwords;
		    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		    {
			    gp3_cmd_next  = gp3_cmd_top;
    			
			    /* WAIT FOR HARDWARE */
    			
			    GP3_WAIT_WRAP(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				    GP3_DATA_LOAD_HDR_ENABLE);
		    }
		    else
		    {
			    /* WAIT FOR AVAILABLE SPACE */

			    GP3_WAIT_PRIMITIVE(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		    }

		    /* WRITE DWORD COUNT */
    		
		    WRITE_COMMAND32 (4, GP3_HOST_SOURCE_TYPE | total_dwords);

		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    /* UPDATE POINTERS */

		    srcoffset += stride;
		    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		    gp3_cmd_current = gp3_cmd_next;
        }
    }
}

/*---------------------------------------------------------------------------
 * gp_text_blt
 *
 * This routine expands and BLTs byte-packed monochrome data to the screen.
 * There is assumed to be no x clipping involved in the BLT.
 *-------------------------------------------------------------------------*/

void gp_text_blt (unsigned long dstoffset, unsigned long width,
	unsigned long height, unsigned char *data)
{
	unsigned long temp, dwords_total;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
    unsigned long srcoffset = 0;

	/* PROGRAM THE NORMAL SOURCE CHANNEL REGISTERS                */
	/* We assume that a color pattern is being ROPed with source  */
	/* data if the pattern type is color and the preserve pattern */
	/* was set.                                                   */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
                      GP3_BLT_HDR_RASTER_ENABLE      |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

    if (gp3_ch3_pat)
	{
		gp3_cmd_header |= GP3_BLT_HDR_CH3_OFF_ENABLE | GP3_BLT_HDR_CH3_WIDHI_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format);
        WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
	}
	else
	{
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, ((dstoffset & 0x3FFFFF) | gp3_pat_origin));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
	}
    if (gp3_blt_flags & CIMGP_BLTFLAGS_INVERTMONO)
    {    
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode | GP3_RM_SOURCE_INVERT);
    }
    else
    {
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode & ~GP3_RM_SOURCE_INVERT);
    }
	
	WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, 0);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET,
		((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_SRC_HOST | GP3_BM_SRC_BP_MONO);
		
	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* CALCULATE THE TOTAL NUMBER OF BYTES */

	size = ((width + 7) >> 3) * height;

	/* WRITE ALL DATA IN CHUNKS */

	do
	{
		/* UPDATE THE COMMAND POINTER */
	
		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		if (size > 8192)
		{
			dword_count = 2048;
			byte_count  = 0;
			dwords_total = 2048;
			size -= 8192;
		}
		else
		{
			dword_count = (size >> 2);
			byte_count  = (size & 3);
			dwords_total = (size + 3) >> 2;
			size = 0;
		}
		gp3_cmd_next = gp3_cmd_current + (dwords_total << 2) + 8;

		/* CHECK IF A WRAP WILL BE NEEDED */

		if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
				
			/* WAIT FOR HARDWARE */
				
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			/* WAIT FOR AVAILABLE SPACE */

			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

		/* WRITE DWORD COUNT */
		
		WRITE_COMMAND32 (4, GP3_HOST_SOURCE_TYPE | dwords_total);

		/* WRITE DATA */

		WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			srcoffset + (dword_count << 2), byte_count);

		WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
	
        /* UPDATE THE SOURCE OFFSET */
        /* We add a constant value because the code will loop only if the */
        /* data exceeds 8192 bytes.                                       */

        srcoffset += 8192;

    } while (size);
}

/*---------------------------------------------------------------------------
 * gp_mono_expand_blt
 *
 * This routine expands monochrome data that is stored in video memory into
 * the current BPP.  The source and destination regions are assumed not to
 * overlap.  The pitch of the source data is specified in gp_set_strides.
 * 'srcy' is deliberately omitted to prevent extra calculations for simple
 * applications that have no source indexes.
 *-------------------------------------------------------------------------*/

void gp_mono_expand_blt (unsigned long dstoffset, unsigned long srcoffset,
    unsigned long srcx, unsigned long width, unsigned long height, int byte_packed)
{
	unsigned long base;
	unsigned long blt_mode;
	unsigned long size = ((width << 16) | height);

	/* ADJUST SOURCE OFFSET */

	srcoffset += (srcx >> 3);
    srcx &= 7;

	/* CALCULATE BASE OFFSET REGISTER */

	base = ((gp3_fb_base << 24) +  (dstoffset & 0xFFC00000))        |
	       ((gp3_fb_base << 14) + ((srcoffset & 0xFFC00000) >> 10)) |
            (gp3_base_register & GP3_BASE_OFFSET_CH3MASK);

	/* SET THE SOURCE TYPE */

	if (byte_packed)
        blt_mode = gp3_blt_mode | GP3_BM_SRC_FB | GP3_BM_SRC_BP_MONO;
	else
		blt_mode = gp3_blt_mode | GP3_BM_SRC_FB | GP3_BM_SRC_MONO;

	/* SET HEADER ENABLES */

	gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
			          GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
                      GP3_BLT_HDR_RASTER_ENABLE      |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	/* ENABLE COLOR PATTERN IF APPLICABLE */

    if (gp3_ch3_pat)
	{
		gp3_cmd_header |= GP3_BLT_HDR_CH3_OFF_ENABLE | GP3_BLT_HDR_CH3_WIDHI_ENABLE;

		WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, gp3_pat_origin);
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, gp3_pat_format);
        WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
	}
	else
	{
		WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, ((dstoffset & 0x3FFFFF) | gp3_pat_origin));
		WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR, 0);
	}
    if (gp3_blt_flags & CIMGP_BLTFLAGS_INVERTMONO)
    {    
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode | GP3_RM_SOURCE_INVERT);
    }
    else
    {
        WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_raster_mode & ~GP3_RM_SOURCE_INVERT);
    }

    WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);	
	WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET,
		(srcoffset & 0x3FFFFF) | (srcx << 26));
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
			
    /* WORKAROUND FOR SIBZ #3744 */
    /* Under extremely rare conditions, very narrow byte-packed mono BLTs can hang  */
    /* the GP.  Even under the rare case, the bad condition will only happen        */
    /* once every 16 lines.  The workaround is to break the offending BLT into a    */
    /* a series of safer BLTs.  This method is preferred over a two-pass approach   */
    /* because it does not require saving and restoring any GP state, such as the   */
    /* ROP or mono colors.                                                          */

    if ((gp3_blt_mode & GP3_BM_DST_REQ) && byte_packed && (gp3_pix_shift < 2) &&
        (width < 5) && ((srcoffset & 0x1F) == 0x1F) && ((srcx + width) > 8))
    {
        unsigned long dstoff1, size1, org1;
        unsigned long dstoff2, size2, org2;
        unsigned long tempheight;

        size1 = ((8 - srcx) << 16) | 1;
        size2 = ((width + srcx - 8) << 16) | 1;
        org1  = gp3_pat_origin;
        org2  = (org1 & 0xE0000000) | ((org1 + ((8 - srcx) << 26)) & 0x1C000000);
        dstoff1 = dstoffset & 0x3FFFFF;
        dstoff2 = (dstoff1 + 8 - srcx) << gp3_pix_shift;

        while (height)
        {
            /* DIVIDE THE FIRST LINE INTO TWO SINGLE LINE BLTS */

            WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size1);
            WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI,  size1);
            WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, (srcoffset & 0x3FFFFF) | (srcx << 26));
            WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoff1 | org1);
            WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, org1);
            WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	        WRITE_COMMAND32 (GP3_BLT_MODE, blt_mode);
	        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	        gp3_cmd_current = gp3_cmd_next;
            gp_wait_until_idle();

            gp_declare_blt (gp3_blt_flags);
            gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
			                  GP3_BLT_HDR_DST_OFF_ENABLE     |
					          GP3_BLT_HDR_WIDHI_ENABLE       |
					          GP3_BLT_HDR_BLT_MODE_ENABLE    |
                              GP3_BLT_HDR_CH3_OFF_ENABLE     |
                              GP3_BLT_HDR_CH3_WIDHI_ENABLE;
            WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size2);
            WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI,  size2);
            WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, ((srcoffset + 1) & 0x3FFFFF));
            WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoff2 | org2);
            WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, org2);
            WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	        WRITE_COMMAND32 (GP3_BLT_MODE, blt_mode);
	        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	        gp3_cmd_current = gp3_cmd_next;
            gp_wait_until_idle();

            if (--height)
            {
                org1 += 0x20000000;
                org2 += 0x20000000;
                dstoff1 += gp3_dst_stride;
                dstoff2 += gp3_dst_stride;
                srcoffset += 2;

                /* THE NEXT 15 LINES ARE NOW 'SAFE' - THEY DO NOT SHOW THE PROBLEM */

                tempheight = 15;
                if (tempheight > height)
                    tempheight = height;

                gp_declare_blt (gp3_blt_flags);
                gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
			                    GP3_BLT_HDR_DST_OFF_ENABLE     |
					            GP3_BLT_HDR_WIDHI_ENABLE       |
					            GP3_BLT_HDR_BLT_MODE_ENABLE    |
                                GP3_BLT_HDR_CH3_OFF_ENABLE     |
                                GP3_BLT_HDR_CH3_WIDHI_ENABLE;
                WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, (width << 16) | tempheight);
                WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI,  (width << 16) | tempheight);
                WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET, (srcoffset & 0x3FFFFF) | (srcx << 26));
                WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoff1 | org1);
                WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, org1);
                WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	            WRITE_COMMAND32 (GP3_BLT_MODE, blt_mode);
	            WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	            gp3_cmd_current = gp3_cmd_next;
                gp_wait_until_idle();

                height -= tempheight;

                if (height)
                {
                    gp_declare_blt (gp3_blt_flags);
                    gp3_cmd_header |= GP3_BLT_HDR_SRC_OFF_ENABLE     |
			                        GP3_BLT_HDR_DST_OFF_ENABLE     |
					                GP3_BLT_HDR_WIDHI_ENABLE       |
					                GP3_BLT_HDR_BLT_MODE_ENABLE    |
                                    GP3_BLT_HDR_CH3_OFF_ENABLE     |
                                    GP3_BLT_HDR_CH3_WIDHI_ENABLE;

                    /* ADJUST ORIGIN */
                    /* If we get here, we added a full 15 lines which is equivalent */
                    /* to subtracting one from the pattern y origin (adding 15).    */

                    org1 -= 0x20000000;
                    org2 -= 0x20000000;
                    dstoff1 += (gp3_dst_stride * 15);
                    dstoff2 += (gp3_dst_stride * 15);
                    srcoffset += 30;
                }
            }
        }
        return;
    }

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_BLT_MODE, blt_mode);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_antialiased_text
 *
 * This routine implements alpha blending between a constant source color and
 * a destination region.  The degree of the blend is controlled by an array
 * of 4BPP/8BPP values specified in 'data'.  'stride' indicates the pitch between
 * successive lines of blend data.  'srcx' indicates the x coordinate within
 * each line of blend data corresponding to the first pixel.  A y coordinate
 * for the source is deliberately omitted to avoid extra calculation for simple
 * cases that have no y index.  The calling program must adjust the data
 * pointer accordingly.  'fourbpp' selects between 4BPP and 8BPP alpha.
 *-------------------------------------------------------------------------*/

void gp_antialiased_text (unsigned long dstoffset, unsigned long srcx,
	unsigned long width, unsigned long height, unsigned char *data, long stride,
	int fourbpp)
{
	unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long size = ((width << 16) | height);
	unsigned long ch3_offset, srcoffset;
    unsigned long base, depth_flag;

    base = ((gp3_fb_base << 24) + (dstoffset & 0xFFC00000)) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

	/* ENABLE ALL RELEVANT REGISTERS */
	/* We override the raster mode register to force the */
	/* correct alpha blend                               */

	gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

	/* CALCULATIONS BASED ON ALPHA DEPTH */
	/* Although most antialiased text is 4BPP, the hardware supports */
	/* a full 8BPP.  Either case is supported by this routine.       */

	if (fourbpp)
	{
        depth_flag = GP3_CH3_SRC_4BPP_ALPHA;
		indent     = (srcx >> 1);
		srcoffset  = (indent & ~3L);
		indent    &= 3;
		ch3_offset = indent | ((srcx & 1) << 25);

		temp = ((width + (srcx & 1) + 1) >> 1) + indent;
	}
	else
	{
        depth_flag  = GP3_CH3_SRC_8BPP_ALPHA;
		indent     =  srcx;
		srcoffset  = (indent & ~3L);
		indent    &= 3;
		ch3_offset = indent;

		temp = width + indent;
	}

	total_dwords = (temp + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (temp >> 2);
	byte_count   = (temp & 3);

	/* SET RASTER MODE REGISTER */
	/* Alpha blending will only apply to RGB when no alpha component is present. */
	/* As 8BPP is not supported for this routine, the only alpha-less mode is    */
	/* 5:6:5.                                                                    */

	if (gp3_bpp == GP3_RM_BPPFMT_565)
	{
		WRITE_COMMAND32 (GP3_BLT_RASTER_MODE,
			gp3_bpp                    |
			GP3_RM_ALPHA_TO_RGB        |
			GP3_RM_ALPHA_A_PLUS_BETA_B |
			GP3_RM_SELECT_ALPHA_CHAN_3);
	}
	else
	{
		WRITE_COMMAND32 (GP3_BLT_RASTER_MODE,
			gp3_bpp                    |
			GP3_RM_ALPHA_ALL           |
			GP3_RM_ALPHA_A_PLUS_BETA_B |
			GP3_RM_SELECT_ALPHA_CHAN_3);
	}

	/* WRITE ALL REMAINING REGISTERS */

	WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, (dstoffset & 0x3FFFFF));
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, ch3_offset);
	WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, size);
	WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, size);
	WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN           |
		GP3_CH3_HST_SRC_ENABLE |
		depth_flag             |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_DST_REQ);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* WRITE DATA LINE BY LINE */
	/* Each line will be created as a separate command buffer entry to allow */
	/* line-by-line wrapping and to allow simultaneous rendering by the HW.  */
	
    if (((total_dwords << 2) * height) <= GP3_BLT_1PASS_SIZE &&
        (gp3_cmd_bottom - gp3_cmd_current) > (GP3_BLT_1PASS_SIZE + 72))
    {
        /* UPDATE THE COMMAND POINTER */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        /* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + ((total_dwords << 2) * height) + 8;

	    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
        	        	
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

        /* WRITE DWORD COUNT */
        	
		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | (total_dwords * height));

        while (height--)
	    {
		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    srcoffset += stride;
            cim_cmd_ptr += total_dwords << 2;
	    }

        WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
    else
    {
	    while (height--)
	    {
		    /* UPDATE THE COMMAND POINTER */
		    /* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		    /* This is created by adding gp3_cmd_current to the base pointer.        */

		    cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		    /* CHECK IF A WRAP WILL BE NEEDED */

		    gp3_cmd_next = gp3_cmd_current + size_dwords;
		    if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		    {
			    gp3_cmd_next  = gp3_cmd_top;
			
			    /* WAIT FOR HARDWARE */
			
			    GP3_WAIT_WRAP(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				    GP3_DATA_LOAD_HDR_ENABLE);
		    }
		    else
		    {
			    /* WAIT FOR AVAILABLE SPACE */

			    GP3_WAIT_PRIMITIVE(temp);
			    WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		    }

		    /* WRITE DWORD COUNT */
		
		    WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		    /* WRITE DATA */

		    WRITE_COMMAND_STRING32 (8, data, srcoffset, dword_count);
		    WRITE_COMMAND_STRING8 (8 + (dword_count << 2), data,
			    srcoffset + (dword_count << 2), byte_count);

		    /* UPDATE POINTERS */

		    srcoffset += stride;
		    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		    gp3_cmd_current = gp3_cmd_next;
	    }
    }
}

/*---------------------------------------------------------------------------
 * gp_masked_blt
 *
 * This routine copies source data to the screen.  A monochrome mask is used
 * to specify source transparency.
 *-------------------------------------------------------------------------*/

void gp_masked_blt (unsigned long dstoffset, unsigned long width,
	unsigned long height, unsigned long mono_srcx, unsigned long color_srcx,
	unsigned char *mono_mask, unsigned char *color_data, long mono_pitch,
	long color_pitch)
{
    unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long srcoffset, size;
    unsigned long i, ch3_offset, base;
    unsigned long flags = 0;

    if (gp3_blt_flags & CIMGP_BLTFLAGS_INVERTMONO)
        flags = GP3_RM_SOURCE_INVERT;

    /* MONO CALCULATIONS */

    indent    = (mono_srcx >> 3);
	srcoffset = (indent & ~3L);
	indent   &= 3;

    size = ((width + (mono_srcx & 7) + 7) >> 3) + indent;
	total_dwords = (size + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (size >> 2);
	byte_count   = (size & 3);
	
    base = ((gp3_fb_base << 24) + (gp3_scratch_base & 0xFFC00000)) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

    gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
                      GP3_BLT_HDR_STRIDE_ENABLE      |
                      GP3_BLT_HDR_DST_OFF_ENABLE     |
                      GP3_BLT_HDR_WIDHI_ENABLE       |
                      GP3_BLT_HDR_CH3_STR_ENABLE     |
                      GP3_BLT_HDR_CH3_OFF_ENABLE     |
                      GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
                      GP3_BLT_HDR_BASE_OFFSET_ENABLE |
                      GP3_BLT_HDR_BLT_MODE_ENABLE;

    WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, GP3_RM_BPPFMT_8888 | 0xCC);
    WRITE_COMMAND32 (GP3_BLT_STRIDE, (total_dwords << 2));
    WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, gp3_scratch_base & 0x3FFFFF);
    WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, (total_dwords << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, (total_dwords << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, 0);
    WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
    WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
        GP3_CH3_C3EN           |
		GP3_CH3_REPLACE_SOURCE |
		GP3_CH3_HST_SRC_ENABLE |
        GP3_CH3_SRC_8_8_8_8    |
      ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
    WRITE_COMMAND32 (GP3_BLT_MODE, 0);
    WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);

    /* START THE BLT */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

    for (i = 0; i < height; i++)
	{
		/* UPDATE THE COMMAND POINTER */
		/* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		/* This is created by adding gp3_cmd_current to the base pointer.        */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		/* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + size_dwords;
		if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
    		
			/* WAIT FOR HARDWARE */
    		
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			/* WAIT FOR AVAILABLE SPACE */

			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

		/* WRITE DWORD COUNT */
    	
		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		/* WRITE DATA */

		WRITE_COMMAND_STRING32 (8, mono_mask, srcoffset, dword_count);
		WRITE_COMMAND_STRING8 (8 + (dword_count << 2), mono_mask,
			srcoffset + (dword_count << 2), byte_count);

		/* UPDATE POINTERS */

		srcoffset += mono_pitch;
		WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
	}

    /* SECOND BLT */

    gp_declare_blt (gp3_blt_flags | CIMGP_BLTFLAGS_HAZARD);

    base = ((gp3_fb_base << 24) +  (dstoffset & 0xFFC00000))                          |
	       ((gp3_fb_base << 14) + (((gp3_scratch_base + indent) & 0xFFC00000) >> 10)) |
            (gp3_base_register & GP3_BASE_OFFSET_CH3MASK);

    gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
                      GP3_BLT_HDR_STRIDE_ENABLE      |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_SRC_OFF_ENABLE     |
		              GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

    /* ENABLE TRANSPARENCY AND PATTERN COPY ROP */
    /* The monochrome data is used as a mask but is otherwise not involved in */
    /* the BLT.  The color data is routed through the pattern channel.        */

    WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_bpp | 0xF0 | GP3_RM_SRC_TRANS | flags);
    WRITE_COMMAND32 (GP3_BLT_STRIDE, (total_dwords << 18) | gp3_dst_stride);
    WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset & 0x3FFFFF);
    WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET,
		((gp3_scratch_base + indent) & 0x3FFFFF ) | ((mono_srcx & 7) << 26));
    WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, (width << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI,  (width << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);

    /* PROGRAM PARAMETERS FOR COLOR SOURCE DATA        */
	/* Data may be color converted along the way.      */
	
	if ((gp3_src_format & GP3_CH3_SRC_MASK) == GP3_CH3_SRC_24BPP_PACKED)
    {
        srcoffset  = color_srcx * 3;
        ch3_offset = 0;
        size       = width * 3;

        WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, (((size + 3) >> 2) << 16) | height);
    }
    else if (gp3_src_pix_shift == 3)
	{
		/* CALCULATE INDENT AND SOURCE OFFSET */

		indent       = (color_srcx >> 1);
		srcoffset    = (indent & ~3L);
		indent      &= 3;
		ch3_offset   = indent | ((color_srcx & 1) << 25);

		size = ((width + (color_srcx & 1) + 1) >> 1) + indent;
	}
	else
	{
		indent      = (color_srcx << gp3_src_pix_shift);
		srcoffset   = (indent & ~3L);
		indent     &= 3;
		ch3_offset  = indent;

		size = (width << gp3_src_pix_shift) + indent;
	}

    total_dwords  = (size + 3) >> 2;
	size_dwords   = (total_dwords << 2) + 8;
	dword_count   = (size >> 2);
	byte_count    = (size & 3);

	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, ch3_offset);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN            |
		GP3_CH3_HST_SRC_ENABLE  |
		gp3_src_format          |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_SRC_MONO | GP3_BM_SRC_FB);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

	/* WRITE DATA LINE BY LINE */
		
	while (height--)
	{
		/* UPDATE THE COMMAND POINTER */
		
		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		/* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + size_dwords;
		if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
						
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

		/* WRITE DWORD COUNT */
		
		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		/* WRITE COLOR DATA TO THE COMMAND BUFFER */

		WRITE_COMMAND_STRING32 (8, color_data, srcoffset, dword_count);
		WRITE_COMMAND_STRING8 (8 + (dword_count << 2), color_data,
			srcoffset + (dword_count << 2), byte_count);

		/* UPDATE COMMAND BUFFER POINTERS */
		/* We do this before writing the monochrome data because otherwise */
		/* the GP could throttle the writes to the host source register    */
		/* waiting for color data.  If the command buffer has not been     */
		/* updated to load the color data...                               */

		srcoffset += color_pitch;
		WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
    }
}

/*---------------------------------------------------------------------------
 * gp_screen_to_screen_masked
 *
 * This routine performs a screen to screen BLT, using a monochrome mask to
 * specify source transparency.  The source data is assumed to be in the
 * current destination format and to not overlap the destination.
 *-------------------------------------------------------------------------*/

void gp_screen_to_screen_masked (unsigned long dstoffset, unsigned long srcoffset,
    unsigned long width, unsigned long height, unsigned long mono_srcx,
    unsigned char *mono_mask, long mono_pitch)
{
    unsigned long indent, temp;
	unsigned long total_dwords, size_dwords;
	unsigned long dword_count, byte_count;
	unsigned long srcoff, size;
    unsigned long i, base;
    unsigned long flags = 0;

    if (gp3_blt_flags & CIMGP_BLTFLAGS_INVERTMONO)
        flags = GP3_RM_SOURCE_INVERT;

    /* MONO CALCULATIONS */

    indent    = (mono_srcx >> 3);
	srcoff    = (indent & ~3L);
	indent   &= 3;

    size = ((width + (mono_srcx & 7) + 7) >> 3) + indent;
	total_dwords = (size + 3) >> 2;
	size_dwords  = (total_dwords << 2) + 8;
	dword_count  = (size >> 2);
	byte_count   = (size & 3);
	
    base = ((gp3_fb_base << 24) + (gp3_scratch_base & 0xFFC00000)) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

    gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
                      GP3_BLT_HDR_STRIDE_ENABLE      |
                      GP3_BLT_HDR_DST_OFF_ENABLE     |
                      GP3_BLT_HDR_WIDHI_ENABLE       |
                      GP3_BLT_HDR_CH3_STR_ENABLE     |
                      GP3_BLT_HDR_CH3_OFF_ENABLE     |
                      GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
                      GP3_BLT_HDR_BASE_OFFSET_ENABLE |
                      GP3_BLT_HDR_BLT_MODE_ENABLE;

    WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, GP3_RM_BPPFMT_8888 | 0xCC);
    WRITE_COMMAND32 (GP3_BLT_STRIDE, (total_dwords << 2));
    WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, gp3_scratch_base & 0x3FFFFF);
    WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, (total_dwords << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI, (total_dwords << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, 0);
    WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);
    WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
        GP3_CH3_C3EN           |
		GP3_CH3_REPLACE_SOURCE |
		GP3_CH3_HST_SRC_ENABLE |
        GP3_CH3_SRC_8_8_8_8    |
      ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
    WRITE_COMMAND32 (GP3_BLT_MODE, 0);
    WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);

    /* START THE BLT */

	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

    for (i = 0; i < height; i++)
	{
		/* UPDATE THE COMMAND POINTER */
		/* The WRITE_COMMANDXX macros use a pointer to the current buffer space. */
		/* This is created by adding gp3_cmd_current to the base pointer.        */

		cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

		/* CHECK IF A WRAP WILL BE NEEDED */

		gp3_cmd_next = gp3_cmd_current + size_dwords;
		if ((gp3_cmd_bottom - gp3_cmd_next) <= GP3_MAX_COMMAND_SIZE)
		{
			gp3_cmd_next  = gp3_cmd_top;
    		
			/* WAIT FOR HARDWARE */
    		
			GP3_WAIT_WRAP(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_WRAP |
				GP3_DATA_LOAD_HDR_ENABLE);
		}
		else
		{
			/* WAIT FOR AVAILABLE SPACE */

			GP3_WAIT_PRIMITIVE(temp);
			WRITE_COMMAND32 (0, GP3_DATA_LOAD_HDR_TYPE | GP3_DATA_LOAD_HDR_ENABLE);
		}

		/* WRITE DWORD COUNT */
    	
		WRITE_COMMAND32 (4, GP3_CH3_HOST_SOURCE_TYPE | total_dwords);

		/* WRITE DATA */

		WRITE_COMMAND_STRING32 (8, mono_mask, srcoff, dword_count);
		WRITE_COMMAND_STRING8 (8 + (dword_count << 2), mono_mask,
			srcoff + (dword_count << 2), byte_count);

		/* UPDATE POINTERS */

		srcoff += mono_pitch;
		WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
		gp3_cmd_current = gp3_cmd_next;
	}

    /* SECOND BLT */

    gp_declare_blt (gp3_blt_flags | CIMGP_BLTFLAGS_HAZARD);

    base = ((gp3_fb_base << 24) +  (dstoffset & 0xFFC00000))                          |
	       ((gp3_fb_base << 14) + (((gp3_scratch_base + indent) & 0xFFC00000) >> 10)) |
           ((gp3_fb_base << 4)  + ((srcoffset & 0xFFC00000) >> 20));

    gp3_cmd_header |= GP3_BLT_HDR_RASTER_ENABLE      |
                      GP3_BLT_HDR_STRIDE_ENABLE      |
		              GP3_BLT_HDR_DST_OFF_ENABLE     |
					  GP3_BLT_HDR_SRC_OFF_ENABLE     |
		              GP3_BLT_HDR_WIDHI_ENABLE       |
					  GP3_BLT_HDR_CH3_STR_ENABLE     |
					  GP3_BLT_HDR_CH3_WIDHI_ENABLE   |
					  GP3_BLT_HDR_BASE_OFFSET_ENABLE |
					  GP3_BLT_HDR_CH3_OFF_ENABLE     |
					  GP3_BLT_HDR_BLT_MODE_ENABLE;

    /* ENABLE TRANSPARENCY AND PATTERN COPY ROP */
    /* The monochrome data is used as a mask but is otherwise not involved in */
    /* the BLT.  The color data is routed through the pattern channel.        */

    WRITE_COMMAND32 (GP3_BLT_RASTER_MODE, gp3_bpp | 0xF0 | GP3_RM_SRC_TRANS | flags);
    WRITE_COMMAND32 (GP3_BLT_STRIDE, (total_dwords << 18) | gp3_dst_stride);
    WRITE_COMMAND32 (GP3_BLT_DST_OFFSET, dstoffset & 0x3FFFFF);
    WRITE_COMMAND32 (GP3_BLT_SRC_OFFSET,
		((gp3_scratch_base + indent) & 0x3FFFFF ) | ((mono_srcx & 7) << 26));
    WRITE_COMMAND32 (GP3_BLT_WID_HEIGHT, (width << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_CH3_WIDHI,  (width << 16) | height);
    WRITE_COMMAND32 (GP3_BLT_BASE_OFFSET, base);

    /* PROGRAM PARAMETERS FOR COLOR SOURCE DATA  */
	
	WRITE_COMMAND32 (GP3_BLT_CH3_OFFSET, srcoffset & 0x3FFFFF);
	WRITE_COMMAND32 (GP3_BLT_CH3_MODE_STR,
		GP3_CH3_C3EN            |
        gp3_ch3_bpp             |
        gp3_src_stride          |
	  ((gp3_blt_flags & CIMGP_BLTFLAGS_PRES_LUT) << 20));
	WRITE_COMMAND32 (GP3_BLT_MODE, gp3_blt_mode | GP3_BM_SRC_MONO | GP3_BM_SRC_FB);

	/* START THE BLT */

	WRITE_COMMAND32 (GP3_BLT_CMD_HEADER, gp3_cmd_header);
	WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;
}

/*---------------------------------------------------------------------------
 * gp_bresenham_line
 *
 * This routine draws a vector using the specified Bresenham parameters.
 * Currently this file does not support a routine that accepts the two
 * endpoints of a vector and calculates the Bresenham parameters.  If it
 * ever does, this routine is still required for vectors that have been
 * clipped.
 *-------------------------------------------------------------------------*/

void gp_bresenham_line (unsigned long dstoffset, unsigned short length,
	unsigned short initerr, unsigned short axialerr, unsigned short diagerr,
	unsigned long flags)
{
    unsigned long base;
    long offset;

    /* HANDLE NEGATIVE VECTORS */
    /* We have to be very careful with vectors that increment negatively   */
    /* Our framebuffer scheme tries to align the destination of every      */
    /* BLT or vector to the nearest 4MB-aligned boundary.  This is         */
    /* necessary because the GP only supports offsets up to 16MB, but the  */
    /* framebuffer can be over 128MB.  To solve this problem, the GP       */
    /* base address registers are alignable to 4MB regions.  However, we   */
    /* cannot simply align the dest offset when the vector goes negative.  */
    /* The vector offset could underflow, causing the offset to jump from  */
    /* near 0 to 16MB.  As we cannot accurately determine the last address */
    /* that will be written in a vector short of walking the algorithm in  */
    /* software, we do a worst case approximation.                         */

    offset = dstoffset;
    if (!(flags & CIMGP_POSMAJOR))
    {
        if (flags & CIMGP_YMAJOR)
            offset -= length * gp3_dst_stride;
        else
            offset -= (length << gp3_pix_shift);

        if (offset < 0)
            offset = 0;
    }
    if (!(flags & CIMGP_POSMINOR))
    {
        if (flags & CIMGP_YMAJOR)
            offset -= (length << gp3_pix_shift);
        else
            offset -= length * gp3_dst_stride;

        if (offset < 0)
            offset = 0;
    }

    offset &= 0xFFC00000;
    dstoffset -= offset;

    base = ((gp3_fb_base << 24) + offset) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

    /* ENABLE RELEVANT REGISTERS */
	/* Note that we always enable and write the channel 3 mode, if only */
	/* to turn it off.   Cimarron also always writes the base offset    */
	/* register to allow operation with frame buffers larger than 16MB. */

	gp3_cmd_header |= GP3_VEC_HDR_DST_OFF_ENABLE     |
		              GP3_VEC_HDR_VEC_ERR_ENABLE     |
					  GP3_VEC_HDR_VEC_LEN_ENABLE     |
					  GP3_VEC_HDR_BASE_OFFSET_ENABLE |
					  GP3_VEC_HDR_CH3_STR_ENABLE     |
					  GP3_VEC_HDR_VEC_MODE_ENABLE;

	/* WRITE THE REGISTERS COMMON TO ALL PATTERN TYPES */
	/* The destination base is the frame buffer base plus whatever */
	/* 4MB segment we happen to be drawing to.                     */

	WRITE_COMMAND32 (GP3_VECTOR_VEC_ERR,
		(((unsigned long)axialerr << 16) | (unsigned long)diagerr));
	WRITE_COMMAND32 (GP3_VECTOR_VEC_LEN,
		(((unsigned long)length << 16)   | (unsigned long)initerr));
	WRITE_COMMAND32 (GP3_VECTOR_BASE_OFFSET, base);
	
	/* CHECK VECTOR PATTERN CASE */

	if (gp3_ch3_pat)
	{
		/* SET THE SOLID COLOR */
		/* The color for vector patterns from channel 3 comes from */
		/* the regular pattern registers.                          */

		gp3_cmd_header |= GP3_VEC_HDR_PAT_CLR0_ENABLE;

		WRITE_COMMAND32 (GP3_VECTOR_PAT_COLOR_0, gp3_vector_pattern_color);

		/* INITIALIZE CHANNEL 3 PARAMETERS */
        /* We route the channel 3 output to the old source channel.  If the user */
        /* sets a ROP that involves source, they will get unexpected results.    */

		WRITE_COMMAND32 (GP3_VECTOR_DST_OFFSET, dstoffset);
		WRITE_COMMAND32 (GP3_VECTOR_CH3_MODE_STR,
            GP3_CH3_C3EN | GP3_CH3_REPLACE_SOURCE | GP3_CH3_COLOR_PAT_ENABLE | GP3_CH3_SRC_8_8_8_8);
	}
	else
	{
		/* DISABLE CHANNEL 3 AND USE NORMAL PATTERN ORIGINS */

		WRITE_COMMAND32 (GP3_VECTOR_CH3_MODE_STR, 0);
		WRITE_COMMAND32 (GP3_VECTOR_DST_OFFSET, (dstoffset | gp3_pat_origin));
	}
	
	/* START THE VECTOR */

    WRITE_COMMAND32 (GP3_VEC_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_VECTOR_MODE, (gp3_vec_mode | flags));
    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);

    gp3_cmd_current = gp3_cmd_next;

    /* ADD A SECOND VECTOR TO CLEAR THE BYTE ENABLES            */
    /* We set a transparent pattern to clear the byte enables.  */
    /* We then restore the previous pattern. (SiBZ #4001)       */

    if (gp3_ch3_pat)
    {
        cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        WRITE_COMMAND32 (0, GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE);
	    WRITE_COMMAND32 (4, 0x100);
	    WRITE_COMMAND32 (8, (1 | GP3_LUT_DATA_TYPE));	
	    WRITE_COMMAND32 (12, 0);

        /* DUMMY VECTOR */
        /* We shouldn't need to write anything but vector mode and the length. */

        WRITE_COMMAND32 (16, GP3_VEC_HDR_TYPE | GP3_VEC_HDR_VEC_MODE_ENABLE |
            GP3_VEC_HDR_VEC_LEN_ENABLE);
        WRITE_COMMAND32 (16 + GP3_VECTOR_MODE, (gp3_vec_mode | flags));
        WRITE_COMMAND32 (16 + GP3_VECTOR_VEC_LEN, (1 << 16) | (unsigned long)initerr);

        WRITE_COMMAND32 (16 + GP3_VECTOR_COMMAND_SIZE, GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE);
	    WRITE_COMMAND32 (20 + GP3_VECTOR_COMMAND_SIZE, 0x100);
	    WRITE_COMMAND32 (24 + GP3_VECTOR_COMMAND_SIZE, (1 | GP3_LUT_DATA_TYPE));	
	    WRITE_COMMAND32 (28 + GP3_VECTOR_COMMAND_SIZE, gp3_vec_pat);

        gp3_cmd_current += 32 + GP3_VECTOR_COMMAND_SIZE;
    }
}

/*---------------------------------------------------------------------------
 * gp_line_from_endpoints
 *
 * This routine draws a vector from a set of rectangular coordinates.  The
 * rectangle is assumed to use the currently specified destination stride.
 *-------------------------------------------------------------------------*/

void gp_line_from_endpoints (unsigned long dstoffset, unsigned long x0,
    unsigned long y0, unsigned long x1, unsigned long y1, int inclusive)
{
    unsigned long base;
    unsigned long length;
    unsigned long flags;
    unsigned short initerr, axialerr, diagerr;
    long dx, dy, dmaj, dmin;
    long offset;

    /* ADJUST DESTINATION OFFSET BASED ON STARTING COORDINATE */

    dstoffset += (x0 << gp3_pix_shift) + (y0 * gp3_dst_stride);

    /* CALCULATE BRESENHAM TERMS */

    dx = (long)x1 - (long)x0;
    dy = (long)y1 - (long)y0;
    if (dx < 0) dx = -dx;
    if (dy < 0) dy = -dy;

    if (dx >= dy)
    {
        dmaj = dx;
        dmin = dy;
        flags = 0;
        if (x1 > x0) flags |= CIMGP_POSMAJOR;
        if (y1 > y0) flags |= CIMGP_POSMINOR;
    }
    else
    {
        dmaj = dy;
        dmin = dx;
        flags = CIMGP_YMAJOR;
        if (x1 > x0) flags |= CIMGP_POSMINOR;
        if (y1 > y0) flags |= CIMGP_POSMAJOR;
    }

    axialerr = (unsigned short)(dmin << 1);
    diagerr  = (unsigned short)((dmin-dmaj) << 1);
    initerr  = (unsigned short)((dmin << 1) - dmaj);
    if (!(flags & CIMGP_POSMINOR)) initerr--;

    /* CHECK FOR NO WORK */

    if (!dmaj)
        return;

    /* CHECK INCLUSIVE OR EXCLUSIVE */
    /* An inclusive line can be accomplished by simply adding 1 to the */
    /* line length.                                                    */

    length = dmaj;
    if (inclusive)
        length++;

    /* HANDLE NEGATIVE VECTORS */

    offset = dstoffset;
    if (!(flags & CIMGP_POSMAJOR))
    {
        if (flags & CIMGP_YMAJOR)
            offset -= length * gp3_dst_stride;
        else
            offset -= (length << gp3_pix_shift);

        if (offset < 0)
            offset = 0;
    }
    if (!(flags & CIMGP_POSMINOR))
    {
        if (flags & CIMGP_YMAJOR)
            offset -= (length << gp3_pix_shift);
        else
            offset -= length * gp3_dst_stride;

        if (offset < 0)
            offset = 0;
    }

    offset &= 0xFFC00000;
    dstoffset -= offset;

    base = ((gp3_fb_base << 24) + offset) |
            (gp3_base_register & ~GP3_BASE_OFFSET_DSTMASK);

    /* ENABLE RELEVANT REGISTERS */
	/* Note that we always enable and write the channel 3 mode, if only */
	/* to turn it off.   Cimarron also always writes the base offset    */
	/* register to allow operation with frame buffers larger than 16MB. */

	gp3_cmd_header |= GP3_VEC_HDR_DST_OFF_ENABLE     |
		              GP3_VEC_HDR_VEC_ERR_ENABLE     |
					  GP3_VEC_HDR_VEC_LEN_ENABLE     |
					  GP3_VEC_HDR_BASE_OFFSET_ENABLE |
					  GP3_VEC_HDR_CH3_STR_ENABLE     |
					  GP3_VEC_HDR_VEC_MODE_ENABLE;

	/* WRITE THE REGISTERS COMMON TO ALL PATTERN TYPES */
	/* The destination base is the frame buffer base plus whatever */
	/* 4MB segment we happen to be drawing to.                     */

	WRITE_COMMAND32 (GP3_VECTOR_VEC_ERR,
		(((unsigned long)axialerr << 16) | (unsigned long)diagerr));
	WRITE_COMMAND32 (GP3_VECTOR_VEC_LEN,
		(((unsigned long)length << 16)   | (unsigned long)initerr));
	WRITE_COMMAND32 (GP3_VECTOR_BASE_OFFSET, base);
	
	/* CHECK VECTOR PATTERN CASE */

	if (gp3_ch3_pat)
	{
		/* SET THE SOLID COLOR */
		/* The color for vector patterns from channel 3 comes from */
		/* the regular pattern registers.                          */

		gp3_cmd_header |= GP3_VEC_HDR_PAT_CLR0_ENABLE;

		WRITE_COMMAND32 (GP3_VECTOR_PAT_COLOR_0, gp3_vector_pattern_color);

		/* INITIALIZE CHANNEL 3 PARAMETERS */
        /* We route the channel 3 output to the old source channel.  If the user */
        /* sets a ROP that involves source, they will get unexpected results.    */

		WRITE_COMMAND32 (GP3_VECTOR_DST_OFFSET, dstoffset);
		WRITE_COMMAND32 (GP3_VECTOR_CH3_MODE_STR,
            GP3_CH3_C3EN | GP3_CH3_REPLACE_SOURCE | GP3_CH3_COLOR_PAT_ENABLE | GP3_CH3_SRC_8_8_8_8);
	}
	else
	{
		/* DISABLE CHANNEL 3 AND USE NORMAL PATTERN ORIGINS */

		WRITE_COMMAND32 (GP3_VECTOR_CH3_MODE_STR, 0);
		WRITE_COMMAND32 (GP3_VECTOR_DST_OFFSET, (dstoffset | gp3_pat_origin));
	}
	
	/* START THE VECTOR */

    WRITE_COMMAND32 (GP3_VEC_CMD_HEADER, gp3_cmd_header);
	WRITE_COMMAND32 (GP3_VECTOR_MODE, (gp3_vec_mode | flags));
    WRITE_GP32 (GP3_CMD_WRITE, gp3_cmd_next);
	gp3_cmd_current = gp3_cmd_next;

    /* ADD A SECOND VECTOR TO CLEAR THE BYTE ENABLES            */
    /* We set a transparent pattern to clear the byte enables.  */
    /* We then restore the previous pattern.  (SiBZ #4001)      */

    if (gp3_ch3_pat)
    {
        cim_cmd_ptr = cim_cmd_base_ptr + gp3_cmd_current;

        WRITE_COMMAND32 (0, GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE);
	    WRITE_COMMAND32 (4, 0x100);
	    WRITE_COMMAND32 (8, (1 | GP3_LUT_DATA_TYPE));	
	    WRITE_COMMAND32 (12, 0);

        /* DUMMY VECTOR */
        /* We shouldn't need to write anything but vector mode and the length. */

        WRITE_COMMAND32 (16, GP3_VEC_HDR_TYPE | GP3_VEC_HDR_VEC_MODE_ENABLE |
            GP3_VEC_HDR_VEC_LEN_ENABLE);
        WRITE_COMMAND32 (16 + GP3_VECTOR_MODE, (gp3_vec_mode | flags));
        WRITE_COMMAND32 (16 + GP3_VECTOR_VEC_LEN, (1 << 16) | (unsigned long)initerr);

        WRITE_COMMAND32 (16 + GP3_VECTOR_COMMAND_SIZE, GP3_LUT_HDR_TYPE | GP3_LUT_HDR_DATA_ENABLE);
	    WRITE_COMMAND32 (20 + GP3_VECTOR_COMMAND_SIZE, 0x100);
	    WRITE_COMMAND32 (24 + GP3_VECTOR_COMMAND_SIZE, (1 | GP3_LUT_DATA_TYPE));	
	    WRITE_COMMAND32 (28 + GP3_VECTOR_COMMAND_SIZE, gp3_vec_pat);

        gp3_cmd_current += 32 + GP3_VECTOR_COMMAND_SIZE;
    }
}

/*---------------------------------------------------------------------------
 * gp_wait_until_idle
 *
 * This routine stalls execution until the GP is no longer actively rendering.
 *-------------------------------------------------------------------------*/

void gp_wait_until_idle (void)
{
    unsigned long temp;
    while (((temp = READ_GP32 (GP3_BLT_STATUS)) & GP3_BS_BLT_BUSY) ||
            !(temp & GP3_BS_CB_EMPTY))
    {
        ;
    }
}

/*---------------------------------------------------------------------------
 * gp_test_blt_busy
 *-------------------------------------------------------------------------*/

int gp_test_blt_busy (void)
{
    unsigned long temp;

    if (((temp = READ_GP32 (GP3_BLT_STATUS)) & GP3_BS_BLT_BUSY) ||
        !(temp & GP3_BS_CB_EMPTY))
        return 1;
	
    return 0;
}

/*---------------------------------------------------------------------------
 * gp_test_blt_pending
 *-------------------------------------------------------------------------*/

int gp_test_blt_pending (void)
{
    if ((READ_GP32 (GP3_BLT_STATUS)) & GP3_BS_BLT_PENDING)
        return 1;
	
    return 0;
}

/*---------------------------------------------------------------------------
 * gp_wait_blt_pending
 *-------------------------------------------------------------------------*/

void gp_wait_blt_pending (void)
{
    while ((READ_GP32 (GP3_BLT_STATUS)) & GP3_BS_BLT_PENDING)
        ;
}

/*---------------------------------------------------------------------------
 * gp_save_state
 *
 * This routine saves all persistent GP information.
 *-------------------------------------------------------------------------*/

void gp_save_state (GP_SAVE_RESTORE *gp_state)
{
    Q_WORD msr_value;

    gp_wait_until_idle();

    msr_read64 (MSR_DEVICE_GEODELX_GP, MSR_GEODELINK_CONFIG, &msr_value);
    gp_state->cmd_bottom  = READ_GP32 (GP3_CMD_BOT) & 0xFFFFFF;
    gp_state->cmd_top     = READ_GP32 (GP3_CMD_TOP) & 0xFFFFFF;
    gp_state->cmd_base    = (msr_value.low << 4) & 0xFFF00000;
    gp_state->base_offset = READ_GP32 (GP3_BASE_OFFSET);

    /* RESET THE READ POINTER */

    gp_set_command_buffer_base (gp_state->cmd_base, gp_state->cmd_top, gp_state->cmd_bottom);
}

/*---------------------------------------------------------------------------
 * gp_restore_state
 *
 * This routine restores all persistent GP information.
 *-------------------------------------------------------------------------*/

void gp_restore_state (GP_SAVE_RESTORE *gp_state)
{
    gp_wait_until_idle();

    WRITE_GP32 (GP3_BASE_OFFSET, gp_state->base_offset);

    gp_set_command_buffer_base (gp_state->cmd_base, gp_state->cmd_top, gp_state->cmd_bottom);
}

