/* $XFree86: xc/programs/Xserver/hw/xfree86/accel/ibm8514/imfill.s,v 3.1 1996/12/23 06:37:55 dawes Exp $ */ /* $XConsortium: imfill.s /main/3 1996/02/21 17:24:49 kaleb $ */ /****************************************************************************** This is a assembly language version of the ibm8514ImageFill routine. It is renamed to enable a filter routine to catch the simplest cases and dispatch them to the more efficent ibm8514Imagewrite. Written by Hans Nasten ( nasten@everyware.se ) AUG 29, 1993. The equivalent C-code looks like this. void ibm8514RealImageFill(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, alu, planemask) int x; int y; int w; int h; unsigned char *psrc; int pwidth; int pw; int ph; int pox; int poy; short alu; short planemask; { int srcxsave, srcx, srcy, dstw, srcw, srch; int wtemp, count, i, j; unsigned short btemp, *p; WaitQueue(2); outw(FRGD_MIX, FSS_PCDATA | alu); outw(WRT_MASK, planemask); WaitQueue(7); outw(CUR_X, (short)x); outw(CUR_Y, (short)y); if (w&1) outw(MAJ_AXIS_PCNT, (short)w); else outw(MAJ_AXIS_PCNT, (short)w-1); outw(MULTIFUNC_CNTL, MIN_AXIS_PCNT | h-1); outw(MULTIFUNC_CNTL, SCISSORS_L | x); outw(MULTIFUNC_CNTL, SCISSORS_R | (x+w-1)); outw(CMD, CMD_RECT | INC_Y | INC_X | DRAW | PCDATA | WRTDATA | _16BIT | BYTSEQ); WaitQueue(8); modulus(y-poy,ph,srcy); while( h > 0 ) { srch = ( srcy+h > ph ? ph - srcy : h ); modulus(x-pox,pw,srcxsave); for( i = 0; i < srch; i++ ) { dstw = w; srcx = srcxsave; srcw = ( srcx+w > pw ? pw - srcx : w ); wtemp = 0; while( dstw > 0 ) { p = (unsigned short *)((unsigned char *)(psrc + pwidth * srcy + srcx)); if( wtemp & 1 ) { outw( PIX_TRANS, (btemp & 0x00ff) | (*p << 8 ) ); p = (unsigned short *)((unsigned char *)(p)++); wtemp = srcw - 1; } else wtemp = srcw; count = wtemp / 2; for( j = 0; j < count; j++ ) outw( PIX_TRANS, *p++ ); dstw -= srcw; srcx = 0; if( wtemp & 1 ) { if( dstw != 0 ) { btemp = *p; } else outw( PIX_TRANS, *p ); } srcw = ( dstw < pw ? dstw : pw ); } srcy++; h--; } srcy = 0; } WaitQueue(3); outw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC); outw(MULTIFUNC_CNTL, SCISSORS_L); outw(MULTIFUNC_CNTL, SCISSORS_R | 1023); } ******************************************************************************/ #define _8514_ASM_ #include "assyntax.h" #include "reg8514.h" AS_BEGIN /* * Defines for in arguments. */ #define x_arg REGOFF(8,EBP) #define y_arg REGOFF(12,EBP) #define w_arg REGOFF(16,EBP) #define h_arg REGOFF(20,EBP) #define psrc_arg REGOFF(24,EBP) #define pwidth_arg REGOFF(28,EBP) #define pw_arg REGOFF(32,EBP) #define ph_arg REGOFF(36,EBP) #define pox_arg REGOFF(40,EBP) #define poy_arg REGOFF(44,EBP) #define alu_arg REGOFF(48,EBP) #define planemask_arg REGOFF(52,EBP) /* * Defines for local variables. */ #define srcy_loc REGOFF(-4,EBP) #define srch_loc REGOFF(-8,EBP) #define srcx_loc REGOFF(-12,EBP) #define srcxsave_loc REGOFF(-16,EBP) SEG_TEXT ALIGNTEXT4 GLOBL GLNAME(ibm8514RealImageFill) GLNAME(ibm8514RealImageFill): PUSH_L (EBP) MOV_L (ESP,EBP) SUB_L (CONST(16),ESP) PUSH_L (EDI) PUSH_L (ESI) PUSH_L (EBX) /* * Check if height or width is 0. */ MOV_L (w_arg,EDI) MOV_L (h_arg,EBX) OR_L (EDI,EDI) JZ (.finish) OR_L (EBX,EBX) JZ (.finish) /* * Wait for 2 queue entries */ MOV_L (GP_STAT,EDX) .wait_queue_0: IN_B TEST_B (CONST(0x40),AL) JNZ (.wait_queue_0) /* * Init 8514 registers. */ MOV_L (FRGD_MIX,EDX) MOV_W (alu_arg,AX) OR_W (FSS_PCDATA,AX) OUT_W MOV_L (WRT_MASK,EDX) MOV_W (planemask_arg,AX) OUT_W /* * Wait for 7 queue entries */ MOV_L (GP_STAT,EDX) .wait_queue_1: IN_B TEST_B (CONST(0x02),AL) JNZ (.wait_queue_1) MOV_L (CUR_X,EDX) MOV_W (x_arg,AX) OUT_W MOV_L (CUR_Y,EDX) MOV_W (y_arg,AX) OUT_W /* * If the width is odd, program the 8514 registers for width+1. */ MOV_L (MAJ_AXIS_PCNT,EDX) MOV_W (w_arg,AX) TEST_W (CONST(1),AX) JNZ (.odd) DEC_W (AX) .odd: OUT_W .cont1: /* * Set height and scissors registers. * The scissors is used to clip the last unwanted pixel on * lines with a odd length. */ MOV_L (MULTIFUNC_CNTL,EDX) MOV_W (h_arg,AX) DEC_W (AX) /* OR_W (MIN_AXIS_PCNT,AX)*/ OUT_W MOV_W (x_arg,AX) OR_W (SCISSORS_L,AX) OUT_W MOV_W (x_arg,AX) ADD_W (w_arg,AX) DEC_W (AX) OR_W (SCISSORS_R,AX) OUT_W /* * Give command to 8514. * The command is : CMD_RECT | INC_Y | INC_X | DRAW * | PCDATA | WRTDATA | _16BIT | BYTSEQ); */ MOV_L (CMD,EDX) MOV_W (GP_WRITE_CMD,AX) OUT_W /* * Do the modulo trick for the y coordinate. * This is stolen from the gcc 2.4.5 output. */ MOV_L (y_arg,EAX) SUB_L (poy_arg,EAX) CDQ IDIV_L (ph_arg) TEST_L (EDX,EDX) JGE (.mod1) ADD_L (ph_arg,EDX) .mod1: MOV_L (EDX,srcy_loc) /* * Wait until the fifo is empty. */ MOV_L (GP_STAT,EDX) .wait_queue_2: IN_B TEST_B (CONST(1),AL) JNZ (.wait_queue_2) CLD /* * Process all lines on screen repeating the pixmap if needed. * This loop paints from the present y location to the end of * the pixmap. ( or to the end of the screen area if the pixmap * is nonrepeating ). * Start by checking if the pixmap is high enough to completely * cover the screen area vertically. */ .next_pixmap_vertical: MOV_L (srcy_loc,EAX) ADD_L (h_arg,EAX) MOV_L (ph_arg,EBX) CMP_L (EBX,EAX) JLE (.cmp_get_h) SUB_L (srcy_loc,EBX) JMP (.cmp2) .cmp_get_h: MOV_L (h_arg,EBX) .cmp2: MOV_L (EBX,srch_loc) SUB_L (EBX,h_arg) /* * Do the modulo trick for the x coordinate. */ MOV_L (x_arg,EAX) SUB_L (pox_arg,EAX) CDQ IDIV_L (pw_arg) TEST_L (EDX,EDX) JGE (.mod2) ADD_L (pw_arg,EDX) .mod2: MOV_L (EDX,srcxsave_loc) /* * Process one line of pixels in the pixmap, * repeating the pixmap horisontally if needed. * Start by checking if the pixmap is wide enough to completely * cover the screen area horisontally. */ .next_pixmap_line: MOV_L (w_arg,EBX) MOV_L (srcxsave_loc,EAX) ADD_L (EBX,EAX) MOV_L (pw_arg,EDI) CMP_L (EDI,EAX) JLE (.cmp_get_w) SUB_L (srcxsave_loc,EDI) JMP (.cmp4) .cmp_get_w: MOV_L (EBX,EDI) .cmp4: SUB_L (ECX,ECX) /* * Calculate and save a pointer to the first pixel on this line in * the pixmap. Add the x offset for the first lap and then skip the * first part of the loop. * ( all this just to avoid one multiplication and two add's in the loop ). */ MOV_L (pwidth_arg,EAX) MUL_L (srcy_loc) ADD_L (psrc_arg,EAX) MOV_L (EAX,srcx_loc) MOV_L (srcxsave_loc,ESI) ADD_L (EAX,ESI) JMP (.previously_even) /* * Copy one line of the pixmap to the screen. * This loop paints one line of pixels from the pixmap onto the screen. */ .next_pixmap_horizontal: MOV_L (srcx_loc,ESI) /* * Is there a pixel leftover from the previous lap in the loop ? * ( since CH is used to store the saved pixel, only CL is in scope ). */ TEST_B (CONST(1),CL) JZ (.previously_even) /* * Yes there was, combine it with the first pixel and write them * to the 8514 engine. */ MOV_B (CH,AL) MOV_B (REGIND(ESI),AH) MOV_L (PIX_TRANS,EDX) OUT_W INC_L (ESI) MOV_L (EDI,ECX) DEC_L (ECX) JMP (.move_pixels) .previously_even: MOV_L (EDI,ECX) MOV_L (PIX_TRANS,EDX) /* * Move the rest of the line using a "rep outsw" instruction. */ .move_pixels: MOV_L (ECX,EAX) SHR_L (CONST(1),ECX) REP OUTS_W MOV_L (EAX,ECX) SUB_L (EDI,EBX) /* * Is there a pixel left unwritten ? */ TEST_W (CONST(1),CX) JZ (.all_written) /* * Yes, check if this is the last ( or only ) repetition of the pixmap. */ AND_L (EBX,EBX) JZ (.write_pixel) /* * No, there is more to come. Save this pixel for later. */ MOV_B (REGIND(ESI),CH) JMP (.all_written) /* * Yes, this is the last pixel displayed on this line. * Write it and let the scissors cut the unwanted extra pixel. */ .write_pixel: MOV_B (REGIND(ESI),AL) OUT_W /* * Check if the next repetition of the pixmap is using the entire * pixmap width. */ .all_written: MOV_L (pw_arg,EAX) CMP_L (EAX,EBX) JG (.cmp6) MOV_L (EBX,EAX) .cmp6: MOV_L (EAX,EDI) AND_L (EBX,EBX) JNZ (.next_pixmap_horizontal) INC_L (srcy_loc) DEC_L (srch_loc) JNZ (.next_pixmap_line) SUB_L (EDX,EDX) MOV_L (EDX,srcy_loc) CMP_L (EDX,h_arg) JNZ (.next_pixmap_vertical) /* * Wait until room for 3 entries in the fifo. */ MOV_L (GP_STAT,EDX) .wait_queue_3: IN_B TEST_B (CONST(0x20),AL) JNZ (.wait_queue_3) /* * Reset FRGD_MIX to default. */ MOV_L (FRGD_MIX,EDX) MOV_W (GP_DEF_FRGD_MIX,AX) OUT_W /* * Reset the scissors regsiters. */ MOV_L (MULTIFUNC_CNTL,EDX) MOV_W (SCISSORS_L,AX) OUT_W MOV_W (SCISSORS_R,AX) OR_W (CONST(1023),AX) OUT_W .finish: POP_L (EBX) POP_L (ESI) POP_L (EDI) ADD_L (CONST(16),ESP) POP_L (EBP) RET