/* liqbase
 * Copyright (C) 2008 Gary Birkett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *
 * Routines which write data onto an liqimage surface
 *
 */


//20080726:gb:fixed up edge cases for glyph rendering  (offscreen left/right was just bailing rather than drawing a partial)




// do some alpha blending :)
// i see now how it can work
//http://www.phatcode.net/articles.php?id=233
//DIM alpha AS LONG
//alpha = 30   '30 percent
//blendpix.Red = (srcpixone.Red - srcpixtwo.Red) * alpha + srcpixtwo.Red) \ 100



#include <memory.h>

// http://www.x.org/docs/Xv/video
// todo:lcuk: plane offsets and sizes are still calculated by hand in places in this file;
// the properties already stored in the liqimage itself could be used instead (left for a later cleanup)

// Small generic helper macros.
// NOTE: each macro may evaluate its argument(s) more than once, so never pass
// an expression with side effects (for example MIN(i++, j)).
// MIN: the smaller of x and y
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
// MAX: the larger of x and y
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
// ABS: absolute value of x
#define ABS(x) (((x) >= 0) ? (x) : (-(x)))
// SGN: 1 when x is zero or positive, otherwise -1 (zero is never reported as 0)
#define SGN(x) (((x) >= 0) ? 1 : -1)

#include "liqapp.h"
#include "liq_xsurface.h"			// include available workhorse functions
#include "liqfont.h"




//########################################################################
//######################################################################## quick blitter
//########################################################################




// OR a rectangular strip of bytes from a source buffer onto a destination buffer.
//
//   linecount    : number of lines to process
//   charsperline : number of bytes combined on every line
//   srcdataptr   : first source byte of the first line
//   dstdataptr   : first destination byte of the first line
//   srclinejump  : bytes skipped in the source after each line has been processed
//   dstlinejump  : bytes skipped in the destination after each line has been processed
//
// Every destination byte becomes (destination | source).
//
// The pointers may have any alignment: whole words are moved through memcpy()
// into local variables instead of dereferencing cast `unsigned int *` / `short *`
// pointers, which was undefined behaviour for unaligned addresses.
//
// The function is deliberately not declared plain `inline`: under C99/C11 rules
// that provides no external definition and calls can fail to link.
void xsurface_drawstrip_or(
	unsigned int  linecount,
	unsigned int  charsperline,
	unsigned char *srcdataptr,
	unsigned char *dstdataptr,
	unsigned int  srclinejump,
	unsigned int  dstlinejump)
{
	// a width that is zero, or negative once viewed as an int, draws nothing
	if((int)charsperline<=0) return;

	while(linecount--)
	{
		int remaining=(int)charsperline;

		//############################ handle the big blocks first, one machine word at a time
		while(remaining>=(int)sizeof(unsigned int))
		{
			unsigned int srcword;
			unsigned int dstword;
			memcpy(&srcword,srcdataptr,sizeof(srcword));
			memcpy(&dstword,dstdataptr,sizeof(dstword));
			dstword|=srcword;
			memcpy(dstdataptr,&dstword,sizeof(dstword));
			srcdataptr+=sizeof(unsigned int);
			dstdataptr+=sizeof(unsigned int);
			remaining-=(int)sizeof(unsigned int);
		}
		//############################ handle the trailing bytes
		while(remaining-- > 0)
		{
			*dstdataptr++ |= *srcdataptr++;
		}

		// move both cursors to the start of the next line
		srcdataptr+=srclinejump;
		dstdataptr+=dstlinejump;
	}
}





// Copy a rectangular strip of bytes from a source buffer into a destination buffer.
//
//   linecount    : number of lines to copy
//   charsperline : number of bytes copied on every line
//   srcdataptr   : first source byte of the first line
//   dstdataptr   : first destination byte of the first line
//   srclinejump  : bytes skipped in the source after each line has been copied
//   dstlinejump  : bytes skipped in the destination after each line has been copied
//
// Each line is moved with memmove(), so the pointers may have any alignment
// (the previous hand-unrolled version stored through cast `unsigned int *` /
// `short *` pointers, which is undefined behaviour for unaligned addresses)
// and overlapping source/destination lines are handled safely.
//
// The function is deliberately not declared plain `inline`: under C99/C11 rules
// that provides no external definition and calls can fail to link.
void xsurface_drawstrip(
	unsigned int  linecount,
	unsigned int  charsperline,
	unsigned char *srcdataptr,
	unsigned char *dstdataptr,
	unsigned int  srclinejump,
	unsigned int  dstlinejump)
{
	// a width that is zero, or negative once viewed as an int, draws nothing
	if((int)charsperline<=0) return;

	while(linecount--)
	{
		memmove(dstdataptr,srcdataptr,charsperline);

		// step over the bytes just copied ...
		srcdataptr+=charsperline;
		dstdataptr+=charsperline;
		// ... and then over the undrawn remainder of each line
		srcdataptr+=srclinejump;
		dstdataptr+=dstlinejump;
	}
}







// Copy one 8 bit image plane onto one 8 bit surface plane, clipped to the surface.
//
//   surfdata     : first byte of the destination plane, surfw bytes per row
//   surfw, surfh : destination plane size
//   imgdata      : first byte of the source plane, imgw bytes per row
//   imgw, imgh   : source plane size
//   x, y         : destination position of the image's top left corner (may be negative)
//
// Parts of the image that fall outside the surface are skipped; nothing at all is
// drawn when the visible area is empty or when either size is not positive
// (a negative height used to reach the blitter as a huge unsigned line count).
//
// The function is deliberately not declared plain `inline`: under C99/C11 rules
// that provides no external definition and calls can fail to link.
void xdata_drawimage_grey(char *surfdata,int surfw,int surfh,char *imgdata,int imgw,int imgh,int x,int y)
{
	int srcx  = 0;		// first visible image column
	int srcy  = 0;		// first visible image row
	int draww = imgw;	// visible width
	int drawh = imgh;	// visible height

	if(imgw<=0  || imgh<=0)  return;
	if(surfw<=0 || surfh<=0) return;

	// clip against the left and right edges
	if(x<0)
	{
		srcx   = -x;
		draww += x;
		x      = 0;
	}
	if(x+draww>surfw)
	{
		draww = surfw-x;
	}

	// clip against the top and bottom edges
	if(y<0)
	{
		srcy   = -y;
		drawh += y;
		y      = 0;
	}
	if(y+drawh>surfh)
	{
		drawh = surfh-y;
	}

	// completely off the surface
	if(draww<=0 || drawh<=0) return;

	unsigned char *gdata = &((unsigned char*)imgdata) [ imgw  * srcy + srcx ];
	unsigned char *pdata = &((unsigned char*)surfdata)[ surfw * y    + x    ];

	// after each line skip the part of the image row / surface row that is not drawn
	xsurface_drawstrip(drawh,draww,gdata,pdata,imgw-draww,surfw-draww);
}


// Draw a whole image onto a surface at x,y without scaling or blending.
// Plane 0 is drawn at full size; planes 1 and 2 are drawn with every size and
// coordinate halved.
inline void xsurface_drawimage_color(liqimage *surface,liqimage *image,int x,int y)
{
	// halved geometry used for planes 1 and 2
	const int halfsurfw = surface->width  >> 1;
	const int halfsurfh = surface->height >> 1;
	const int halfimgw  = image->width    >> 1;
	const int halfimgh  = image->height   >> 1;
	const int halfx     = x >> 1;
	const int halfy     = y >> 1;
	int plane;

	// plane 0, full resolution
	xdata_drawimage_grey(
		&surface->data[surface->offsets[0]], surface->width, surface->height,
		&image->data[image->offsets[0]],     image->width,   image->height,
		x, y);

	// planes 1 and 2, half resolution
	for(plane=1; plane<=2; plane++)
	{
		xdata_drawimage_grey(
			&surface->data[surface->offsets[plane]], halfsurfw, halfsurfh,
			&image->data[image->offsets[plane]],     halfimgw,  halfimgh,
			halfx, halfy);
	}
}



//####################################################################################### 
//####################################################################################### ScaleLine variations Std
//####################################################################################### 


// Scale one line of bytes from Source into Target using integer error stepping.
//
//   Target, Source     : first byte of the destination / source line
//   SrcWidth, TgtWidth : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount  : number of destination pixels stepped through
//
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_grey(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount)
{
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      *Target = *Source;
    }
    Target++;
    Source += IntPart;

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
    }
  }
}



// Scale one line of bytes from Source into Target using integer error stepping.
// This variant performs the same plain copy as ScaleLine_grey.
//
//   Target, Source     : first byte of the destination / source line
//   SrcWidth, TgtWidth : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount  : number of destination pixels stepped through
//
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_uv(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount)
{
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      *Target = *Source;
    }
    Target++;
    Source += IntPart;

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
    }
  }
}

//####################################################################################### 
//####################################################################################### ScaleLine variations Alpha
//####################################################################################### 


// Scale one line from Source into Target, blending each pixel with a per-pixel
// alpha value and a global blend factor:
//
//     target = target + (((source - target) * blend * alpha) >> 16)
//
//   Target, Source          : first byte of the destination / source line
//   SrcWidth, TgtWidth      : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset      : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount       : number of destination pixels stepped through
//   Src_alphachannelfullres : alpha line, stepped at the same rate as Source
//   blend                   : global blend factor, treated as 0..255
//
// Pixel, alpha and blend values are read as unsigned bytes.  Plain `char` is signed
// on many platforms, where values above 127 used to be read as negative numbers.
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_alphablend_grey(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount,char *Src_alphachannelfullres,char blend)
{
  const int opacity = (unsigned char)blend;
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      // alpha blending from an actual alpha channel
      const int s = (unsigned char)*Source;
      const int t = (unsigned char)*Target;
      const int a = (unsigned char)*Src_alphachannelfullres;
      // >>16 on a negative product relies on the arithmetic shift gcc and clang provide
      *(unsigned char *)Target = (unsigned char)(t + (((s - t) * opacity * a) >> 16));
    }
    Target++;
    Source += IntPart;
    Src_alphachannelfullres += IntPart;

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
      Src_alphachannelfullres++;
    }
  }
}




// Scale one line from Source into Target, blending each pixel with an alpha
// value and a global blend factor:
//
//     target = target + (((source - target) * blend * alpha) >> 16)
//
// A source or target value of 0 is treated as 128 before blending, and a result
// of 0 is stored as 1.
//
//   Target, Source            : first byte of the destination / source line
//   SrcWidth, TgtWidth        : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset        : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount         : number of destination pixels stepped through
//   Src_alphachanneldoubleres : alpha line, stepped two bytes for every source pixel
//   blend                     : global blend factor, treated as 0..255
//
// Pixel, alpha and blend values are read as unsigned bytes.  Plain `char` is signed
// on many platforms, where values above 127 used to be read as negative numbers.
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_alphablend_uv(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount,char *Src_alphachanneldoubleres,char blend)
{
  const int opacity = (unsigned char)blend;
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      // alpha blending from an actual alpha channel
      int s = (unsigned char)*Source;
      int t = (unsigned char)*Target;
      const int a = (unsigned char)*Src_alphachanneldoubleres;
      int r;
      if (!s) s = 128;
      if (!t) t = 128;
      // >>16 on a negative product relies on the arithmetic shift gcc and clang provide
      r = t + (((s - t) * opacity * a) >> 16);
      if (!r) r = 1;
      *(unsigned char *)Target = (unsigned char)r;
    }
    Target++;
    Source += IntPart;
    Src_alphachanneldoubleres += (IntPart * 2);

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
      Src_alphachanneldoubleres += 2;
    }
  }
}




//####################################################################################### 
//####################################################################################### ScaleLine variations Standard Blend
//####################################################################################### 




// Scale one line from Source into Target, mixing every pixel with a single
// global blend factor:
//
//     target = target + ((source - target) * blend) / 256
//
//   Target, Source     : first byte of the destination / source line
//   SrcWidth, TgtWidth : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount  : number of destination pixels stepped through
//   blend              : global blend factor, treated as 0..255
//
// Pixel and blend values are read as unsigned bytes.  Plain `char` is signed on
// many platforms, where values above 127 used to be read as negative numbers.
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_blend_grey(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount,char blend)
{
  const int opacity = (unsigned char)blend;
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      // simple blending
      const int s = (unsigned char)*Source;
      const int t = (unsigned char)*Target;
      *(unsigned char *)Target = (unsigned char)(t + ((s - t) * opacity) / 256);
    }
    Target++;
    Source += IntPart;

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
    }
  }
}



// Scale one line from Source into Target, mixing every pixel with a single
// global blend factor:
//
//     target = target + ((source - target) * blend) / 256
//
// A source or target value of 0 is treated as 128 before blending.
//
//   Target, Source     : first byte of the destination / source line
//   SrcWidth, TgtWidth : source and destination widths; their ratio sets the step
//   TgtDrawStartOffset : destination pixels before this index are stepped over but not written
//   TgtDrawPixelCount  : number of destination pixels stepped through
//   blend              : global blend factor, treated as 0..255
//
// Pixel and blend values are read as unsigned bytes.  Plain `char` is signed on
// many platforms, where values above 127 used to be read as negative numbers.
// Nothing is done when TgtWidth is zero (the step calculation would divide by zero).
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void ScaleLine_blend_uv(char *Target, char *Source, int SrcWidth, int TgtWidth,int TgtDrawStartOffset, int TgtDrawPixelCount,char blend)
{
  const int opacity = (unsigned char)blend;
  int NumPixels;
  int IntPart;
  int FractPart;
  int E = 0;

  if (TgtWidth == 0) return;

  IntPart   = SrcWidth / TgtWidth;   // whole source pixels advanced per target pixel
  FractPart = SrcWidth % TgtWidth;   // remainder, accumulated in E

  for (NumPixels = 0; NumPixels < TgtDrawPixelCount; NumPixels++)
  {
    if (NumPixels >= TgtDrawStartOffset)
    {
      int s = (unsigned char)*Source;
      int t = (unsigned char)*Target;
      if (!s) s = 128;
      if (!t) t = 128;
      *(unsigned char *)Target = (unsigned char)(t + ((s - t) * opacity) / 256);
    }
    Target++;
    Source += IntPart;

    // carry the accumulated remainder into one extra source step
    E += FractPart;
    if (E >= TgtWidth)
    {
      E -= TgtWidth;
      Source++;
    }
  }
}


//####################################################################################### 
//####################################################################################### drawzoomimage_blend_raw
//####################################################################################### 


// Scale one plane of a source image onto one plane of a destination surface,
// optionally blending.
//
//   srcmem, smw, smh   : source plane and its size (smw bytes per row)
//   six, siy, siw, sih : rectangle of the source plane to read
//   dstmem, dmw, dmh   : destination plane and its size (dmw bytes per row)
//   dix, diy, diw, dih : rectangle of the destination plane to fill (may lie partly off the surface)
//   plane_is_uv        : non zero selects the _uv line routines and the larger alpha row step
//   srcalphachannel    : optional alpha data; when not NULL the alpha blending line routines are used
//   blend              : global blend factor, treated as 0..255 (0 draws nothing)
//
// The blend factor is examined as an unsigned byte.  With a signed plain `char`
// the old test `blend<255` was always true, so a fully opaque draw never reached
// the fast copy path.
//
// NOTE(review): srcalphachannel is used from the address given and is never
// advanced by six/siy, so alpha appears to assume a source rectangle starting
// at 0,0 - confirm against callers.
static inline void xsurface_drawzoomimage_blend_raw(
	
										char *srcmem,
										int smw,int smh,
										int six,int siy,		// SrcImgPos
										int siw,int sih, 		// SrcImgSize

										char *dstmem,
										int dmw,int dmh,
										int dix,int diy,		// DstImgPos
										int diw,int dih, 		// DstImgSize
										int plane_is_uv,
										char *srcalphachannel,
										char blend
										)
{
	const int opacity = (unsigned char)blend;

	if(!siw || !sih) return;
	if(!diw || !dih) return;
	if(diy+dih<0)return;
	if(diy>=dmh)return;
	if(dix+diw<0)return;
	if(dix>=dmw)return;
	if(!opacity) return;

	// index of the first pixel on each line that is on the surface
	const int dso = (dix>=0) ? 0 : -dix;
	// number of pixels stepped through on each line, clipped at the right hand edge
	const int dpc = (dix+diw<=dmw) ? diw : (dmw-dix);

	// vertical stepping uses the same integer error scheme as the line routines
	const int yIntPart   = sih / dih;
	const int yFractPart = sih % dih;
	// alpha bytes skipped for every source row of this plane
	const int alpharowstep = plane_is_uv ? (smw*4) : smw;

	char *dstdataptr = &dstmem[  diy * dmw + dix ];
	char *srcdataptr = &srcmem[  siy * smw + six ];
	int yE = 0;
	int y;

	for(y=0;y<dih;y++)
	{
		const int yy=diy+y;

		//===================
		//draw a line, but only when it lands on the surface
		//===================
		if(yy>=0 && yy<dmh)
		{
			if(srcalphachannel)
			{
				if(!plane_is_uv)
					ScaleLine_alphablend_grey(dstdataptr,srcdataptr,siw,diw,dso,dpc,srcalphachannel,blend);
				else
					ScaleLine_alphablend_uv(dstdataptr,srcdataptr,siw,diw,dso,dpc,srcalphachannel,blend);
			}
			else
			if(opacity<255)
			{
				if(!plane_is_uv)
					ScaleLine_blend_grey(dstdataptr,srcdataptr,siw,diw,dso,dpc,blend);
				else
					ScaleLine_blend_uv(dstdataptr,srcdataptr,siw,diw,dso,dpc,blend);
			}
			else
			{
				if(!plane_is_uv)
					ScaleLine_grey(dstdataptr,srcdataptr,siw,diw,dso,dpc);
				else
					ScaleLine_uv(dstdataptr,srcdataptr,siw,diw,dso,dpc);
			}
		}
		dstdataptr+=dmw;

		//move source along if required
		srcdataptr += (yIntPart*smw);
		if(srcalphachannel)
		{
			srcalphachannel += (yIntPart*alpharowstep);
		}

		yE += yFractPart;
		if (yE >= dih)
		{
			// the accumulated remainder is worth one extra source line
			yE -= dih;
			srcdataptr+=smw;
			if(srcalphachannel)
			{
				srcalphachannel+=alpharowstep;
			}
		}
	}
}







// Draw a rectangle of a source image, scaled, onto a rectangle of a destination
// image, optionally blended.
//
//   srcimage           : image to read from
//   six, siy, siw, sih : source rectangle
//   dstimage           : image to draw onto
//   dix, diy, diw, dih : destination rectangle (may lie partly off the image)
//   blend              : global blend factor, treated as 0..255
//
// A source with 2 planes uses plane 1 as alpha, a source with 4 planes uses
// plane 3 as alpha.  Plane 0 is always drawn; planes 1 and 2 are drawn with all
// geometry halved, and only when the source has at least 3 planes.
//
// Fixes compared with the earlier version:
//  - blend is examined as an unsigned byte; with a signed plain `char` the test
//    `blend==255` could never be true
//  - the unscaled quick draw shortcut is only taken when the source rectangle
//    is the whole source image; it used to ignore six/siy/siw/sih
void xsurface_drawzoomblendimage(
	
										liqimage *srcimage,
										int six,int siy,		// SrcImgPos
										int siw,int sih, 		// SrcImgSize

										liqimage *dstimage,
										int dix,int diy,		// DstImgPos
										int diw,int dih, 		// DstImgSize
										
										char blend
										)
{
	const int opacity = (unsigned char)blend;

	int smw=srcimage->width;		// SrcMemW
	int smh=srcimage->height;		// SrcMemH
	
	int dmw=dstimage->width;		// DstMemW
	int dmh=dstimage->height;		// DstMemH
	
	// destination rectangle entirely off the image
	if(diy+dih<0)return;
	if(diy>=dmh)return;
	if(dix+diw<0)return;
	if(dix>=dmw)return;
	
	char *alphachannel = NULL;
	
	if(srcimage->num_planes==2)
	{
		// grey with alpha :)
		alphachannel = &srcimage->data[srcimage->offsets[1]];
	}
	else
	if(srcimage->num_planes==4)
	{
		alphachannel = &srcimage->data[srcimage->offsets[3]];
	}
	
	// quick draw: whole 3 plane image, no scaling, fully opaque (no alpha support)
	if(opacity==255 && srcimage->num_planes==3
	   && six==0 && siy==0 && siw==smw && sih==smh
	   && diw==smw && dih==smh)
	{
		xsurface_drawimage_color(dstimage,srcimage,dix,diy);
		return;
	}

	// plane 0 at full resolution
	xsurface_drawzoomimage_blend_raw(
								&srcimage->data[srcimage->offsets[0]],
								smw,smh,
								six,siy,
								siw,sih,
								
								&dstimage->data[dstimage->offsets[0]],
								dmw,dmh,
								dix,diy,
								diw,dih,
								0,
								
								alphachannel,
								
								blend
							  );

	// move onto the color portion: everything is halved
	smw>>=1;
	smh>>=1;
	six>>=1;
	siy>>=1;
	siw>>=1;
	sih>>=1;
	
	dmw>>=1;
	dmh>>=1;	
	dix>>=1;
	diy>>=1;
	diw>>=1;
	dih>>=1;
	
	if(!siw || !sih) return;
	if(!diw || !dih) return;
	
	// eliminate greys (they might have had alpha)
	if(srcimage->num_planes<3) return;
	
	xsurface_drawzoomimage_blend_raw(
								&srcimage->data[srcimage->offsets[1]],
								smw,smh,
								six,siy,
								siw,sih,
								
								&dstimage->data[dstimage->offsets[1]],
								dmw,dmh,
								dix,diy,
								diw,dih,
								1,
								alphachannel,
								blend
							  );

	xsurface_drawzoomimage_blend_raw(
								&srcimage->data[srcimage->offsets[2]],
								smw,smh,
								six,siy,
								siw,sih,
								
								&dstimage->data[dstimage->offsets[2]],
								dmw,dmh,
								dix,diy,
								diw,dih,
								1,
								alphachannel,
								blend
							  );
}













// Draw a rectangle of srcimage, scaled, onto a rectangle of dstimage.
// Convenience wrapper: forwards every argument to xsurface_drawzoomblendimage
// with a blend value of 255.
void xsurface_drawzoomimage(
	
										liqimage *srcimage,
										int six,int siy,		// SrcImgPos
										int siw,int sih, 		// SrcImgSize

										liqimage *dstimage,
										int dix,int diy,		// DstImgPos
										int diw,int dih 		// DstImgSize
										
										)
{
	const char fullblend = 255;

	xsurface_drawzoomblendimage(srcimage, six,siy, siw,sih, dstimage, dix,diy, diw,dih, fullblend);
}



//########################################################################
//######################################################################## draw a single glyph onto the surface
//########################################################################


// OR a single glyph of a font onto plane 0 of a surface, with its top left
// corner at x,y.  The glyph is clipped against all four surface edges, so a
// partly visible glyph draws only its visible part.  Glyphs without data are
// skipped.
inline void xsurface_drawglyph_grey(liqimage *surface,LIQFONT *font,int x,int y,unsigned char glyph)
{
	unsigned char *glyphpixels = (unsigned char*)font->glyphdata[glyph];
	if(!glyphpixels) return;

	const int tilew = font->glyphwidths[glyph];	// bytes per glyph row
	int visw = tilew;				// visible width
	int vish = font->glyphheights[glyph];		// visible height
	const int sw = surface->width;
	const int sh = surface->height;
	int srcoff = 0;					// offset of the first visible glyph byte

	// left edge: skip the hidden columns
	if(x<0)
	{
		if(x < -visw) return;
		srcoff = -x;
		visw  += x;
		x      = 0;
	}
	int srcskip = tilew-visw;			// glyph bytes not drawn on each row

	// entirely above the surface
	if(y+vish<0) return;

	// right edge
	if(x+visw>sw)
	{
		if(x>=sw) return;
		srcskip += (x+visw)-sw;
		visw     = sw-x;
	}

	// bottom edge
	if(y+vish>sh)
	{
		if(y>=sh) return;
		vish = sh-y;
	}

	int dstoff = sw*y + x;

	// top edge: skip the hidden rows in both the glyph and the surface offset
	if(y<0)
	{
		const int hiddenrows = -y;
		srcoff += tilew*hiddenrows;
		dstoff += sw*hiddenrows;
		vish   -= hiddenrows;
	}

	unsigned char *dstpixels = (unsigned char*)&surface->data[surface->offsets[0]];

	xsurface_drawstrip_or(vish,visw,&glyphpixels[srcoff],&dstpixels[dstoff],srcskip,sw-visw);
}

//########################################################################
//######################################################################## draw text as fast as possible onto xv surface :)
//########################################################################

// Draw a zero terminated string onto the surface, one glyph after another,
// starting at xs,ys.  Returns the x position following the last glyph.
int xsurface_drawtext_grey(liqimage *surface,LIQFONT *font,int xs,int ys,char *data)
{
	int penx=xs;
	const unsigned char *cursor;
	for(cursor=(const unsigned char *)data; *cursor; cursor++)
	{
		const unsigned char ch=*cursor;
		xsurface_drawglyph_grey(surface,font,penx,ys,ch);
		// advance by the width of the glyph just drawn
		penx+=font->glyphwidths[ch];
	}
	return penx;
}

// Draw exactly datalen characters from data onto the surface, one glyph after
// another, starting at xs,ys (zero bytes do not end the text).  Returns the x
// position following the last glyph, or xs when datalen is not positive.
int xsurface_drawtextn_grey(liqimage *surface,LIQFONT *font,int xs,int ys,char *data,int datalen)
{
	int penx=xs;
	int i;
	for(i=0;i<datalen;i++)
	{
		const unsigned char ch=(unsigned char)data[i];
		xsurface_drawglyph_grey(surface,font,penx,ys,ch);
		// advance by the width of the glyph just drawn
		penx+=font->glyphwidths[ch];
	}
	return penx;
}

//########################################################################
//######################################################################## clear canvas
//########################################################################

// Fill the whole surface with one grey level: plane 0 is set to grey and
// planes 1 and 2 are set to 128.  Planes 1 and 2 are each treated as a
// quarter of the size of plane 0.
inline void xsurface_drawclear_grey(liqimage *surface,unsigned char grey)
{
	const int fullplanesize    = surface->width*surface->height;
	const int quarterplanesize = fullplanesize >> 2;
	int plane;

	memset(&surface->data[surface->offsets[0]], grey, fullplanesize);
	for(plane=1; plane<=2; plane++)
	{
		memset(&surface->data[surface->offsets[plane]], 128, quarterplanesize);
	}
}

// Fill the whole surface with one colour: plane 0 is set to grey, plane 1 to u
// and plane 2 to v.  Planes 1 and 2 are each treated as a quarter of the size
// of plane 0.
inline void xsurface_drawclear_yuv(liqimage *surface,unsigned char grey,unsigned char u,unsigned char v)
{
	const int fullplanesize    = surface->width*surface->height;
	const int quarterplanesize = fullplanesize >> 2;

	memset(&surface->data[surface->offsets[0]], grey, fullplanesize);
	memset(&surface->data[surface->offsets[1]], u,    quarterplanesize);
	memset(&surface->data[surface->offsets[2]], v,    quarterplanesize);
}

//########################################################################
//######################################################################## drawrect color optimised (older than other fns) 
//########################################################################
//fixed:todo: fix bug with unaligned widths, at present it does not fill upto 3 right hand side pixels

// Fill a rectangle of the surface with one colour.
//
//   surface    : surface to draw on
//   x, y, w, h : rectangle in plane 0 pixels; it is clipped to the surface
//   grey, u, v : values written to planes 0, 1 and 2
//
// Plane 0 is filled at full resolution.  Planes 1 and 2 are filled with x, y, w
// and h halved (rounded down), using half the surface width as the row length.
//
// Rows are filled with memset(), which has no alignment requirement.  The earlier
// version stored 32 and 16 bit words through cast byte pointers (undefined
// behaviour for unaligned x) and built its fill word with `grey<<24`, which
// overflows a signed int for grey values of 128 and above.
//
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void xsurface_drawrect_yuv(liqimage *surface,int x,int y,int w,int h, unsigned char grey,unsigned char u,unsigned char v)
{
	int row;

	if(w<=0 || h<=0) return;
	if(x+w<0 || y+h<0) return;

	// clip against the top and left edges
	if(y<0)	{		h=h+y;		y=0;	}
	if(x<0)	{		w=w+x;		x=0;	}
	// clip against the right and bottom edges
	if(x>=surface->width)  return;
	if(y>=surface->height) return;
	if(x+w>surface->width)  w=(surface->width-x);
	if(y+h>surface->height) h=(surface->height-y);

	// plane 0
	for(row=y; row<(y+h); row++)
	{
		memset(&surface->data[ surface->offsets[0] + (surface->width*row) + x ], grey, w);
	}

	// same process now for planes 1 and 2 at half resolution
	y>>=1;
	x>>=1;
	w>>=1;
	h>>=1;
	if(w<1 || h<1) return;

	const int halfwidth = surface->width>>1;
	for(row=y; row<(y+h); row++)
	{
		memset(&surface->data[ surface->offsets[1] + (halfwidth*row) + x ], u, w);
		memset(&surface->data[ surface->offsets[2] + (halfwidth*row) + x ], v, w);
	}
}

//########################################################################
//######################################################################## drawrectwash uv wash
//########################################################################
//fixed:todo: fix bug with unaligned widths, at present it does not fill upto 3 right hand side pixels
// Wash a rectangle of the surface with one colour: only planes 1 and 2 are
// written, plane 0 is left untouched.
//
//   surface    : surface to draw on
//   x, y, w, h : rectangle in plane 0 pixels; it is clipped to the surface
//   u, v       : values written to planes 1 and 2
//
// After clipping, an odd y is moved up one row and an odd height is rounded up,
// then x, y, w and h are halved and the rows are filled using half the surface
// width as the row length.
//
// Rows are filled with memset(), which has no alignment requirement; the earlier
// version stored 16 bit words through cast byte pointers, which is undefined
// behaviour for unaligned addresses.
//
// Not declared plain `inline`: under C99/C11 that gives no external definition
// and calls can fail to link.
void xsurface_drawrectwash_uv(liqimage *surface,int x,int y,int w,int h, unsigned char u,unsigned char v)
{
	int row;

	if(w<=0 || h<=0) return;
	if(x+w<0 || y+h<0) return;

	// clip against the top and left edges
	if(y<0)	{		h=h+y;		y=0;	}
	if(x<0)	{		w=w+x;		x=0;	}
	// clip against the right and bottom edges
	if(x>=surface->width)  return;
	if(y>=surface->height) return;
	if(x+w>surface->width)  w=(surface->width-x);
	if(y+h>surface->height) h=(surface->height-y);

	// widen to whole row pairs before halving
	if(y&1){ y--;h++; }
	if(h&1){     h++; }

	y>>=1;
	x>>=1;
	w>>=1;
	h>>=1;
	if(w<1 || h<1) return;

	const int halfwidth = surface->width>>1;
	for(row=y; row<(y+h); row++)
	{
		memset(&surface->data[ surface->offsets[1] + (halfwidth*row) + x ], u, w);
		memset(&surface->data[ surface->offsets[2] + (halfwidth*row) + x ], v, w);
	}
}

//########################################################################
//######################################################################## drawrect grey optimised
//########################################################################
//fixed: unaligned widths used to leave up to 3 right hand side pixels unfilled; the remainder of each row is now filled as well
// Fill a rectangle of the luma plane with a constant grey level.
// The chroma planes are not touched.
//
//   surface : image to write into (luma starts at offsets[0], one byte per pixel, width bytes per row)
//   x,y,w,h : rectangle in pixels, it is clipped to the surface
//   grey    : luma value to store
//
// Nothing is written when the rectangle is empty or lies completely off the surface.
inline void xsurface_drawrect_grey(liqimage *surface,int x,int y,int w,int h, unsigned char grey)
{
	if(x+w<0)return;
	if(y+h<0)return;
	if(w<=0)return;
	if(h<=0)return;
	// clip against the top and left edges
	if(y<0)	{		h=h+y;		y=0;	}
	if(x<0)	{		w=w+x;		x=0;	}
	// clip against the right edge
	if(x+w>=surface->width)
	{
		if(x>=surface->width) return;
		w=(surface->width-x);
	}
	// clip against the bottom edge
	if(y+h>=surface->height)
	{
		if(y>=surface->height) return;
		h=(surface->height-y);
	}

	unsigned int yy;
	for (yy = y; yy < (unsigned int)(y+h); yy++)
	{
		// one memset per row: fills exactly w bytes, works for any alignment of the
		// row start and avoids building a 32 bit pattern by shifting grey into the sign bit
		memset(&surface->data[ surface->offsets[0] + (surface->width*yy) + x ], grey, (size_t)w);
	}
}

//########################################################################
//######################################################################## pset color
//########################################################################

// Set a single pixel: writes grey into the luma plane and u/v into the
// chroma planes at the halved coordinate.
// Coordinates outside the surface are silently ignored.
inline void xsurface_drawpset_yuv(liqimage *surface,int x,int y,char grey,char u,char v)
{
	if(x<0 || y<0 || x>=surface->width || y>=surface->height) return;

	const unsigned int u_base = surface->offsets[1];
	const unsigned int v_base = surface->offsets[2];
	const int chroma_stride = surface->width>>1;

	// luma: one byte per pixel
	surface->data[ surface->offsets[0] + (surface->width*y+ x)] = grey;

	// chroma: same slot in both planes, indexed at half resolution
	const int chroma_x = x>>1;
	const int chroma_y = y>>1;
	const int chroma_index = chroma_stride*chroma_y + chroma_x;
	surface->data[u_base + chroma_index] = u;
	surface->data[v_base + chroma_index] = v;
}

//########################################################################
//######################################################################## pset grey
//########################################################################

// Set the luma value of a single pixel, the chroma planes are left alone.
// Coordinates outside the surface are silently ignored.
inline void xsurface_drawpset_grey(liqimage *surface,int x,int y,char grey)
{
	if(x>=0 && y>=0 && x<surface->width && y<surface->height)
	{
		surface->data[ surface->offsets[0] + (surface->width*y+ x)] = grey;
	}
}

//########################################################################
//######################################################################## pget color
//########################################################################

// read the byte at (x,y) from a plane whose rows are linewidth bytes apart
#define interal_getchar(x,y,buffer,linewidth)  (buffer)[ (linewidth) * (y) + (x) ]

// Read a single pixel: stores the luma value in grey[0] and the chroma values
// found at the halved coordinate in u[0] and v[0].
//
// Coordinates outside the surface are rejected in the same way the pset
// routines reject them: the function returns and grey/u/v are left untouched.
inline void xsurface_drawpget_yuv(liqimage *surface,int x1, int y1, unsigned char *grey,unsigned char *u,unsigned char *v)
{
	if(x1<0)return;
	if(y1<0)return;
	if(x1>=surface->width)return;
	if(y1>=surface->height)return;

	unsigned int pw=surface->width;
	unsigned int uo = surface->offsets[1];
	unsigned int vo = surface->offsets[2];

	// luma is read from offsets[0], matching where the pset routines write it
	grey[0]=interal_getchar(x1,y1,&surface->data[surface->offsets[0]],pw);

	// chroma planes are indexed at half resolution
	x1>>=1;
	y1>>=1;
	pw>>=1;
	u[0]=interal_getchar(x1,y1,&surface->data[uo],pw);
	v[0]=interal_getchar(x1,y1,&surface->data[vo],pw);
}

//########################################################################
//######################################################################## line internal
//########################################################################

// store byte c at (x,y) in a plane whose rows are linewidth bytes apart
#define interal_linepaintchar(x,y,c,buffer,linewidth)  buffer[ (linewidth) * (y) + (x) ] = (c)

// Draw a line of constant value into a single byte plane using integer
// error accumulation. Both endpoints are painted.
// No clipping is done here, the caller has to make sure both endpoints lie
// inside the plane. The surface argument is not used.
void xsurface_interalline(liqimage *surface,int x1, int y1, int x2, int y2, char grey,char *buffer,int linewidth)
{
	const int span_x = ABS(x2-x1);		// distance covered on each axis
	const int span_y = ABS(y2-y1);
	const int step_x = SGN(x2-x1);		// direction of travel on each axis
	const int step_y = SGN(y2-y1);
	int cur_x = x1;
	int cur_y = y1;
	int err;
	int remaining;

	// the start point is always painted
	interal_linepaintchar(cur_x,cur_y,grey,buffer,linewidth);

	if (span_x >= span_y)
	{
		// Line runs horizontally: one step in x per iteration, y follows when the error overflows
		err = span_x>>1;
		remaining = span_x;
		while (remaining-- > 0)
		{
			err += span_y;
			if (err >= span_x)
			{
				err -= span_x;
				cur_y += step_y;
			}
			cur_x += step_x;
			interal_linepaintchar(cur_x,cur_y,grey,buffer,linewidth);
		}
		return;
	}

	// Line runs vertically: one step in y per iteration, x follows when the error overflows
	err = span_y>>1;
	remaining = span_y;
	while (remaining-- > 0)
	{
		err += span_x;
		if (err >= span_y)
		{
			err -= span_y;
			cur_x += step_x;
		}
		cur_y += step_y;
		interal_linepaintchar(cur_x,cur_y,grey,buffer,linewidth);
	}
}

// replace the byte at (x,y) with 255 minus its current value
#define interal_linepaintchar_invert(x,y,buffer,linewidth)  { unsigned char c=buffer[ (linewidth) * (y) + (x) ];buffer[ (linewidth) * (y) + (x) ] = 255-c; }

// Walk a line through a single byte plane and invert every byte it visits,
// using the same integer stepping as xsurface_interalline.
// No clipping is done here, the caller has to make sure both endpoints lie
// inside the plane. The surface argument is not used.
void xsurface_interalline_invert(liqimage *surface,int x1, int y1, int x2, int y2, char *buffer,int linewidth)
{
	const int delta_x = x2-x1;
	const int delta_y = y2-y1;
	const int len_x = ABS(delta_x);		// distance covered on each axis
	const int len_y = ABS(delta_y);
	const int dir_x = SGN(delta_x);		// direction of travel on each axis
	const int dir_y = SGN(delta_y);
	int pos_x = x1;
	int pos_y = y1;
	int acc;
	int n;

	// the start point is always inverted
	interal_linepaintchar_invert(pos_x,pos_y,buffer,linewidth);

	if (len_x < len_y)
	{
		// Line runs vertically: y advances every iteration
		acc = len_y>>1;
		for (n = len_y; n > 0; n--)
		{
			acc += len_x;
			if (acc >= len_y)
			{
				acc -= len_y;
				pos_x += dir_x;
			}
			pos_y += dir_y;
			interal_linepaintchar_invert(pos_x,pos_y,buffer,linewidth);
		}
	}
	else
	{
		// Line runs horizontally: x advances every iteration
		acc = len_x>>1;
		for (n = len_x; n > 0; n--)
		{
			acc += len_y;
			if (acc >= len_x)
			{
				acc -= len_x;
				pos_y += dir_y;
			}
			pos_x += dir_x;
			interal_linepaintchar_invert(pos_x,pos_y,buffer,linewidth);
		}
	}
}

//########################################################################
//######################################################################## linecolor
//########################################################################

// Draw a coloured line: the full resolution line goes into the luma plane,
// then the same line with halved coordinates is drawn into both chroma planes.
// The line is not clipped: if either endpoint is off the surface nothing is drawn.
void xsurface_drawline_yuv(liqimage *surface,int x1, int y1, int x2, int y2, char grey,char u,char v)
{
	if(x1<0 || y1<0 || x1>=surface->width || y1>=surface->height) return;
	if(x2<0 || y2<0 || x2>=surface->width || y2>=surface->height) return;

	unsigned int stride = surface->width;
	const unsigned int u_base = surface->offsets[1];
	const unsigned int v_base = surface->offsets[2];

	// luma pass
	xsurface_interalline(surface,x1,y1,x2,y2,grey,&surface->data[surface->offsets[0]],stride);

	// chroma passes run at half resolution
	const int half_x1 = x1>>1;
	const int half_y1 = y1>>1;
	const int half_x2 = x2>>1;
	const int half_y2 = y2>>1;
	stride >>= 1;
	xsurface_interalline(surface,half_x1,half_y1,half_x2,half_y2,u,&surface->data[u_base],stride);
	xsurface_interalline(surface,half_x1,half_y1,half_x2,half_y2,v,&surface->data[v_base],stride);
}

//########################################################################
//######################################################################## linegrey
//########################################################################

// Draw a line of constant grey into the luma plane only.
// The line is not clipped: if either endpoint is off the surface nothing is drawn.
void xsurface_drawline_grey(liqimage *surface,int x1, int y1, int x2, int y2, char grey)
{
	if(x1<0 || y1<0 || x1>=surface->width || y1>=surface->height) return;
	if(x2<0 || y2<0 || x2>=surface->width || y2>=surface->height) return;

	const unsigned int stride = surface->width;
	xsurface_interalline(surface,x1,y1,x2,y2,grey,&surface->data[surface->offsets[0]],stride);
}

//########################################################################
//######################################################################## linegreyinvert
//########################################################################

// Invert the luma value of every pixel along a line.
// The line is not clipped: if either endpoint is off the surface nothing is drawn.
void xsurface_drawline_greyinv(liqimage *surface,int x1, int y1, int x2, int y2)
{
	if(x1<0 || y1<0 || x1>=surface->width || y1>=surface->height) return;
	if(x2<0 || y2<0 || x2>=surface->width || y2>=surface->height) return;

	const unsigned int stride = surface->width;
	xsurface_interalline_invert(surface,x1,y1,x2,y2,&surface->data[surface->offsets[0]],stride);
}

//########################################################################
//######################################################################## circle internal
//########################################################################

// Bounds checked single byte write: stores c at (x,y) only when the point
// lies inside a plane of linewidth columns by numlines rows.
inline void interal_linepaintcharf(int x, int y,char c,char *buffer,int linewidth,int numlines)
{
	if(x>=0 && y>=0 && x<linewidth && y<numlines)
	{
		interal_linepaintchar(x,y,c,buffer,linewidth);
	}
}

// Paint the eight symmetric points of a circle centred on (cx,cy) for the
// octant offset (x,y). Every point goes through the bounds checked writer,
// so points that fall outside the plane are skipped.
// The parameters are parenthesised so expression arguments expand correctly;
// they are evaluated several times, so do not pass expressions with side effects.
#define interal_circlepaintchar(cx,cy,x,y,c,buffer,linewidth,numlines)  \
		{ \
			interal_linepaintcharf((cx)+(x),(cy)+(y),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)+(x),(cy)-(y),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)-(x),(cy)+(y),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)-(x),(cy)-(y),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)+(y),(cy)+(x),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)+(y),(cy)-(x),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)-(y),(cy)+(x),                 (c),(buffer),(linewidth),(numlines)); \
			interal_linepaintcharf((cx)-(y),(cy)-(x),                 (c),(buffer),(linewidth),(numlines)); \
		}

// Draw the outline of a circle into a single byte plane using Bresenham's
// integer circle algorithm.
//
//   cx,cy     : centre of the circle
//   r         : radius, a negative radius draws nothing and 0 paints just the centre
//   grey      : value stored at every outline point
//   buffer    : plane to draw into
//   linewidth : width of the plane, used as row stride and as the x clipping limit
//   numlines  : height of the plane, used as the y clipping limit
//
// Points that fall outside the plane are skipped, so the circle may be partly off the plane.
void xsurface_interalcircle(int cx, int cy, int r,char grey,char *buffer,int linewidth,int numlines)
{
	int d=3-(2*r);		// decision variable
	int x;
	int y=r;
	// walk one octant, the macro mirrors each point into the other seven.
	// the x==y step is included: it supplies the pixels on the diagonals, stopping
	// at x<y leaves a gap there for some radii (for example r=3 would miss (2,2))
	for(x=0;x<=y;x++)
	{
		interal_circlepaintchar(cx,cy,x,y,grey,buffer,linewidth,numlines);
		if(d<0)
			d+=(x<<2)+6;
		else
		{
			d+=((x-y)<<2)+10;
			y--;
		}
	}
}

//########################################################################
//######################################################################## circlegrey
//########################################################################

// Draw a circle outline of constant grey into the luma plane only.
// The surface width and height are passed on as the clipping limits, so the
// circle may extend beyond the surface.
void xsurface_drawcircle_grey(liqimage *surface,int cx, int cy, int r,unsigned char grey)
{
	const unsigned int plane_w = surface->width;
	const unsigned int plane_h = surface->height;

	xsurface_interalcircle(cx,cy,r,grey,&surface->data[surface->offsets[0]],plane_w,plane_h);
}
