#include "ninox.h"

// Fixed-factor upscalers, one per supported scale factor (2..5).
// upscale_image() dispatches to these based on n.
static int upscale_image_A2(struct Image *img, int n);
static int upscale_image_A3(struct Image *img, int n);
static int upscale_image_A4(struct Image *img, int n);
static int upscale_image_A5(struct Image *img, int n);

// When defined, pop_filter() shuffles the order in which it visits the
// pixels of each tile (tested there with #ifdef).
#define SHUFFLE_POPFILTER 1

// Divide the cutout into tiles measuring S x S pixels (S = PopFilter). Within each
// tile, replace the brightest and the darkest pixel with the average of its
// neighbors when it is more than twice / less than half the tile average.
// Processing stays clear of the cutout edge so that the neighbor lookups need
// no bounds checks.
//
// The filter runs PopFilter_PassCount times; when more than one pass is
// requested, pixels changed by an earlier pass are skipped by later passes.
//
// Returns 1 on success (or when the filter is disabled) and 0 when the image
// depth is neither 8 nor 16 bits.
int
pop_filter(struct Image *img)
	{
	int w = img->width;
	int h = img->height;
	int d = img->depth;
	unsigned short *udata = (unsigned short *)img->data;
	unsigned char *data = (unsigned char *)img->data;
	int v,total,r1,r2,tmp,thresh;
	int o,x,y,x1,y1,x2,y2,S,npix;
	static int *map = NULL;		// per-tile pixel offsets, kept (still shuffled) across calls
	static int map_S = 0;		// tile size the current map was built for
	static int map_w = 0;		// image width the current map was built for
	char *chmap;
	int passes = PopFilter_PassCount;
	int tilecount=0,maxcount=0, mincount=0;	// How many pixels were changed

	// Nothing to do when the filter is disabled; a negative tile size is
	// treated the same way since it cannot describe a tile
	if (PopFilter <= 0 || ! passes) return(1);

	S=PopFilter;
	npix = S * S;

	if (! Quiet) Print("POP filter = %d\n",S);

	// Allocate the delta map so that we scan each subregion
	// in a random way. The offsets depend on both the tile size and the
	// image width, so rebuild the map whenever either one has changed
	// since the previous call.
	if (map == NULL || map_S != S || map_w != w) {
	   if (map != NULL) Free(map);
	   map = Malloc(sizeof(int) * npix);
	   for(y=o=0; y<S; ++y) for(x=0; x<S; ++x,++o) { map[o] = y * w + x; }
	   map_S = S;
	   map_w = w;
	   }

	// Allocate a map to indicate which pixels have been changed, so that multiple passes
	// guarantee to change different pixels
	if (passes>1) chmap = (char *)ZeroMalloc(w*h);
	else chmap=NULL;

#ifdef SHUFFLE_POPFILTER
	// shuffle the lookup map
	for(o=0; o<npix; ++o) {
	   r2 = Random(npix);
	   tmp=map[o]; map[o]=map[r2]; map[r2]=tmp;
	   }
#endif

again:

	// Define the region to be processed, randomise the
	// top left coord
	x1 = 1 + img->cutout.x1 + Random(S);
	y1 = 1 + img->cutout.y1 + Random(S);
	x2 = img->cutout.x2 - S;
	y2 = img->cutout.y2 - S;

	switch(d) {
	   case 8:
		thresh = ThreshHold;
		for(y=y1; y<y2; y+=S) {
		   for(x=x1,o=y*w+x1; x<x2; x+=S,o+=S) {
			int n,max=-1,min=255,omin,omax;
			unsigned int tile_avg,count;
			int tile_change=0;

			// scan the SxS box in a randomised pattern using map[],
			// skipping pixels already replaced by an earlier pass
			tile_avg = count = 0;
			omin=omax=-1;
			for(n=0; n<npix; ++n) {
			  int O = o + map[n];
			  if (chmap==NULL || ! chmap[O]) {
			     v=data[O];
			     tile_avg += v; count++;
			     if (v>max) {max=v; omax=O;}
			     if (v<min) {min=v; omin=O;}
			     }
			  }
			if (count) tile_avg /= count;
			tilecount++;

			// shuffle a bit more
#ifdef SHUFFLE_POPFILTER
			r1 = Random(npix); r2 = Random(npix);
			if (r1 != r2) { tmp=map[r1]; map[r1]=map[r2]; map[r2]=tmp; }
#endif
			// If there are no bright pixels in this region then do nothing
			if (max < thresh) continue;

			// recalculate brightest pixel from its neighbors. Note that we ignore pixels in the same
			// column as this one
			if (omax>0) {
			   // Check if this pixel is more than twice the tile average
			   if ((max>>1) > tile_avg) {
			      total =  data[omax-1];   total += data[omax+1];   total += data[omax-w-1];
			      total += data[omax-w+1]; total += data[omax+w-1]; total += data[omax+w+1];
			      data[omax] = (int)((double)total/6.0 + 0.5);
			      maxcount++; tile_change=1;
			      if (chmap) chmap[omax]=1;
			      }
			   }

			// recalculate darkest pixel from its neighbors. Note we use only pixels from other columns
			// to try and avoid column related sensitivity
			if (omin>0 && omin != omax) {
			   // Check if this pixel is less than half the tile average
			   if ((min<<1) < tile_avg) {
			      total =  data[omin-1];   total += data[omin+1];   total += data[omin-w-1];
			      total += data[omin-w+1]; total += data[omin+w-1]; total += data[omin+w+1];
			      data[omin] = (int)((double)total/6.0 + 0.5);
			      mincount++; tile_change=1;
			      if (chmap) chmap[omin]=1;
			      }
			   }

			// If the tile has changed and we have PopFilter visualisation
			// enabled then brighten the tile and highlight the pixel that was replaced
			if (VisualisePopFilter && tile_change) {
			   for(n=0; n<npix; ++n) {
				int O = o + map[n];
				v=data[O]; v += 46; if (v>255) v=255;
				data[O] = v;
				}
			   if (omax>0) data[omax] = 255;
			   }
			}
		   }
		break;
	   case 16:
		// ThreshHold is scaled up by 8 bits for 16-bit pixel values
		thresh = ThreshHold << 8;
		for(y=y1; y<y2; y+=S) {
		   for(x=x1,o=y*w+x1; x<x2; x+=S,o+=S) {
			int n,max=-1,min=65535,omin,omax;
			unsigned int tile_avg, count;
			int tile_change=0;

			// scan the SxS box in a randomised pattern using map[],
			// skipping pixels already replaced by an earlier pass
			tile_avg = count = 0;
			omin=omax=-1;
			for(n=0; n<npix; ++n) {
			  int O = o + map[n];
			  if (chmap==NULL || ! chmap[O]) {
			     v=udata[O];
			     tile_avg += v; ++count;
			     if (v>max) {max=v; omax=O;}
			     if (v<min) {min=v; omin=O;}
			     }
			  }
			if (count) tile_avg /= count;
			tilecount++;

			// shuffle a bit more
#ifdef SHUFFLE_POPFILTER
			r1 = Random(npix); r2 = Random(npix);
			if (r1 != r2) { tmp=map[r1]; map[r1]=map[r2]; map[r2]=tmp; }
#endif

			// If there are no bright pixels in this region then do nothing
			if (max < thresh) continue;

			// recalculate brightest pixel from its neighbors. Note that we ignore pixels in the same
			// column as this one
			if (omax>0) {
			   // Check if this pixel is more than twice the tile average
			   if ((max>>1) > tile_avg) {
			      total =  udata[omax-1]; total += udata[omax+1]; total += udata[omax-w-1];
			      total += udata[omax-w+1]; total += udata[omax+w-1]; total += udata[omax+w+1];
			      udata[omax] = (int)((double)total/6.0 + 0.5);
			      maxcount++; tile_change=1;
			      if (chmap) chmap[omax]=1;
			      }
			   }

			// recalculate darkest pixel from its neighbors. Note we use only pixels from other columns
			// to try and avoid column related sensitivity
			if (omin>0 && omin != omax) {
			   // Check if this pixel is less than half the tile average.
			   // The test must look at the darkest value (min), exactly
			   // as the 8-bit path does, not at the brightest one.
			   if ((min<<1) < tile_avg) {
			      total =  udata[omin-1]; total += udata[omin+1]; total += udata[omin-w-1];
			      total += udata[omin-w+1]; total += udata[omin+w-1]; total += udata[omin+w+1];
			      udata[omin] = (int)((double)total/6.0 + 0.5);
			      mincount++; tile_change=1;
			      if (chmap) chmap[omin]=1;
			      }
			   }

			// If the tile has changed and we have PopFilter visualisation
			// enabled then brighten the tile and highlight the pixel that was replaced
			if (VisualisePopFilter && tile_change) {
			   for(n=0; n<npix; ++n) {
				int O = o + map[n];
				v=udata[O]; v += 12000; if (v>65535) v=65535;
				udata[O] = v;
				}
			   if (omax>0) udata[omax] = 65535;
			   }

			}
		   }
		break;
	   default:
		Print("pop filtering not supported on images of depth %d\n",d);
		// map is static: clear the pointer after freeing it so the next
		// call allocates a fresh map instead of touching freed memory
		Free(map);
		map = NULL;
		if (chmap) Free(chmap);
		return 0;
	   }

	// If we are doing multiple passes then loop again. Note the use of chmap[]
	// to ensure that we ignore previously changed pixels
	if (--passes > 0) goto again;

	// map is intentionally kept for the next call
	if (chmap) Free(chmap);
	if (! Quiet) Print("Popfilter: %d tiles, %d bright pixels, %d dark pixels\n",tilecount,maxcount,mincount);
	return 1;
	}

//########################################################################

// Enlarge the image by an integer factor n, smoothing as it goes.
// Factors 2..5 are handed to the matching fixed-size upscaler and its
// result is returned unchanged. n <= 1 is a no-op that returns 1.
// Any other factor prints a message and terminates the program.
int
upscale_image(struct Image *img, int n)
	{
	if (n <= 1) {
	   // nothing to scale
	   return 1;
	   }

	switch(n) {
	   case 2: return upscale_image_A2(img,n);
	   case 3: return upscale_image_A3(img,n);
	   case 4: return upscale_image_A4(img,n);
	   case 5: return upscale_image_A5(img,n);
	   default: break;
	   }

	Print("upscale_image: size n=%d not supported\n",n);
	exit(1);

	return(0);
	}

// 2x specific upscaler, n==2
//
// Allocates a new zeroed buffer n times larger in each direction. For every
// destination pixel inside the scaled cutout, a quarter of the matching
// source pixel is added to the 2x2 block starting at that pixel; the
// overlapping blocks give the smoothing. The old buffer is freed, and the
// image dimensions and cutout are multiplied by n.
//
// Handles 8 and 16 bit images. Always returns 0.
static int
upscale_image_A2(struct Image *img, int n)
	{
	int width = img->width;
	int height = img->height;
	int bpp = img->depth/8;
	int x,y,r,c;
	unsigned short *isrc,*idst;
	unsigned char *src,*dst;

	int x1 = img->cutout.x1 * n; // Define our destination cutout to scan
	int y1 = img->cutout.y1 * n; // and integrate from the source image
	int x2 = img->cutout.x2 * n;
	int y2 = img->cutout.y2 * n;

	int nw = width * n;	// destination row length in pixels
	int nh = height * n;	// destination row count
	int n2 = n * n;

	switch(bpp) {
	   case 1:

		src = (unsigned char *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2);
		dst = img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;	// start of the source row feeding this destination row

		   for(x=x1; x<x2; ++x) {
			// Each destination pixel collects four contributions from
			// the 2x2 kernel, so each carries a quarter of the source
			// value. 4*(255/4) = 252, so the byte cannot wrap.
			unsigned int v = src[yy + x/n] / 4;

			// Clip the kernel at the image edge so that a cutout touching
			// the right or bottom border never writes outside its row or
			// outside the buffer
			for(r=0; r<2 && y+r<nh; ++r) {
			   unsigned char *row = dst + (y+r)*nw + x;
			   for(c=0; c<2 && x+c<nw; ++c) row[c] += v;
			   }
			}
		   }

		Free(src);
		break;

	   case 2:

		isrc = (unsigned short *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2 * 2);
		idst = (unsigned short *)img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;

		   for(x=x1; x<x2; ++x) {
			// 4*(65535/4) = 65532, so the 16-bit sum cannot wrap
			unsigned int v = isrc[yy + x/n] / 4;

			for(r=0; r<2 && y+r<nh; ++r) {
			   unsigned short *row = idst + (y+r)*nw + x;
			   for(c=0; c<2 && x+c<nw; ++c) row[c] += v;
			   }
			}
		   }

		Free(isrc);
		break;

	   case 3:
		// NOTE(review): 24bpp data is not resampled here, yet the dimensions
		// below are still multiplied by n -- confirm callers never pass RGB images
		break;
	   }

	img->width  *= n; img->height *= n;
	img->dst_width  *= n; img->dst_height *= n;

        img->cutout.x1 = x1; img->cutout.x2 = x2;
        img->cutout.y1 = y1; img->cutout.y2 = y2;

	return(0);
	}

// 3x specific upscaler, n==3
//
// Allocates a new zeroed buffer n times larger in each direction. For every
// destination pixel inside the scaled cutout, one ninth of the matching
// source pixel is added to the 3x3 block starting at that pixel; the
// overlapping blocks give the smoothing. The old buffer is freed, and the
// image dimensions and cutout are multiplied by n.
//
// Handles 8 and 16 bit images. Always returns 0.
static int
upscale_image_A3(struct Image *img, int n)
	{
	// Extra amount added per kernel cell on top of the ninth share.
	// NOTE(review): presumably compensates for the truncation of the integer
	// division -- confirm.
	static const unsigned char bias[3][3] = {
		{ 0, 0, 0 },
		{ 0, 4, 0 },
		{ 0, 0, 0 }
		};
	int width = img->width;
	int height = img->height;
	int bpp = img->depth/8;
	int x,y,r,c;
	unsigned short *isrc,*idst;
	unsigned char *src,*dst;

	int x1 = img->cutout.x1 * n; // Define our destination cutout to scan
	int y1 = img->cutout.y1 * n; // and integrate from the source image
	int x2 = img->cutout.x2 * n;
	int y2 = img->cutout.y2 * n;

	int nw = width * n;	// destination row length in pixels
	int nh = height * n;	// destination row count
	int n2 = n * n;

	switch(bpp) {
	   case 1:

		src = (unsigned char *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2);
		dst = img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;	// start of the source row feeding this destination row

		   for(x=x1; x<x2; ++x) {
			unsigned int v = src[yy + x/n] / 9;

			// Clip the kernel at the image edge so that a cutout touching
			// the right or bottom border never writes outside its row or
			// outside the buffer
			for(r=0; r<3 && y+r<nh; ++r) {
			   unsigned char *row = dst + (y+r)*nw + x;
			   for(c=0; c<3 && x+c<nw; ++c) {
				// 9*(255/9)+4 = 256 does not fit in a byte, so
				// saturate instead of wrapping bright pixels to 0
				unsigned int sum = row[c] + v + bias[r][c];
				row[c] = (sum > 255) ? 255 : sum;
				}
			   }
			}
		   }

		Free(src);
		break;

	   case 2:

		isrc = (unsigned short *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2 * 2);
		idst = (unsigned short *)img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;

		   for(x=x1; x<x2; ++x) {
			// 9*(65535/9)+4 = 65533, so the 16-bit sum cannot wrap
			unsigned int v = isrc[yy + x/n] / 9;

			for(r=0; r<3 && y+r<nh; ++r) {
			   unsigned short *row = idst + (y+r)*nw + x;
			   for(c=0; c<3 && x+c<nw; ++c) row[c] += v + bias[r][c];
			   }
			}
		   }

		Free(isrc);
		break;

	   case 3:
		// NOTE(review): 24bpp data is not resampled here, yet the dimensions
		// below are still multiplied by n -- confirm callers never pass RGB images
		break;
	   }

	img->width  *= n; img->height *= n;
	img->dst_width  *= n; img->dst_height *= n;

        img->cutout.x1 = x1; img->cutout.x2 = x2;
        img->cutout.y1 = y1; img->cutout.y2 = y2;

	return(0);
	}

// 4x specific upscaler, n==4
//
// Allocates a new zeroed buffer n times larger in each direction. For every
// destination pixel inside the scaled cutout, one sixteenth of the matching
// source pixel is added to the 4x4 block starting at that pixel; the
// overlapping blocks give the smoothing. The old buffer is freed, and the
// image dimensions and cutout are multiplied by n.
//
// Handles 8 and 16 bit images. Always returns 0.
static int
upscale_image_A4(struct Image *img, int n)
	{
	// Extra amount added per kernel cell on top of the sixteenth share.
	// NOTE(review): presumably compensates for the truncation of the integer
	// division -- confirm.
	static const unsigned char bias[4][4] = {
		{ 0, 0, 0, 0 },
		{ 0, 2, 2, 0 },
		{ 0, 2, 2, 0 },
		{ 0, 0, 0, 0 }
		};
	int width = img->width;
	int height = img->height;
	int bpp = img->depth/8;
	int x,y,r,c;
	unsigned short *isrc,*idst;
	unsigned char *src,*dst;

	int x1 = img->cutout.x1 * n; // Define our destination cutout to scan
	int y1 = img->cutout.y1 * n; // and integrate from the source image
	int x2 = img->cutout.x2 * n;
	int y2 = img->cutout.y2 * n;

	int nw = width * n;	// destination row length in pixels
	int nh = height * n;	// destination row count
	int n2 = n * n;

	switch(bpp) {
	   case 1:
		src = (unsigned char *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2);
		dst = img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;	// start of the source row feeding this destination row

		   for(x=x1; x<x2; ++x) {
			// Each destination pixel collects sixteen contributions from
			// the 4x4 kernel, so each carries a sixteenth of the source
			// value. 16*(255/16)+8 = 248, so the byte cannot wrap.
			unsigned int v = src[yy + x/n] / 16;

			// Clip the kernel at the image edge so that a cutout touching
			// the right or bottom border never writes outside its row or
			// outside the buffer
			for(r=0; r<4 && y+r<nh; ++r) {
			   unsigned char *row = dst + (y+r)*nw + x;
			   for(c=0; c<4 && x+c<nw; ++c) row[c] += v + bias[r][c];
			   }
			}
		   }

		Free(src);
		break;

	   case 2:
		isrc = (unsigned short *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2 * 2);
		idst = (unsigned short *)img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;

		   for(x=x1; x<x2; ++x) {
			// 16*(65535/16)+8 = 65528, so the 16-bit sum cannot wrap
			unsigned int v = isrc[yy + x/n] / 16;

			for(r=0; r<4 && y+r<nh; ++r) {
			   unsigned short *row = idst + (y+r)*nw + x;
			   for(c=0; c<4 && x+c<nw; ++c) row[c] += v + bias[r][c];
			   }
			}
		   }

		Free(isrc);
		break;

	   case 3:
		// NOTE(review): 24bpp data is not resampled here, yet the dimensions
		// below are still multiplied by n -- confirm callers never pass RGB images
		break;
	   }

	img->width  *= n; img->height *= n;
	img->dst_width  *= n; img->dst_height *= n;

        img->cutout.x1 = x1; img->cutout.x2 = x2;
        img->cutout.y1 = y1; img->cutout.y2 = y2;

	return(0);
	}

// 5x specific upscaler  n==5
//
// Allocates a new zeroed buffer n times larger in each direction. For every
// destination pixel inside the scaled cutout, one twenty-fifth of the
// matching source pixel is added to the 5x5 block starting at that pixel;
// the overlapping blocks give the smoothing. The old buffer is freed, and
// the image dimensions and cutout are multiplied by n.
//
// Handles 8 and 16 bit images. Always returns 0.
static int
upscale_image_A5(struct Image *img, int n)
	{
	// Extra amount added per kernel cell on top of the 1/25 share.
	// NOTE(review): presumably compensates for the truncation of the integer
	// division -- confirm.
	static const unsigned char bias[5][5] = {
		{ 0, 0, 0, 0, 0 },
		{ 0, 1, 1, 1, 0 },
		{ 0, 1, 4, 1, 0 },
		{ 0, 1, 1, 1, 0 },
		{ 0, 0, 0, 0, 0 }
		};
	int width = img->width;
	int height = img->height;
	int bpp = img->depth/8;
	int x,y,r,c;
	unsigned short *isrc,*idst;
	unsigned char *src,*dst;

	int x1 = img->cutout.x1 * n; // Define our destination cutout to scan
	int y1 = img->cutout.y1 * n; // and integrate from the source image
	int x2 = img->cutout.x2 * n;
	int y2 = img->cutout.y2 * n;

	int nw = width * n;	// destination row length in pixels
	int nh = height * n;	// destination row count
	int n2 = n * n;

	switch(bpp) {
	   case 1:

		src = (unsigned char *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2);
		dst = img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;	// start of the source row feeding this destination row

		   for(x=x1; x<x2; ++x) {
			// Each destination pixel collects twenty-five contributions
			// from the 5x5 kernel, so each carries 1/25 of the source value
			unsigned int v = src[yy + x/n] / 25;

			// Clip the kernel at the image edge so that a cutout touching
			// the right or bottom border never writes outside its row or
			// outside the buffer
			for(r=0; r<5 && y+r<nh; ++r) {
			   unsigned char *row = dst + (y+r)*nw + x;
			   for(c=0; c<5 && x+c<nw; ++c) {
				// 25*(255/25)+12 = 262 does not fit in a byte, so
				// saturate instead of wrapping bright pixels
				unsigned int sum = row[c] + v + bias[r][c];
				row[c] = (sum > 255) ? 255 : sum;
				}
			   }
			}
		   }

		// release the old source buffer, as the 16-bit path does
		Free(src);
		break;

	   case 2:

		isrc = (unsigned short *)img->data;
		img->data = (unsigned char *)ZeroMalloc(width * height * n2 * 2);
		idst = (unsigned short *)img->data;

		for(y=y1; y<y2; ++y) {
		   int yy = y/n * width;

		   for(x=x1; x<x2; ++x) {
			unsigned int v = isrc[yy + x/n] / 25;

			for(r=0; r<5 && y+r<nh; ++r) {
			   unsigned short *row = idst + (y+r)*nw + x;
			   for(c=0; c<5 && x+c<nw; ++c) {
				// 25*(65535/25)+12 = 65537 does not fit in 16 bits,
				// so saturate instead of wrapping bright pixels
				unsigned int sum = row[c] + v + bias[r][c];
				row[c] = (sum > 65535) ? 65535 : sum;
				}
			   }
			}
		   }

		Free(isrc);
		break;

	   case 3:
		// NOTE(review): 24bpp data is not resampled here, yet the dimensions
		// below are still multiplied by n -- confirm callers never pass RGB images
		break;
	   }

	img->width  *= n; img->height *= n;
	img->dst_width  *= n; img->dst_height *= n;

        img->cutout.x1 = x1; img->cutout.x2 = x2;
        img->cutout.y1 = y1; img->cutout.y2 = y2;

	return(0);
	}

//=============================================================================

// Downscale the image by an integer factor n by averaging n x n blocks of
// source pixels.
//
// 8bpp and 24bpp images are processed over the whole frame and the result
// is written back into the existing buffer. 16bpp images are processed over
// the cutout only and written to a newly allocated buffer that replaces
// img->data (the old buffer is freed).
//
// On return the image dimensions, destination dimensions and cutout have all
// been divided by n. Returns 1 in every case; n <= 1 leaves the image untouched.
int
downscale_image(struct Image *img, int n)
	{
	unsigned char *data = img->data;
	int width = img->width;
	int height = img->height;
	int bpp = img->depth/8;
	int x1 = img->cutout.x1;
	int y1 = img->cutout.y1;
	int x2 = img->cutout.x2;
	int y2 = img->cutout.y2;
	int nx1,ny1,nx2,ny2;	// cutout in destination coordinates
	int nw,nh;		// destination dimensions
	int sample_size;	// number of source pixels averaged per output pixel
	int i,j,o,x,y;
	unsigned char *src,*dst;
	unsigned short *ibuf,*idst;
	unsigned int *rowbuffer_i = NULL;
	unsigned int *rowbuffer_r = NULL;
	unsigned int *rowbuffer_b = NULL;
	unsigned int *rowbuffer_g = NULL;

	// No change if we downscale by 1. This check must come before anything
	// is divided by n, otherwise n==0 would divide by zero.
	if (n<=1) return 1;

	nx1 = x1/n; ny1 = y1/n;
	nx2 = x2/n; ny2 = y2/n;
	nw = width/n;
	nh = height/n;
	sample_size = n * n;

	switch(bpp) {
	   case 1:
		// Monochrome 8bpp

		rowbuffer_i = (unsigned int *)ZeroMalloc(nw * sizeof(unsigned int));
		dst = (unsigned char *)data;

		for(y=Random(n/2); y<height-n; y+=n) {
		   // accumulate n source rows into one row of column sums
		   for(i=y; i<y+n; ++i) {
		      x = Random(n/2);	// start each row in a different place to remove artifacts
		      src = (unsigned char *)data + i*width +x;

		      for(o=0; x<width-n; x+=n,++o)
			for(j=0; j<n; ++j) rowbuffer_i[o] += *(src++);
		      }

		   // Output averages and reset the row buffer
		   for(x=0; x<nw; ++x) {
			*(dst++) = rowbuffer_i[x] / sample_size;
			rowbuffer_i[x]=0;
			}
		   }

		Free(rowbuffer_i);
		break;

	   case 3:
		// RGB 24bpp
		rowbuffer_r = (unsigned int *)ZeroMalloc(nw * sizeof(unsigned int));
		rowbuffer_g = (unsigned int *)ZeroMalloc(nw * sizeof(unsigned int));
		rowbuffer_b = (unsigned int *)ZeroMalloc(nw * sizeof(unsigned int));
		dst = (unsigned char *)data;

		for(y=Random(n/2); y<height-n; y+=n) {
		   // accumulate n source rows into per-channel column sums
		   for(i=y; i<y+n; ++i) {
		      x = Random(n/2);	// start each row in a different place to remove artifacts
		      src = (unsigned char *)data + (i*width+x) * 3;

		      for(o=0; x<width-n; x+=n, ++o)
			for(j=0; j<n; ++j) {
			   rowbuffer_r[o] += *(src++);
			   rowbuffer_g[o] += *(src++);
			   rowbuffer_b[o] += *(src++);
			   }
		      }

		   // Output averages and reset the row buffers
		   for(x=0; x<nw; ++x) {
			*(dst++) = rowbuffer_r[x] / sample_size; rowbuffer_r[x]=0;
			*(dst++) = rowbuffer_g[x] / sample_size; rowbuffer_g[x]=0;
			*(dst++) = rowbuffer_b[x] / sample_size; rowbuffer_b[x]=0;
			}
		   }

		Free(rowbuffer_r);
		Free(rowbuffer_g);
		Free(rowbuffer_b);
		break;

	   case 2:
		// Monochrome 16bpp

		ibuf = (unsigned short *)ZeroMalloc(nw * nh * sizeof(unsigned short));
		rowbuffer_i = (unsigned int *)ZeroMalloc(width * sizeof(unsigned int));

		for(y=y1; y<y2-n; y+=n) {
		   unsigned int *ip;

		   // accumulate n source rows of the cutout into column sums
		   for(i=y; i<y+n; ++i) {
		      unsigned short *isrc = (unsigned short *)data + i*width +x1;

		      ip = rowbuffer_i;
		      for(x=x1; x<x2-n; x+=n,++ip)
			for(j=0; j<n; ++j) *ip += *(isrc++);
		      }

		   // Output averages and reset the row buffer
		   idst = ibuf + (y/n)*nw + nx1;
		   ip = rowbuffer_i;
		   for(x=nx1; x<nx2; ++x) {
			*(idst++) = *ip / sample_size;
			*(ip++)=0;
			}
		   }

		Free(img->data);
		Free(rowbuffer_i);
		img->data = (unsigned char *)ibuf;
		break;
	      }

	// NOTE(review): for any other depth the pixel data is left untouched but
	// the dimensions below are still divided by n -- confirm this cannot happen

	img->width = nw; img->height = nh;
	img->dst_width = nw; img->dst_height = nh;

        img->cutout.x1 = nx1; img->cutout.x2 = nx2;
        img->cutout.y1 = ny1; img->cutout.y2 = ny2;

	return(1);
	}
