
#include <config.h>
#include <stdio.h>
#include <math.h>
#include "global.h"
#include "attributes.h"
#include "cpu_accel.h"

#ifdef HAVE_ALTIVEC
#include "../utils/altivec/altivec_transform.h"
#endif

/*
 * Frame/field DCT decision for one luminance macroblock.  Defined later
 * in this file; init_transform() installs it in pselect_dct_type.
 */
int select_dct_type( uint8_t *cur_lum_mb, uint8_t *pred_lum_mb);

/* Forward and inverse DCT routines defined outside this file.  In this
   file they are applied in place to the 64-coefficient blocks of a
   picture. */
extern void fdct( int16_t *blk );
extern void idct( int16_t *blk );

/* Private prototypes.  The "static" qualifier is commented out, so these
   two routines currently have external linkage. */
/* static */ void add_pred (uint8_t *pred, uint8_t *cur,
					  int lx, int16_t *blk);
/* static */ void sub_pred (uint8_t *pred, uint8_t *cur,
					  int lx, int16_t *blk);

/*
  Pointers to the versions of the transform and prediction manipulation
  routines to be used.  All of them are assigned by init_transform(),
  which must therefore run before transform() or itransform().
 */

static void (*pfdct)( int16_t * blk );
static void (*pidct)( int16_t * blk );
static void (*padd_pred) (uint8_t *pred, uint8_t *cur,
						  int lx, int16_t *blk);
static void (*psub_pred) (uint8_t *pred, uint8_t *cur,
						  int lx, int16_t *blk);
/* NOTE(review): assigned in init_transform() but never called in the
   visible part of this file. */
static int (*pselect_dct_type)( uint8_t *cur_lum_mb, uint8_t *pred_lum_mb);
/*
  Initialise the routine pointers of this file.

  The portable C implementations are always installed first.  When the
  file is built with HAVE_ALTIVEC and cpu_accel() returns a positive
  value, the forward DCT, inverse DCT and prediction add/subtract
  pointers are replaced by their AltiVec counterparts.  The DCT type
  selector always keeps its C implementation.
 */


void init_transform(void)
{
	/* Always queried, even when no accelerated variant is compiled in. */
	int flags = cpu_accel();

	/* Portable defaults */
	pselect_dct_type = select_dct_type;
	psub_pred = sub_pred;
	padd_pred = add_pred;
	pidct = idct;
	pfdct = fdct;

#ifdef HAVE_ALTIVEC
	if (flags > 0)
	{
	    mjpeg_info("SETTING AltiVec for TRANSFORM!");

	    /* Override everything except the DCT type selector */
	    psub_pred = ALTIVEC_SUFFIX(sub_pred);
	    padd_pred = ALTIVEC_SUFFIX(add_pred);
	    pidct = ALTIVEC_SUFFIX(idct);
	    pfdct = ALTIVEC_SUFFIX(fdct);
	}
#endif
}


/*
 * Choose between frame and field DCT for one luminance macroblock.
 *
 * cur_lum_mb and pred_lum_mb address the first pixel of the macroblock
 * in the current and the predicted luminance data; successive lines are
 * opt_phy_width bytes apart.  The prediction error (cur - pred) is
 * accumulated separately for the 8 even lines (top field) and the 8 odd
 * lines (bottom field), 16 pixels per line, i.e. 128 samples each.
 *
 * Returns 0 for frame DCT, 1 for field DCT.
 */
int select_dct_type( uint8_t *cur_lum_mb, uint8_t *pred_lum_mb)
{
	int top_sum = 0, bot_sum = 0;   /* sums of the prediction error   */
	int top_sq = 0, bot_sq = 0;     /* sums of the squared error      */
	int cross = 0;                  /* sum of top*bottom products     */
	int top_var, bot_var;
	int top_err, bot_err;
	int top_row = 0;                /* offset of the current top line */
	int bot_row;
	int pair, x;
	double covar, var_prod;

	for (pair=0; pair<8; pair++)
	{
		bot_row = top_row + opt_phy_width;
		for (x=0; x<16; x++)
		{
			top_err = cur_lum_mb[top_row+x] - pred_lum_mb[top_row+x];
			bot_err = cur_lum_mb[bot_row+x] - pred_lum_mb[bot_row+x];

			top_sum += top_err;
			bot_sum += bot_err;
			top_sq += top_err*top_err;
			bot_sq += bot_err*bot_err;
			cross += top_err*bot_err;
		}
		/* step over one top line and one bottom line */
		top_row += (opt_phy_width<<1);
	}

	/* Variances of the two fields, scaled by the 128 samples each
	   (integer arithmetic). */
	top_var = top_sq - top_sum*top_sum/128;
	bot_var = bot_sq - bot_sum*bot_sum/128;

	/* Only one of the two variances is positive: use field DCT. */
	if ((top_var>0) != (bot_var>0))
		return 1; /* field DCT */

	/* Compare the equally scaled covariance with half the geometric
	   mean of the variances, i.e. test whether the correlation between
	   the fields exceeds 0.5.  If it does, frame DCT is used. */
	var_prod = ((double) top_var) * ((double) bot_var);
	covar = (cross - (top_sum*bot_sum)/128);

	return (covar > 0.5*sqrt(var_prod)) ? 0  /* frame DCT */
	                                    : 1; /* field DCT */
}

/*
 * Subtract prediction and transform prediction error.
 *
 * Walks the picture in 16x16 macroblock steps.  For every block of every
 * macroblock, the difference between picture->curorg and picture->pred
 * is written into the matching entry of picture->blocks via psub_pred
 * and then transformed in place via pfdct.  Each macroblock's mbinfo
 * entry gets a pointer to its first block and its dct_type.
 *
 * init_transform() must have been called first so that the routine
 * pointers are valid.
 */
void transform( pict_data_s *picture )
{
	int i, j, i1, j1, k, n, cc, offs, lx;
	uint8_t **cur = picture->curorg;
	uint8_t **pred = picture->pred;
	mbinfo_s *mbi = picture->mbinfo;
	int16_t (*blocks)[64] = picture->blocks;
	int16_t *blk;

	k = 0; /* macroblock index, raster order */

	for (j=0; j<opt_enc_height; j+=16)
	{
		for (i=0; i<opt_enc_width; i+=16)
		{
			/* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
			/* We'll use this for quantisation calculations                    */
			mbi[k].dctblocks = &blocks[k*block_count];

			/* NOTE(review): dct_type is forced to 0 here, so the
			   "field DCT" branches below are never taken in this
			   function and pselect_dct_type is never consulted.
			   Confirm whether frame/field selection is meant to be
			   disabled. */
			mbi[k].dct_type = 0;

			for (n=0; n<block_count; n++)
			{
				/* color component index: blocks 0..3 are luminance,
				   later blocks alternate between components 1 and 2 */
				cc = (n<4) ? 0 : (n&1)+1;
				if (cc==0)
				{
					/* luminance */
					if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type)
					{
						/* field DCT: blocks 2,3 start one line down
						   and every second line is used */
						offs = i + ((n&1)<<3) + opt_phy_width*(j+((n&2)>>1));
						lx =  opt_phy_width<<1;
					}
					else
					{
						/* frame DCT: blocks 2,3 start eight lines down */
						offs = i + ((n&1)<<3) +  opt_phy_width2*(j+((n&2)<<2));
						lx =  opt_phy_width2;
					}

					if (picture->pict_struct==BOTTOM_FIELD)
						offs +=  opt_phy_width;
				}
				else
				{
					/* chrominance */

					/* scale coordinates */
					i1 = (opt_chroma_format==CHROMA444) ? i : i>>1;
					j1 = (opt_chroma_format!=CHROMA420) ? j : j>>1;

					if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type
						&& (opt_chroma_format!=CHROMA420))
					{
						/* field DCT */
						offs = i1 + (n&8) +  opt_phy_chrom_width*(j1+((n&2)>>1));
						lx =  opt_phy_chrom_width<<1;
					}
					else
					{
						/* frame DCT */
						offs = i1 + (n&8) +  opt_phy_chrom_width2*(j1+((n&2)<<2));
						lx =  opt_phy_chrom_width2;
					}

					if (picture->pict_struct==BOTTOM_FIELD)
						offs +=  opt_phy_chrom_width;
				}

				/* prediction error into the block, then DCT in place */
				blk = blocks[k*block_count+n];
				(*psub_pred)(pred[cc]+offs,cur[cc]+offs,lx,blk);
				(*pfdct)(blk);
			}

			k++;
		}
	}
}


/*
 * Inverse transform prediction error and add prediction.
 *
 * Walks the picture in 16x16 macroblock steps.  Every block of
 * picture->qblocks is inverse transformed in place via pidct and then
 * added to picture->pred via padd_pred, the result being stored in
 * picture->curref.  The block addressing follows each macroblock's
 * dct_type as recorded in picture->mbinfo.
 *
 * init_transform() must have been called first so that the routine
 * pointers are valid.
 */
void itransform(pict_data_s *picture)
{
	mbinfo_s *mbi = picture->mbinfo;
	uint8_t **recon = picture->curref;
	uint8_t **pred = picture->pred;
	/* It is the quantised / inverse quantised blocks we are interested
	   in for the inverse transformation */
	int16_t (*qblk)[64] = picture->qblocks;
	int16_t *blk;
	int mb_x, mb_y, cx, cy, mb, b, comp, offs, stride;
	int field_dct;

	mb = 0; /* macroblock index, raster order */

	for (mb_y=0; mb_y<opt_enc_height; mb_y+=16)
	{
		for (mb_x=0; mb_x<opt_enc_width; mb_x+=16, mb++)
		{
			for (b=0; b<block_count; b++)
			{
				/* color component index: blocks 0..3 are luminance,
				   later blocks alternate between components 1 and 2 */
				comp = (b<4) ? 0 : (b&1)+1;

				/* field DCT is only considered in frame pictures */
				field_dct = (picture->pict_struct==FRAME_PICTURE)
					&& mbi[mb].dct_type;

				if (comp==0)
				{
					/* luminance */
					if (field_dct)
					{
						/* blocks 2,3 start one line down and every
						   second line is used */
						offs = mb_x + ((b&1)<<3)
							+ opt_phy_width*(mb_y+((b&2)>>1));
						stride = opt_phy_width<<1;
					}
					else
					{
						/* frame DCT: blocks 2,3 start eight lines down */
						offs = mb_x + ((b&1)<<3)
							+ opt_phy_width2*(mb_y+((b&2)<<2));
						stride = opt_phy_width2;
					}

					if (picture->pict_struct==BOTTOM_FIELD)
						offs += opt_phy_width;
				}
				else
				{
					/* chrominance: scale coordinates first */
					cx = (opt_chroma_format==CHROMA444) ? mb_x : mb_x>>1;
					cy = (opt_chroma_format!=CHROMA420) ? mb_y : mb_y>>1;

					if (field_dct && (opt_chroma_format!=CHROMA420))
					{
						/* field DCT */
						offs = cx + (b&8)
							+ opt_phy_chrom_width*(cy+((b&2)>>1));
						stride = opt_phy_chrom_width<<1;
					}
					else
					{
						/* frame DCT */
						offs = cx + (b&8)
							+ opt_phy_chrom_width2*(cy+((b&2)<<2));
						stride = opt_phy_chrom_width2;
					}

					if (picture->pict_struct==BOTTOM_FIELD)
						offs += opt_phy_chrom_width;
				}

				/* IDCT in place, then prediction + error into curref */
				blk = qblk[mb*block_count+b];
				(*pidct)(blk);
				(*padd_pred)(pred[comp]+offs,recon[comp]+offs,stride,blk);
			}
		}
	}
}


/*
 * Add prediction and prediction error, saturate to 0...255.
 *
 *   pred - prediction pixels of the 8x8 block (read)
 *   cur  - destination pixels of the 8x8 block (written)
 *   lx   - distance, in bytes, between successive lines of pred and cur
 *   blk  - 64 prediction error values, stored as 8 rows of 8
 *
 * The saturation is done by indexing clp_0_255 with the sum, which can
 * be negative.  clp_0_255 is declared outside this file; presumably it
 * is a clipping lookup table that accepts such indices - TODO confirm
 * its valid index range.
 */
/* static */ void add_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk)
{
	int i, j;

	for (j=0; j<8; j++)
	{
		for (i=0; i<8; i++)
		{
			cur[i] = clp_0_255[blk[i] + pred[i]];
		}
		blk+= 8;   /* error rows are packed */
		cur+= lx;  /* picture rows are lx bytes apart */
		pred+= lx;
	}
}


/*
 * Subtract prediction from block data.
 *
 *   pred - prediction pixels of the 8x8 block (read)
 *   cur  - current picture pixels of the 8x8 block (read)
 *   lx   - distance, in bytes, between successive lines of pred and cur
 *   blk  - receives the 64 differences cur - pred as 8 rows of 8; each
 *          value lies in -255...255
 */
/* static */
void sub_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk)
{
	int i, j;

	for (j=0; j<8; j++)
	{
		for (i=0; i<8; i++)
		{
			blk[i] = cur[i] - pred[i];
		}
		blk+= 8;   /* error rows are packed */
		cur+= lx;  /* picture rows are lx bytes apart */
		pred+= lx;
	}
}


