#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef HAVE_FENV_H
#include <fenv.h>
#endif
#include "global.h"
#include "cpu_accel.h"
#include "fastintfns.h"


/*
 * Global function pointers for SIMD-dependent functions.
 * init_quantizer() points them at the plain C implementations defined in
 * this file; callers are expected to go through the pointers rather than
 * calling the C routines directly.
 */
int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
						int mquant, int *nonsat_mquant);
int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );

/* Local function pointers for SIMD-dependent functions */

/* Also set by init_quantizer(); "static" is commented out, so it has external linkage. */
/* static */ void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst,  uint16_t *quant_mat);


/* Forward declarations of the C implementations defined further down in this file. */
/* static */ int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
/* static */ void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);

#ifdef HAVE_ALTIVEC
/*
 * Defined elsewhere; invoked by init_quantizer() when cpu_accel() is non-zero.
 * Presumably re-points the function pointers above at AltiVec versions --
 * TODO confirm against its definition.
 */
extern void enable_altivec_quantization();
#endif

/*
 * Initialise the quantization dispatch pointers.
 *
 * Every pointer is first bound to the portable C routine.  On builds with
 * HAVE_ALTIVEC, enable_altivec_quantization() is then called if cpu_accel()
 * reports a non-zero value (presumably to install accelerated versions --
 * see its definition).
 */
void init_quantizer(void)
{
	/* Portable C defaults; the three assignments are independent. */
	piquant_non_intra_m1 = iquant_non_intra_m1;
	pquant_weight_coeff_sum = quant_weight_coeff_sum;
	pquant_non_intra = quant_non_intra;

#ifdef HAVE_ALTIVEC
	if (cpu_accel())
	{
		enable_altivec_quantization();
	}
#endif
}


/*
 * Return the next coarser quantiser value after `quant`, or `quant` itself
 * when there is no coarser one.
 *
 * picture->q_scale_type != 0: `quant` is mapped to its index through
 *   map_non_linear_mquant[], and the entry at the following index of
 *   non_linear_mquant_table[] is returned; the index may not exceed 31.
 * picture->q_scale_type == 0: the value advances in steps of 2 and the
 *   result may not exceed 31.
 *   NOTE(review): confirm that 31 is the intended ceiling for the stepped
 *   value in this branch as well.
 */
int next_larger_quant( pict_data_s *picture, int quant )
{
	int candidate;

	if( !picture->q_scale_type )
	{
		candidate = quant + 2;
		return (candidate > 31) ? quant : candidate;
	}

	/* Here `candidate` is an index into the non-linear table, not a value. */
	candidate = map_non_linear_mquant[quant] + 1;
	return (candidate > 31) ? quant : non_linear_mquant_table[candidate];
}


/*
 * Quantize the coefficients of one intra-coded macroblock.
 *
 * picture        supplies dc_prec (DC divisor is 8 >> dc_prec) and the
 *                quantiser scale type used by next_larger_quant().
 * src            block_count consecutive blocks of 64 coefficients.
 * dst            receives the quantized levels, same layout as src.
 * mquant         initial quantiser; selects the row intra_q_tbl[mquant].
 * nonsat_mquant  out: the quantiser actually used for the final result.
 *
 * If any AC level would exceed opt_dctsatlim, the whole macroblock is
 * re-quantized with the next larger quantiser.  When no larger quantiser
 * exists (next_larger_quant() returns its argument unchanged), the level is
 * clamped to opt_dctsatlim instead; previously this case re-ran the same
 * pass forever.  This mirrors the saturation fallback in quant_non_intra().
 */
void quant_intra(
	pict_data_s *picture,
	int16_t *src, 
	int16_t *dst,
	int mquant,
	int *nonsat_mquant
	)
{
	int clipvalue = opt_dctsatlim;
	uint16_t *quant_mat = intra_q_tbl[mquant];
	int clipping;

	do
	{
		int16_t *psrc = src;
		int16_t *pbuf = dst;
		int comp;

		clipping = 0;
		for( comp = 0; comp < block_count && !clipping; ++comp )
		{
			int i;
			int x = psrc[0];
			int d = 8 >> picture->dc_prec;	/* intra_dc_mult */

			/* DC term: round(x/d), rounding half away from zero */
			pbuf[0] = (x >= 0) ? (x + (d>>1))/d : -((-x + (d>>1))/d);

			for( i = 1; i < 64; i++ )
			{
				int y;

				x = psrc[i];
				d = quant_mat[i];

				/* (32*|x| + 3d/4) / (2d): a single divide per coefficient */
				y = ((abs(x)<<5) + ((3*d)>>2)) / (d<<1);
				if( y > clipvalue )
				{
					int coarser = next_larger_quant( picture, mquant );
					if( coarser != mquant )
					{
						/* Quantize harder and redo the whole macroblock */
						clipping = 1;
						mquant = coarser;
						quant_mat = intra_q_tbl[mquant];
						break;
					}
					/* Already at the coarsest quantiser: saturate */
					y = clipvalue;
				}

				pbuf[i] = intsamesign(x,y);
			}
			pbuf += 64;
			psrc += 64;
		}
	} while( clipping );

	*nonsat_mquant = mquant;
}


/*
 * Weighted sum of absolute coefficient values for one 64-entry block:
 * the sum over i in [0,64) of |blk[i]| * i_quant_mat[i].
 */
int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
{
	int16_t *coef = blk;
	int16_t *stop = blk + 64;
	uint16_t *weight = i_quant_mat;
	int total = 0;

	while( coef != stop )
	{
		total += abs((int)*coef) * (*weight);
		++coef;
		++weight;
	}
	return total;
}

																							     											     
/*
 * Quantize the coefficients of one non-intra macroblock.
 *
 * picture        passed through to next_larger_quant().
 * src            block_count consecutive blocks of 64 coefficients.
 * dst            receives the quantized levels, same layout as src.
 * mquant         initial quantiser; selects the row inter_q_tbl[mquant].
 * nonsat_mquant  out: the quantiser actually used for the final result.
 *
 * Returns a bit pattern with one bit per block, first block in the most
 * significant of the block_count bits; a bit is set when that block holds
 * at least one non-zero quantized level.
 *
 * Whenever a level exceeds opt_dctsatlim the macroblock is re-quantized from
 * the start with the next larger quantiser.  Once no larger quantiser is
 * available, a final pass clamps offending levels to opt_dctsatlim.
 *
 * Fix: a restart now also clears `flags`.  Previously only nzflag was
 * cleared, so non-zero levels seen before the restart were folded in at
 * i == 0 and produced a spurious bit above the block_count valid bits.
 */
int quant_non_intra(
						   pict_data_s *picture,
						   int16_t *src, int16_t *dst,
						   int mquant,
						   int *nonsat_mquant)
{
	int i;
	int x, y, d;
	int nzflag = 0;
	int flags = 0;			/* OR of all levels in the current block */
	int saturated = 0;		/* set once mquant cannot be raised further */
	int clipvalue  = opt_dctsatlim;
	int coeff_count = 64*block_count;
	uint16_t *quant_mat = inter_q_tbl[mquant]; /* inter_q */

	i = 0;
	while( i < coeff_count )
	{
		if( (i%64) == 0 )
		{
			/* Block boundary: record whether the previous block was non-zero */
			nzflag = (nzflag<<1) | !!flags;
			flags = 0;
		}

		/* RJ: save one divide operation */
		x = abs( ((int)src[i]) );
		d = (int)quant_mat[(i&63)];
		y = (x<<4) / (d);

		if( y > clipvalue )
		{
			if( !saturated )
			{
				int new_mquant = next_larger_quant( picture, mquant );
				if( new_mquant != mquant )
				{
					mquant = new_mquant;
					quant_mat = inter_q_tbl[mquant];
				}
				else
				{
					saturated = 1;
				}
				/* Start over with a clean slate, including the block flags */
				i = 0;
				nzflag = 0;
				flags = 0;
				continue;
			}
			y = clipvalue;
		}

		dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
		flags |= dst[i];
		++i;
	}
	/* Fold in the last block */
	nzflag = (nzflag<<1) | !!flags;

	*nonsat_mquant = mquant;
	return nzflag;
}


/*
 * MPEG-2 inverse quantization of one intra-coded block of 64 levels.
 *
 * src      quantized levels.
 * dst      receives the reconstructed coefficients (iquantize() passes the
 *          same buffer for src and dst; each element is read before it is
 *          written).
 * dc_prec  DC precision; the DC level is scaled by 2^(3-dc_prec).
 * mquant   quantiser scale applied to the AC levels together with
 *          opt_intra_q[].
 *
 * AC results are saturated to [-2048, 2047].  Mismatch control: when the
 * sum of all reconstructed coefficients is even, the least significant bit
 * of dst[63] is toggled.
 */
void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
{
	int i, val, sum;

	/*
	 * Scale by multiplication rather than `<<`: left-shifting a negative
	 * value is undefined behaviour in C, and the DC level may be negative.
	 */
	dst[0] = src[0] * (1 << (3-dc_prec));
	sum = dst[0];

	for (i=1; i<64; i++)
	{
		val = (int)(src[i]*opt_intra_q[i]*mquant)/16;

		/* saturation */
		if (val > 2047)
			val = 2047;
		else if (val < -2048)
			val = -2048;

		dst[i] = val;
		sum += val;
	}

	/* mismatch control */
	if ((sum&1)==0) dst[63]^= 1;
}


/*
 * Inverse quantization of one non-intra block of 64 levels (the "m1"
 * variant installed in piquant_non_intra_m1).
 *
 * For every non-zero level v:
 *   r = ((2*v + sign(v)) * quant_mat[i]) / 32      (truncating division)
 *   a non-zero even r is moved one step toward zero so that it becomes odd
 * Every result is then saturated to [-2048, 2047].  Zero levels stay zero.
 */
/* static */ void iquant_non_intra_m1(int16_t *src, int16_t *dst,  uint16_t *quant_mat)
{
	int k;

	for (k = 0; k < 64; ++k)
	{
		int level = src[k];
		int recon = 0;

		if (level != 0)
		{
			int half_step = (level > 0) ? 1 : -1;

			recon = (int)((2*level + half_step)*quant_mat[k])/32;

			/* mismatch control: force non-zero even values to be odd */
			if (recon != 0 && (recon & 1) == 0)
				recon -= (recon > 0) ? 1 : -1;
		}

		/* saturation */
		if (recon > 2047)
			recon = 2047;
		else if (recon < -2048)
			recon = -2048;

		dst[k] = recon;
	}
}


/*
 * Inverse quantization of one non-intra block of 64 levels using the row
 * inter_q_tbl[mquant].
 *
 * For every non-zero level the magnitude is reconstructed as
 *   ((2*|v| + 1) * quant_mat[i]) >> 5, limited to at most 2047,
 * and the sign of the source level is re-applied through intsamesign().
 * Mismatch control: when the sum of the reconstructed magnitudes is even,
 * the least significant bit of dst[63] is toggled.
 *
 * iquantize() passes the same buffer for src and dst; src[i] is read before
 * dst[i] is written.
 */
void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
{
	uint16_t *quant_mat = inter_q_tbl[mquant];
	int parity_sum = 0;
	int i;

	for (i = 0; i < 64; i++)
	{
		int mag = src[i];

		if( mag != 0 )
		{
			mag = abs(mag);
			mag = (int)((mag+mag+1)*quant_mat[i])>>5;
			mag = intmin( mag, 2047);
			parity_sum += mag;
		}
		dst[i] = intsamesign(src[i],mag);
	}

	/* mismatch control */
	if ((parity_sum & 1) == 0)
		dst[63] ^= 1;
}

/*
 * Inverse-quantize, in place, every block in picture->qblocks.
 *
 * Macroblock k owns the block_count blocks starting at index
 * k*block_count.  Macroblocks whose mb_type has MB_INTRA set go through
 * iquant_intra() (with picture->dc_prec); all others go through
 * iquant_non_intra().  Each call uses that macroblock's own mquant.
 */
void iquantize( pict_data_s *picture )
{
	int16_t (*qblocks)[64] = picture->qblocks;
	int mb, b;

	for (mb = 0; mb < mb_per_pict; mb++)
	{
		int is_intra = (picture->mbinfo[mb].mb_type & MB_INTRA) != 0;

		for (b = 0; b < block_count; b++)
		{
			/* source and destination are the same block */
			int16_t *blk = qblocks[mb*block_count + b];

			if (is_intra)
				iquant_intra(blk, blk,
							 picture->dc_prec,
							 picture->mbinfo[mb].mquant);
			else
				iquant_non_intra(blk, blk,
								 picture->mbinfo[mb].mquant);
		}
	}
}
