
/* $Id: checksum.c,v 1.6 2005/01/12 18:52:41 alien-science Exp $ */

#include <sys/types.h>
#include <stdio.h>

#include "common.h"
#include "prog_config.h"

/*
 * State carried between successive ck_calc() calls.
 */
struct running_sum {
   int           is_empty;   /* non-zero: ck_calc() ignores s1/s2/dropoff and sums from scratch */
   unsigned      s1;         /* first running sum as stored by the last ck_calc() call (not reduced modulo BASE) */
   unsigned      s2;         /* second running sum as stored by the last ck_calc() call (not reduced modulo BASE) */
   unsigned char dropoff;    /* first byte of the buffer passed to the last ck_calc() call */
   };


/* Lookup table indexed by a byte value; filled in by init_crc_table() */
static int Crc_Table[256];

/* 
 * Currently using code from:
 * http://remus.rutgers.edu/~rhoads/Code/arccrc16.c
 * Set up a table to speed up CRC calculation
 *
 * Fills all 256 entries of Crc_Table. Every entry is rebuilt from zero
 * before it is stored, so calling this more than once leaves the same
 * table behind (XOR-ing straight into the table would cancel the
 * previous contents on a second call).
 */
void init_crc_table(void)
{
   unsigned int i, j;
   unsigned int k;
   unsigned int entry;

   for (i = 0; i < 256; i++) {
      entry = 0;
      k     = 0xC0C1;

      /* j walks the eight single-bit masks of the index byte */
      for (j = 1; j < 256; j <<= 1) {
         if (i & j) {
            entry ^= k;
            }
         /* Next per-bit contribution, kept within 16 bits */
         k = ((k << 1) & 0xffff) ^ 0x4003;
         }

      Crc_Table[i] = entry;
      }

}


/*
 * Folds nbytes bytes from buf into a running crc, one table lookup
 * per byte, and returns the updated value.
 *
 * crc    - value to start from (the result of a previous call, or the
 *          caller's initial value)
 * buf    - bytes to process; each is read as an unsigned char
 * nbytes - number of bytes to process
 *
 * Crc_Table must already have been filled in by init_crc_table().
 */
unsigned crc_calc(unsigned crc, char *buf, unsigned nbytes)
{
   const unsigned char *bytes = (const unsigned char *)buf;
   unsigned n;
   unsigned slot;

   for (n = 0; n < nbytes; n++) {
      /* Low byte of the crc mixed with the data byte picks the entry */
      slot = (crc & 0xFF) ^ bytes[n];
      crc  = (crc >> 8) ^ Crc_Table[slot];
      }

   return crc;
}


/*
 * Calculates the checksum of a window of data (currently an
 * Adler-32 style sum)
 *
 * Implementation derived from rfc1950 (Appendix)
 *  Copyright  1996 L. Peter Deutsch and Jean-Loup Gailly
 *
 * buf - first byte of the window
 * len - length of the window in bytes
 * r   - running state. When r->is_empty is non-zero the sums are
 *       computed from scratch; otherwise the sums stored by the
 *       previous call are adjusted for a window that has moved on by
 *       one byte. s1, s2 and dropoff are rewritten on every call;
 *       is_empty is never modified here, so the caller decides which
 *       path runs.
 *
 * Returns ((sum2 % BASE) << 16) + (sum1 % BASE).
 */
static unsigned
ck_calc(unsigned char *buf, size_t len, struct running_sum *r)
{

   unsigned sum1, sum2;
   size_t   i;   /* same type as len so the loop can always terminate */

   #define BASE 65521   /* largest prime smaller than 65536 */

   if (r->is_empty) {
      sum1 = 1;
      sum2 = 0;

      /* Do the sum from scratch over the length of data.
         The sums are only reduced modulo BASE at the very end, so
         this can overflow if len > 4096.
         NOTE(review): the index advances by two, so only bytes at
         even offsets contribute - confirm this stride is intended. */
      for (i = 0; i < len; i += 2) {
         sum1 += buf[i];
         sum2 += sum1;
         } 
      }
   else {
      /* First take off the effect of the byte that is dropping off
         (assuming this is len bytes ago) */
      sum1 = r->s1 -  r->dropoff;
      sum2 = r->s2 - ( r->dropoff * len);
      
      /* Add the next byte: the one that has just entered the window,
         which is its last byte. Guarded so an empty window never
         indexes before buf.
         NOTE(review): this update assumes every byte of the window was
         summed, which does not match the stride of the from-scratch
         loop above - confirm before relying on this path. */
      if (len > 0) {
         sum1 += buf[len - 1];
         }
      sum2 += sum1;
      }

   /* Make note of the running sums for later calls */
   r->s1       = sum1;
   r->s2       = sum2;
   r->dropoff  = *buf;
   
   /* Convert the running sums into a correct checksum */
   sum1 = sum1 % BASE;
   sum2 = sum2 % BASE;
   return (sum2 << 16) + sum1;
}


/*
 * Returns the number of ignored bits that gives checksums an entropy
 * that will work within the given max_block_size
 *
 * The result is the smallest shift i in 0..31 for which
 * (max_block_size / 3) is greater than 0xffffffff >> i, or 31 when no
 * shift in that range qualifies.
 */
unsigned 
get_entropy_bits(size_t max_block_size)
{
   unsigned i;
   unsigned entropy_bytes;
   unsigned entropy_bits = 0;
   size_t   third;

   /* Have blocks match at an average of 1/3 max_block_size (heuristic) */
   third = max_block_size / 3;

   /* Saturate instead of silently dropping high bits where size_t is
      wider than unsigned */
   if (third > 0xffffffffu) {
      entropy_bytes = 0xffffffffu;
      }
   else {
      entropy_bytes = (unsigned) third;
      }

   /* Search the available bitmasks */
   for (i = 0; i < 32; i++) {
      entropy_bits = i;
      if (entropy_bytes > (0xffffffffu >> i)) {
         break;
         }
      }
 
   /* %x takes an unsigned int, so convert the size_t explicitly
      (assumes VERBOSE_2 forwards its arguments printf-style -
      TODO confirm against its definition) */
   VERBOSE_2("Entropy ignore bits 0x%x, max_block_size 0x%x", 
               entropy_bits, (unsigned) max_block_size); 
   
   return entropy_bits;
}


/*
 * Slides a window over buf looking for the first position whose
 * checksum, shifted right by conf->entropy_bits, equals
 * conf->magic_sum shifted right by the same amount.
 *
 * buf        - data to scan
 * available  - number of bytes in buf
 * conf       - supplies magic_sum and entropy_bits
 * window     - number of bytes passed to each checksum calculation
 * block_len  - receives the length of the block
 *
 * returns :
 * 0  on checksum found; *block_len is one more than the offset at
 *    which the matching window starts
 * 1  on maximum length checked and no checksum found (including
 *    available <= window); *block_len is set to available
 * (-1 on error is reserved by the interface; no path here returns it)
 *
 */
int
ck_findblock(char *buf, size_t available, struct config *conf,
                        size_t window, size_t *block_len)
{
   struct running_sum rs;
   unsigned char *data;
   unsigned wanted;
   unsigned sum;
   size_t start, nstarts;

   /* The running sums stay marked as clear for every ck_calc() call */
   rs.is_empty = 1;

   /* Too little data to hold more than one window */
   if (available <= window) {
      *block_len = available;
      return 1;
      }

   /* The value a shifted checksum must equal to end a block */
   wanted = conf->magic_sum >> conf->entropy_bits;

   data    = (unsigned char *)buf;
   nstarts = available - window;

   for (start = 0; start < nstarts; start++) {

      /* Checksum of the window beginning at this offset */
      sum = ck_calc(data + start, window, &rs);

      if ((sum >> conf->entropy_bits) == wanted) {
         *block_len = start + 1;
         return 0;
         }
      }

   /* The checksum never fired, so all the available data becomes one
      block; this hurts rsync efficiency and a better approach might
      be needed */
   *block_len = available;
   return 1;
}

