#!/opt/local/bin/perl

# rebuilds bayesian spam database
# (c) John Hanna 2003 under the terms of the GPL
 # Updated July 2004 for simple proxy support.

open(F,"<assp.cfg"); local $/; (%Config)=split(/:=|\n/,<F>); close F;
$spamlog=$Config{spamlog} && "$Config{base}/$Config{spamlog}" || 'spam';
$notspamlog=$Config{notspamlog} && "$Config{base}/$Config{notspamlog}" || 'notspam';
$correctedspam=$Config{correctedspam} && "$Config{base}/$Config{correctedspam}" || 'errors/spam';
$correctednotspam=$Config{correctednotspam} && "$Config{base}/$Config{correctednotspam}" || 'errors/notspam';
$spamdb=$Config{spamdb} && "$Config{base}/$Config{spamdb}" || 'spamdb';
$whitelistdb=$Config{whitelistdb} && "$Config{base}/$Config{whitelistdb}" || 'whitelist';
$MaxWhitelistDays=$Config{MaxWhitelistDays} || 90;
$OrderedTieHashSize=$Config{OrderedTieHashSize} || 5000;
$Log=$Config{logfile} && "$Config{base}/$Config{logfile}" || 'maillog.txt';
$whiteRE=$Config{whiteRE};
$KeepWhitelistedSpam=$Config{KeepWhitelistedSpam};
$proxyserver=$Config{proxyserver};

use bytes; # get rid of anoying 'Malformed UTF-8' messages
$EmailAdrRe="[^()<>@,;:\\/\"\\[\\]\000-\040]+";
$EmailDomainRe='\w[\w\.\-]*\.\w+';

# data for DayOfWeek function
#my %Months=(Jan,1,Feb,2,Mar,3,Apr,4,May,5,Jun,6,Jul,7,Aug,8,Sep,9,Oct,10,Nov,11,Dec,12);
#my %Month=(1,0,2,3,3,2,4,5,5,0,6,3,7,5,8,1,9,4,10,6,11,2,12,4,);
#my %Weekday=(0,'srdSUN',1,'srdMON',2,'srdTUE',3,'srdWED',4,'srdTHU',5,'srdFRI',6,'srdSAT',);

getmaxtick('rebuild');

$spamObject=tie(%spam,orderedtie,"spamtmp") if $Config{RamSaver};
$WhitelistObject=tie %Whitelist,orderedtie,"$base/$whitelistdb" unless $KeepWhitelistedSpam;

$starttime=time;

# add(isspam,filename,weight)
# read the corrections
print "Analyzing $correctedspam\n";
add(1,$_,2,\&spamHash) for (glob("$correctedspam/*"));
&tick;
print "\n\n";

print "Analyzing $correctednotspam\n";
add(0,$_,4,\&hamHash) for (glob("$correctednotspam/*"));
&tick;
print "\n\n";

# read spam database
print "Analyzing $spamlog\n";
add(1,$_,1,\&checkspam) for (glob("$spamlog/*"));
#for $n (0 .. 4000) {add(1,"spam/$n",1,\&checkspam)};
&tick;

print "\n\n";
# read non-spam database
print "Analyzing $notspamlog\n";
add(0,$_,1,\&checkham) for (glob("$notspamlog/*"));
#for $n (0 .. 4000) {add(0,"notspam/$n",1,\&checkspam)};
&tick;

print "\n\n";

# update stats
$norm= $HamWordCount? $SpamWordCount/$HamWordCount: 1;
print "Found $SpamWordCount spam words, $HamWordCount non-spam words.\nGenerating weighted keys...\n";
printf "norm=%.4f\n",$norm;
$unknowns=0; $unknownt=0;
open(F,">$spamdb.tmp") || die "couldn't open $spamdb.tmp: $!\n";
binmode F;
print F "\n";

if($spamObject) {
 $spamObject->flush();
 open(I,"<spamtmp");
 local $/="\n";
 while(<I>) {
  ($_,$s,$t)=/(.*)\002(\d+) (\d+)/;
  $t=($t-$s)*$norm+$s; # normalize t
  if($t < 5) {
   #$unknowns+=$s; $unknownt+=$t;
   next;
  }
  # if token represents all spam or all ham then square its value
  if($s==$t || $s==0) {
   $s=$s*$s; $t=$t*$t;
  }
  $v=(1+$s)/($t+2);
  $v=sprintf("%.7f",$v); $v='0.9999999' if $v >= 1; $v='0.0000001' if $v<=0;
  print F "$_\002$v\n" if abs($v-.5) > .09;
  &tick if ($Tick++ & 0x3f)==0;
 }
 close F;
} else {
 my ($k,$v);
 while(($_,$v)=each(%spam)) {
  ($s,$t)=split(' ',$v);
  $t=($t-$s)*$norm+$s; # normalize t
  if($t < 5) {
   #$unknowns+=$s; $unknownt+=$t;
   next;
  }
  # if token represents all spam or all ham then square its value
  if($s==$t || $s==0) {
   $s=$s*$s; $t=$t*$t;
  }
  $v=(1+$s)/($t+2);
  $v=sprintf("%.7f",$v); $v='0.9999999' if $v >= 1; $v='0.0000001' if $v<=0;
  push(@result,"$_\002$v\n") if abs($v-.5) > .09;
  &tick if ($Tick++ & 0x3f)==0;
 }
 print "Saving rebuilt SPAM database\n";
 undef %spam; # free some memory
 for (sort @result) { print F $_; }
 close F;
}
#printf "unk=($unknowns/$unknownt)=%.7f\n", $unknowns/$unknownt;

 unlink("$spamdb.bak") || print "Couldn't unlink '$spamdb.bak' $!\n";
 rename($spamdb,"$spamdb.bak") || print "Couldn't rename '$spamdb' to '$spamdb.bak' $!\n";
 rename("$spamdb.tmp",$spamdb) || print "Couldn't rename '$spamdb.tmp' to '$spamdb' $!\n";


# create helo blacklist
open(F,">$spamdb.helo.tmp") || print "Couldn't open '$spamdb.helo.tmp' $!\n";
binmode F;
print F "\n";
while(($k,$v)=each(%Helo)) {
 push(@Helo,"$k\0021\n") if $v->[1]/($v->[0]+$v->[1]+.1) > .98;
}
print F sort @Helo;
close F;
undef @Helo; undef %Helo;
unlink("$spamdb.helo.bak");
rename("$spamdb.helo","$spamdb.helo.bak");
rename("$spamdb.helo.tmp","$spamdb.helo");

 if(rand()< .05) {
  # rarely, let's clean the whitelist of old entries
  $t=time - 24*3600*$MaxWhitelistDays;
  print "Cleaning whitelist\n";

  if(open(F,"<$whitelistdb") && open(O,">$whitelistdb.tmp")) {
   binmode(F);
   binmode(O);
   local $/="\n";
   <F>; print O "\n";
   while(<F>) {
    my ($a,$time)=split("\002",$_);
    next if $t > $time || length($a)>60;
    print O;
   }
   close F; close O;
   unlink("$whitelistdb.bak");
   rename($whitelistdb,"$whitelistdb.bak");
   rename("$whitelistdb.tmp",$whitelistdb);
  }

  if( open(F,"<goodhosts") && open(O,">goodhosts.tmp")) {
   binmode(F);
   binmode(O);
   $t=time - 24*3600*20;
   local $/="\n";
   <F>; print O "\n";
   while(<F>) {
    my ($a,$time)=split("\002",$_);
    next if $time > 99999999 && $t > $time;
    print O;
   }
   close F; close O;
   unlink("goodhosts.bak");
   rename('goodhosts',"goodhosts.bak");
   rename("goodhosts.tmp",'goodhosts');
  }
 }

putmaxtick('rebuild');
printf "\ntotal time processing=%d second(s)\n",time-$starttime;
if($spamObject) {unlink("spamtmp");}

uploadgreylist() unless $Config{noGreyListUpload};

sub mlog { print "$_[1]\n";}
sub spamHash { $SpamHash{hash($_[1])}=''; }

sub hamHash { $HamHash{hash($_[1])}=''; }

sub checkspam {
 my $h;
 #if(whitelisted($_[1])) {print "wl: $_[1]\n\n"; return 1;}
 if(defined($HamHash{$h=hash($_[1])}) || whitelisted($_[1])) {
  # we've found a message in the spam database that is the same as one in the corrected Ham group
  my $fn=shift;
  # delete it
  #print "$fn is spam match\n'$h' -> $HT{$h}\n";
  #$fn2=$fn; $fn2=~s/spam/spam2/;
  #rename($fn,$fn2);
  unlink($fn);
  return 1;
 }
 0;
}

sub whitelisted {
 return 0 if $KeepWhitelistedSpam;
 my $m=shift;
 # test against expression to recognize whitelisted mail
 return 1 if $whiteRE && $m=~/$whiteRE/ois;
 # we should test whitere against "clean"ed mail, but I don't want to waste the cpu time
 $m=~s/\n\r?\n.*//s; # remove body
 while($m=~/([^:<>,;"'\(\)\s\[\]]+\@[^<>,;"'\(\)\s\?\[\]]+\.[^<>,;"'\(\)\s\?\[\]]+)/ig) {
  return 1 if $Whitelist{lc $1};
 }
 0;
}

sub checkham {
 my $h;
 if(defined($SpamHash{$h=hash($_[1])}) ) {
  # we've found a message in the ham database that is the same as one in the corrected spam group
  my $fn=shift;
  # delete it
  #print "$fn is hpam match\n'$h' -> $HT{$h}\n";
  #$fn2=$fn; $fn2=~s/spam/spam2/;
  #rename($fn,$fn2);
  unlink($fn);
  return 1;
 }
 0;
}

sub get {
 my ($fn,$sub)=@_;
 open(F,"<$fn") || return '';
 my $m;
 read(F,$m,10000); $orig=$m;
 close F;
 return '' if $sub->($fn,$m);
 if($spamObject && $GetCount++>500) {
  #print "flushing\n";
  $spamObject->flush();
  $GetCount=0;
 }
 $m;
}

sub add {
 #print "+" if $Counter++ % 100 ==0;
 &tick if ($Tick++ & 0x3f)==0;
 my ($spam,$fn,$factor,$sub)=@_;
 #print "$fn <$spam> [$factor]\n";
 return if -d $fn;
 my ($t,$lt,$llt);
 local $_=get($fn,$sub);
 return unless $_; # use $$sub to identify and remove spam or ham that matches corrected items
 my ($helo)=$_=~/helo=(.*?)\)/i;
 $Helo{lc $helo}->[$spam]+=$factor;
 $_=clean($_);
 while(/([-\$A-Za-z0-9\'\.!\240-\377]+)/g) {
  next if length($1) > 20 || length($1) < 2;
  $llt=$lt; $lt=$t; $t=lc $1;
  #next if $t=~/^\d/; # ignore numbers
  $t=~s/[,.']+$//; $t=~s/!!!+/!!/g; $t=~s/--+/-/g;
  if($spam) {$SpamWordCount+=$factor} else {$HamWordCount+=$factor}
  #$spam{$t}+=$factor if $spam;
  #$tot{$t}+=$factor;
  #$spam{"$lt $t"}+=$factor if $spam;
  #$tot{"$lt $t"}+=$factor;
  my ($sfac, $tfac)=split(' ',$spam{"$lt $t"});
  $sfac+=$spam? $factor: 0;
  $tfac+=$factor;
  $spam{"$lt $t"}="$sfac $tfac";
 }
}

# clean up source email
sub clean {
 local $_="\n".shift;
 my ($helo)=/helo=([^)]+)\)/i;
 $helo=~s/(\w+)/ hlo $1 /g if length($helo) > 19; # if the helo string is long, break it up
 my $rcpt="rcpt ".join(" rcpt ",/($EmailAdrRe\@$EmailDomainRe)/g);
 # replace &#ddd encoding
 s/&#(\d{1,3});?/chr($1)/ge;
 #s/base64.{0,99}\n\n([a-zA-Z0-9+\/\n=]+)/base64decode($1)/gse;
 # replace base64 encoding
 s/\n([a-zA-Z0-9+\/=]{40,}\r?\n[a-zA-Z0-9+\/=\r\n]+)/base64decode($1)/gse;
 # clean up quoted-printable references
 s/(Subject: .*)=\r?\n/$1\n/;
 #if(/quoted-printable/) {
  s/=\r?\n//g;
  s/=([0-9a-fA-F]{2})/pack("C",hex($1))/gei;
 #}
 #s/(http:\/\/\S+)/fixurl($1)/ige;
 s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge; # replace url encoding
 # strip out mime continuation
 s/.*---=_NextPart_.*\n//g;
 # mark the subject
 s/\nsubject: (.*)/fixsub($1)/ige;
 # remove received lines
 s/\n(received|Content-Type): .*(\n[\t ].*)*//ig;
 # remove other header lines
 s/(\n[a-zA-Z\-]{2,40}: .*(\n[\t ].*)*){2,}//g;
 # clean up &nbsp; and &amp;
 s/&nbsp;?/ /gi; s/&amp;?/and/gi;
 s/(\d),(\d)/$1$2/g;
 s/\r//g; s/ *\n/\n/g;
 s/\n\n\n\n\n+/\nblines blines\n/g;
 # clean up html stuff
 s/<script.*?>\s*(<!\S*)?/ jscripttag jscripttag /ig;
 while(s/(\w+)(<[^>]*>)((<[^>]*>)*\w+)/$2$1$3/g){} # move html out of words
 s/<([biu]|strong)>/ boldifytext boldifytext /gi;
 # remove some tags that are not informative
 s/<\/?(p|br|div|t[dr])[^>]*>/\n/gi; s/<\/([biu]|font|strong)>//gi;
 s/<\/?(html|meta|head|body|span|o)[^>]*>//ig;
 s/(<a\s[^>]*>)(.*?)(<\s*\/a\s*>)/$1.fixlinktext($2).$3/igse;
 s/<\s*\/a\s*>//gi;
 # treat titles like subjects
 s/<title[^>]*>(.*?)<\/title>/fixsub($1)/ige;
 # remove style sheets
 s/<style[^>]*>.*?<\/style>//igs;
 # remove html comments
 s/<!.*?-->//gs; s/<![^>]*>//g;
 # look for random words
 s/[ a-z0-9][ghjklmnpqrstvwxz_]{2}[bcdfghjklmnpqrstvwxz_0-9]{3}\S*/ randword randword /gi;
 # remove mime seperators
 s/\n--.*randword.*//g;
 # look for linked images
 s/(<a[^>]*>[^<]*<img)/ linkedimage linkedimage $1/gis;
 s/<[^>]*href\s*=\s*("[^"]*"|\S*)/fixhref($1)/isge;
 s/http:\/\/(\S*)/fixhref($1)/isge;
 s/(\S+\@\S*\.\w{2,3})\b/fixhref($1)/ge;
 #open(F,">t"); print F $_; close F;
 "helo: $helo\n$rcpt\n$_";
 #"helo: $helo\n$rtime\n$_";
}

sub dayofweek {
 # this is mercilessly hacked from John Von Essen's Date::Day
 my ($d, $m, $y)=$_[0]=~/(\S+) +(\S+) +(\S+)/;
 $y+=2000;
 $m=$Months{$m};
 if($m <= 2){ $y--; }
 my $wday = (($d+$Month{$m}+$y+(int($y/4))-(int($y/100))+(int($y/400)))%7);
 return $Weekday{$wday};
}

sub fixhref { my $t=shift; $t=~s/(\w+)/ href $1 /g; $t;}

sub fixlinktext { my $t=shift; $t=~s/(\w+)/atxt $1/g; $t;}

sub fixurl {
 my $a=shift;
 $a=~s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge;
 $a;
}

sub fixsub {
 my $s=shift;
 #print "$s=>";
 $s=~s/ {3,}/ lotsaspaces /g;
 $s=~s/(\S+)/ssub $1/g;
 #print "$s\n";
 "\n$s ssub";
}

sub base64decode {
 my $str = shift;
 my $res = "\n\n";
 $str =~ tr|A-Za-z0-9+/||cd;
 $str =~ tr|A-Za-z0-9+/| -_|;
 while ($str =~ /(.{1,60})/gs) {
  my $len = chr(32 + length($1)*3/4);
 $res .= unpack("u", $len . $1 );
 }
 $res;
}

sub hash {
 # creates a $len length hash of $msg
 my $msg=shift;
 my $len=20;
 my ($head,$bod)=$msg=~/^(.*?)\n\r?\n(.*)/s;
 my ($sub)=$msg=~/Subject: (.*)/i;
 my $hash=substr($sub,0,20).substr($bod,0,260);
 my @a=(0 .. 94);
 my $c=0;
 for my $n (0 .. length($hash)) {
  $c=($a[$c]+$c+ord(substr($hash,$n,1))) % 95;
  my $n2=$n % 95;
  @a[$n2,$c]=@a[$c,$n2];
 }
 for my $n (0 .. length($hash)) {
  $c=($a[$c]+$c+ord(substr($hash,$n,1))) % 95;
  my $n2=$n % 95;
  @a[$n2,$c]=@a[$c,$n2];
 }
 my $r='';
 $c=0;
 for my $n (@a) {
  $r.=chr($n+32);
  last if ++$c > $len;
 }
 #$HT{$r}=$hash;
 $r;
}

sub tick {
  my $stars=(70 * $Tick / $MaxTick); $stars=70 if $stars > 70;
  $stars='*' x $stars;
  print "$Tick $stars \r";
}
sub getmaxtick {
  if(open(F,"<$_[0].mt")) {
   $MaxTick = <F>;
   close F;
   $MaxTick=~y/0-9//cd;
   $MaxTick+=0;
  }
  $MaxTick=1000000 unless $MaxTick>1000;
  print "mt=$MaxTick\n";
}
sub putmaxtick {
  open(F,">$_[0].mt");
  print F $Tick;
  close F;
}

sub uploadgreylist {
 use IO::Socket;
 my ($day,$gooddays);
 $day = localtime(); $day=~s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/; $gooddays.="$day|";
 $day = localtime(time - 24*3600); $day=~s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/; $gooddays.="$day|";
 $day = localtime(time - 48*3600); $day=~s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/; $gooddays.="$day|";
 $day = localtime(time - 72*3600); $day=~s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/; $gooddays.="$day";
 local $/="\n";
 open(F,"<$Log") || return; #die "Couldn't open logfile '$Log': $!\n";
 my %locals=(127,1,10,1,'192.168',1,'169.254',1);
 for (16 .. 31) {$locals{"172.$_"}=1}
 my ($date,$ip,$i1,$i2,$m,%m);
 while(<F>) {
  next unless ($date,$ip,$i1,$i2,$m)=/($gooddays) \S+ ((\d+)\.(\d+)\.\d+)\.\d+ .* to: \S+ (.*)/io;
  next if $locals{$i1} || $locals{"$i1.$i2"};
  if($m=~/local or whitelisted|message ok/) {$m{$ip}++; $ok{$ip}++;}
  elsif($m=~/ spam/i) {$m{$ip}++}
 }
 return unless %m;
 for (sort keys %m) {$st.= "$_\001$m{$_}\002$ok{$_}\003";}
 my $peeraddress,$connect;
 if ($proxyserver) {
   print "Uploading Greylist via Proxy: $proxyserver\n";
   $peeraddress = $proxyserver;
   $connect = "POST http://assp.sourceforge.net/cgi-bin/uploadGrey.pl HTTP/1.0";
 } else {
   print "Uploading Greylist via Direct Connection\n";
   $peeraddress = "assp.sourceforge.net:80";
   $connect = "POST /cgi-bin/uploadGrey.pl HTTP/1.1
Host: assp.sourceforge.net";
 }
 my $s=new IO::Socket::INET(Proto=>'tcp',PeerAddr=>$peeraddress,Timeout=>2);
 if($s) {
  my $len=length($st);
  $connect.="
Content-Type: application/x-www-form-urlencoded
Content-Length: $len

$st";
  print $s $connect;
  $s->close;
  print "uploaded $len bytes\n";
 } else {
  print "Couldn't connect to assp.sourceforge.net to upload greylist\n";
 }
}


#####################################################################################
#                orderedtie
{
package orderedtie;
# This is a tied value that caches lookups from a sorted file; \n separates records,
# \002 separates the key from the value. After main::OrderedTieHashSize lookups the cache is
# cleared. This give us most of the speed of the hash without the huge memory overhead of storing
# the entire hash and should be totally portable. Picking the best value for n requires some
# tuning. A \n is required to start the file.

# if you're updating entries it behoves you to call flush every so often to make sure that your
# changes are saved. This also frees the memory used to remember updated values.

# for my purposes a value of undef and a nonexistant key are the same

# Obviosly if your keys or values contain \n or \002 it will totally goof things up.


sub TIEHASH {
 my ($c,$fn)=@_;
 my $self={
  fn => $fn,
  age => mtime($fn),
  cnt => 0,
  cache => {},
  updated => {},
  ptr => 1,
 };
 bless $self, $c;
 return $self;
}
sub DESTROY { $_[0]->flush(); }

sub mtime { my @s=stat($_[0]); $s[9]; }

sub flush {
 my $this=shift;
 return unless %{$this->{updated}};
 my $f=$this->{fn};
 #print "flushing $f\n";
 open(O,">$f.tmp") || return undef;
 binmode(O);
 open(I,"<$f") || print O "\n";
 binmode(I);
 local $/="\n";
 my @l=(sort keys %{$this->{updated}});
 my ($k,$d,$r,$v);
 while($r=<I>) {
  ($k,$d)=split("\002",$r);
  while(@l && $l[0] lt $k) {
   $v=$this->{updated}{$l[0]};
   print O "$l[0]\002$v\n" if $v;
   shift(@l);
  }
  if($l[0] eq $k) {
   $v=$this->{updated}{$l[0]};
   print O "$l[0]\002$v\n" if $v;
   shift(@l);
  } else {
   print O $r;
  }
 }
 while(@l) {
  $v=$this->{updated}{$l[0]};
  print O "$l[0]\002$v\n" if $v;
  shift(@l);
 }
 close I; close O; unlink($f); rename("$f.tmp", $f);
 $this->{updated}={};
}

sub STORE {
 my ($this, $key, $value)=@_;
 $this->{cache}{$key}=$this->{updated}{$key}=$value;
}

sub FETCH { my ($this, $key)=@_;
 return $this->{cache}{$key} if exists $this->{cache}{$key};
 $this->resetCache() if($this->{cnt}++ >$main::OrderedTieHashSize || ($this->{cnt} & 0x1f) == 0 && mtime($this->{fn}) != $this->{age});

 return $this->{cache}{$key}=binsearch($this->{fn},$key);
}

sub resetCache {
 my $this=shift;
 $this->{cnt}=0;
 $this->{age}=mtime($this->{fn});
 $this->{cache}={%{$this->{updated}}};
 #main::mlog(0,"cache reset ($this->{fn})");
}

sub binsearch {
 my ($f,$k)=@_;
 open(F,"<$f") || return undef;
 binmode(F);
 my $siz=my $h=-s $f;
 $siz-=1024;
 my $l=0;
 my $k0=$k;
 $k=~s/([\[\]\(\)\*\^\!\|\+\.\\\/\?\`\$\@\{\}])/\\$1/g; # make sure there's no re chars unqutoed in the key
 #print "k=$k ($_[1])\n";
 while(1) {
  my $m=(($l+$h)>>1)-1024;
  $m=0 if $m < 0;
  #print "($l $m $h) ";
  seek(F,$m,0);
  my $d; my $read= read(F,$d,2048);
  if( $d=~/\n$k\002([^\n]*)\n/) {
   close F;
   #print "got $1\n";
   return $1;
  }
  my ($pre,$first,$last,$post)=$d=~/^(.*?)\n(.*?)\002.*\n(.*?)\002.*?\n(.*?)$/s;
  #print "f=$first ";
  last unless defined $first;
  if($k0 gt $first && $k0 lt $last) {
   #print "got miss\n";
   last;
  }
  if($k0 lt $first) {
   last if $m ==0;
   $h=$m-1024+length($pre);
   $h=0 if $h < 0;
  }
  if($k0 gt $last) {
   last if $m >= $siz;
   $l=$m+$read-length($post);
  }
  #print "l=$l h=$h ";
 }
 close F;
 return undef;
}

sub FIRSTKEY { $this=shift;
 $this->flush();
 $this->{ptr}=1;
 $this->NEXTKEY();
}
sub NEXTKEY { my ($this, $lastkey)=@_;
 local $/="\n";
 open(F,"<$this->{fn}") || return undef;
 binmode(F);
 seek(F,$this->{ptr},0);
 my $r=<F>;
 return undef unless $r;
 $this->{ptr}=tell F;
 close F;
 my ($k,$v)=$r=~/(.*?)\002(.*?)\n/s;
 if(!exists($this->{cache}{$k}) && $this->{cnt}++ >$main::OrderedTieHashSize) {
  $this->{cnt}=0;
  $this->{cache}={%{$this->{updated}}};
 }
 $this->{cache}{$k}=$v;
 $k
}

sub EXISTS { my ($this, $key)=@_;
 return FETCH($this, $key);
}

sub DELETE {my ($this, $key)=@_;
 $this->{cache}{$key}=$this->{updated}{$key}=undef;
}

sub CLEAR {my ($this)=@_;
 open(F,">$this->{fn}"); binmode(F); print "\n"; close F;
 $this->{cache}={};
 $this->{updated}={};
 $this->{cnt}=0;
}
}

