#!/bin/bash

# Directory that holds the MALLET jar files.
malletdir="/opt/local/share/java/mallet-2.0.8"

# Value passed to the JVM's -Xmx flag; the MALLET_MEMORY environment
# variable overrides the 1g default.
MEMORY="${MALLET_MEMORY:-1g}"

# Classpath: the two MALLET jars first, followed by the caller's CLASSPATH.
cp="${malletdir}/mallet.jar:${malletdir}/mallet-deps.jar:${CLASSPATH}"
# Uncomment to print the classpath while debugging:
#echo $cp

# The first argument names the subcommand; everything after it stays in "$@".
CMD="${1}"
shift

# Print the list of supported subcommands with a one-line description each.
# Arguments: none
# Outputs:   writes the help text to stdout
#
# The here-doc delimiter is quoted because the text contains nothing that
# should be expanded by the shell.
help()
{
cat <<'EOF'
Mallet 2.0 commands: 

  import-dir         load the contents of a directory into mallet instances (one per file)
  import-file        load a single file into mallet instances (one per line)
  import-svmlight    load SVMLight format data files into Mallet instances
  info               get information about Mallet instances
  train-classifier   train a classifier from Mallet data files
  classify-dir       classify the contents of a directory with a saved classifier
  classify-file      classify data from a single file with a saved classifier
  classify-svmlight  classify data from a single file in SVMLight format
  tag                train, test, or run a generic CRF-based tagger
  train-topics       train a topic model from Mallet data files
  infer-topics       use a trained topic model to infer topics for new documents
  evaluate-topics    estimate the probability of new documents under a trained model
  prune              remove features based on frequency or information gain
  split              divide data into testing, training, and validation portions
  bulk-load          for big input files, efficiently prune vocabulary and import docs
  run                run the main method of the specified class

Include --help with any option for more information
EOF
}

# Translate the subcommand in $CMD into the Java class that implements it,
# then launch that class on the JVM.
# Reads:  CMD (subcommand name), MEMORY (value for -Xmx), cp (classpath),
#         "$@" (remaining arguments, forwarded to the Java program).
# Exits 1 after printing the help text when the subcommand is not recognized,
# and exits 1 with a usage line when "run" is given without a class name.
CLASS=

case "$CMD" in
	import-dir) CLASS=cc.mallet.classify.tui.Text2Vectors ;;
	import-file) CLASS=cc.mallet.classify.tui.Csv2Vectors ;;
	import-svmlight) CLASS=cc.mallet.classify.tui.SvmLight2Vectors ;;
	info) CLASS=cc.mallet.classify.tui.Vectors2Info ;;
	train-classifier) CLASS=cc.mallet.classify.tui.Vectors2Classify ;;
	classify-dir) CLASS=cc.mallet.classify.tui.Text2Classify ;;
	classify-file) CLASS=cc.mallet.classify.tui.Csv2Classify ;;
	classify-svmlight) CLASS=cc.mallet.classify.tui.SvmLight2Classify ;;
	tag) CLASS=cc.mallet.fst.SimpleTagger ;;
	train-topics) CLASS=cc.mallet.topics.tui.TopicTrainer ;;
	infer-topics) CLASS=cc.mallet.topics.tui.InferTopics ;;
	evaluate-topics) CLASS=cc.mallet.topics.tui.EvaluateTopics ;;
	# prune and split are both mapped to the same class.
	prune) CLASS=cc.mallet.classify.tui.Vectors2Vectors ;;
	split) CLASS=cc.mallet.classify.tui.Vectors2Vectors ;;
	bulk-load) CLASS=cc.mallet.util.BulkLoader ;;
	run)
		# The class to run is the next argument. Refuse to continue without
		# it, otherwise java would be started without a main class.
		if [ -z "$1" ]; then
			echo "Usage: ${0##*/} run <class> [arguments...]" >&2
			exit 1
		fi
		CLASS=$1
		shift
		;;
	*) echo "Unrecognized command: $CMD"; help; exit 1 ;;
esac

# Every expansion is quoted so that values containing whitespace or glob
# characters reach java as single, unmodified arguments.
java "-Xmx$MEMORY" -ea -Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -classpath "$cp" "$CLASS" "$@"
