#! /bin/sh
#                               -*- Mode: Sh -*-
# re-learn-crm114.sh ---
# Author           : Manoj Srivastava ( srivasta@golden-gryphon.com )
# Created On       : Mon Jan  1 15:06:18 2007
# Created On Node  : glaurung.internal.golden-gryphon.com
# Last Modified By : Manoj Srivastava
# Last Modified On : Fri Nov 23 13:02:34 2007
# Last Machine Used: anzu.internal.golden-gryphon.com
# Update Count     : 15
# Status           : Unknown, Use with caution!
# HISTORY          :
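# Description      : Re-train the CRM114 spam.css / nonspam.css files by
#                    running mailtrainer.crm over the Spam and Ham corpus.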
#
#

# Stop the whole run as soon as any unguarded command fails.
set -o errexit

# Root of the classified mail corpus; its Spam/ and Ham/ subdirectories
# are the ones counted further down in this script.
Corpus_Top='/backup/classify/Done'
# setq VARIABLE VALUE DESCRIPTION
#
# Assign VALUE to the shell variable named VARIABLE.
#   $1 - name of the variable to set
#   $2 - value to store; an empty value is treated as a fatal error
#   $3 - human readable description used in the messages
# Reads the globals $progname (message prefix) and $Verbose (when
# non-empty, the assignment is reported on stdout).
# Exits the shell with status 1 when VALUE is empty.
setq() {
    # Guard clause: refuse to store an empty value.
    if [ -z "$2" ]; then
        echo >&2 "$progname: Unable to determine $3"
        exit 1
    fi

    if [ -n "$Verbose" ]; then
        echo "$progname: $3 is $2"
    fi

    # Only the variable name is expanded before eval runs; the value is
    # picked up as "$2" inside the eval, so it is stored verbatim.
    eval "$1=\"\$2\""
}

# withecho COMMAND [ARG...]
#
# Show the command line on stderr (preceded by a single space) and then
# run it.  The function's exit status is that of the command.
withecho () {
    echo 1>&2 " $@"
    "$@"
}

# usageversion
#
# Print the program banner and the option summary on stderr.
# Reads the globals $progname and $pversion.  Neither is required: when
# $progname is unset or empty the basename of $0 is shown instead, and an
# unset $pversion is simply left out of the banner.
# The -p values listed here must match the ones the script accepts.
usageversion () {
        cat >&2 <<END
Debian GNU/Linux ${progname:-${0##*/}}${pversion:+ $pversion}.
           Copyright (C) 2007 Manoj Srivastava.
This is free software; see the Artistic Licence for copying
conditions.  There is NO warranty.

Usage: ${progname:-${0##*/}}  [options]
Options:
  -h           print this message
  -p [ zero|one|two|four|ten|forty|hundred ]
               share of the corpus held back for validation
               (default: ten)
END
}

#
# Long term variables, which may be set in the config file or the
# environment:
# DEBUG rootdir workdir (if all original sources are kept in one dir)
#
#
# Wrapper used to launch the trainer; withecho prints the command first.
action='withecho'

# count_entries DIR
#
# Print the number of non-hidden entries in DIR on stdout.  The count is
# taken from a glob rather than from parsing ls output, so unusual file
# names cannot skew it.  A missing directory is reported on stderr and
# counted as empty, so the run carries on as it did before.
count_entries() {
    if [ ! -d "$1" ]; then
        echo >&2 "warning: corpus directory $1 not found, counting it as empty"
        echo 0
        return 0
    fi
    # Re-use the function's own positional parameters as the file list.
    set -- "$1"/*
    # An unmatched glob is left as the literal pattern, which does not exist.
    if [ -e "$1" ] || [ -L "$1" ]; then
        echo "$#"
    else
        echo 0
    fi
}

# Four independent random digits (0-9) used to build the --validate
# patterns below and in the percentage handling.
j=$(perl -le 'print int rand(10)')
k=$(perl -le 'print int rand(10)')
l=$(perl -le 'print int rand(10)')
m=$(perl -le 'print int rand(10)')

# Size of the corpus: spam plus ham messages.
num_spam=$(count_entries "$Corpus_Top/Spam")
num_good=$(count_entries "$Corpus_Top/Ham")
count=$((num_spam + num_good))

percent='ten'
## This reserves 10%, and should only be used for mature css files
# NOTE(review): the pattern is presumably matched by mailtrainer against
# corpus file names -- confirm against the mailtrainer documentation.
# NOTE(review): unlike an explicit "-p ten", this default leaves the
# streak at the full corpus size -- confirm that this is intended.
v="--validate=[$j][_][_]"
s="--streak=$count"
r="--repeat=100"

# Command line
#
# Options are normalised by the enhanced getopt(1).  Its exit status is
# tested directly on the assignment: testing $? after the eval would only
# see the status of eval, and under "set -e" a failed assignment would
# end the script before any message could be printed.
# --percent is declared with an argument (trailing ':') so that it is a
# true synonym of -p, and both long options have a matching case arm.
TEMP=$(getopt -a -s bash -o hp: --long help,percent: -n 're-learn' -- "$@") || {
    echo "Terminating..." >&2
    exit 1
}

# Note the quotes around `$TEMP': they are essential!
eval set -- "$TEMP"

# Sets opt_percent when -p/--percent is given; leaves any remaining
# non-option arguments in "$@".
while true ; do
    case "$1" in
        -h|--help)    usageversion; exit 0 ;;
        -p|--percent) opt_percent="$2"; shift 2 ;;
        --)           shift ; break ;;
        '')           break ;;
        *)            echo >&2 "Internal error!($1)"
                      usageversion; exit 1 ;;
    esac
done


# The trainer is looked up in the current directory; give up with exit
# status 2 when it is not there or not executable.
test -x mailtrainer.crm || {
    echo >&2 "Could not find mailtrainer"
    exit 2
}
# Handle percentages
#
# Translate the word given with -p into the mailtrainer arguments:
#   v      the --validate option ('' disables it)
#   count  the corpus size scaled down to the share used for training
#   s      the --streak option derived from count
#   r      the --repeat option (only changed by "hundred")
# Nothing is touched when -p was not given.  An empty value never gets
# here because of the -n guard.
if [ -n "$opt_percent" ]; then
    # Accept the request only when it names a known setting; otherwise
    # report it and keep everything chosen so far.
    case $opt_percent in
        zero|one|two|four|ten|forty|hundred)
            percent="$opt_percent"
            ;;
        *)
            echo >&2 "Unknown percentage $opt_percent, using $percent% instead"
            ;;
    esac

    # Percentage of the corpus left for training; stays empty for the
    # settings that do not rescale the streak.
    train_share=''
    case $opt_percent in
        zero)
            v=''
            ;;
        one)
            v="--validate=[$j][$k][_][_]"
            train_share=99
            ;;
        two)
            v="--validate=[$j][$k][_][_]|[$l][$m][_][_]"
            train_share=98
            ;;
        four)
            v="--validate=[$j][$k][_][_]|[$l][$m][_][_]|[$k][$l][_][_]|[$m][$j][_][_]"
            train_share=96
            ;;
        ten)
            v="--validate=[$j][_][_]"
            train_share=90
            ;;
        forty)
            v="--validate=[$j][_][_]|[$k][_][_]|[$l][_][_]|[$m][_][_]"
            train_share=60
            ;;
        hundred)
            # Everything is held back: no streak, and a single pass.
            v="--validate=[_][_][_][_]"
            count=0
            s=""
            r="--repeat=1"
            ;;
    esac

    if [ -n "$train_share" ]; then
        count=$(($count * $train_share / 100))
        s="--streak=$count"
    fi
    unset train_share
fi


#  For the very first run, there should be no validation -- we need
#  all the data points to initialize our css files
#
#  "First run" means that spam.css does not exist in the current
#  directory.  On later runs every css file that is present is copied to
#  <name>.save before cssutil is run on it.
if [ -f spam.css ]; then
    for css_file in spam.css nonspam.css; do
        if [ -f "$css_file" ]; then
            cp -f "$css_file" "$css_file.save"
        fi
    done
    unset css_file
    cssutil -b -r spam.css
    cssutil -b -r nonspam.css
else
    # NOTE(review): -S 4194000 presumably sets the size of the newly
    # created css files -- confirm against the cssutil documentation.
    cssutil -b -r -S 4194000  spam.css
    cssutil -b -r -S 4194000  nonspam.css
    # Validation is switched off here, whatever -p asked for.
    percent='zero'
    v=''
fi




# Run the trainer over the corpus and keep only the interesting lines of
# its output: indented lines and those mentioning Override, .CSS,
# Running, train, Excel, Finish or accuracy:.
#
# ${var:+"$var"} passes each option as exactly one word when it is set
# and drops it entirely when it is empty; the quoting keeps the [..]
# patterns in $v from being treated as file name globs by the shell.
# The corpus location comes from $Corpus_Top, the same root used when the
# messages were counted.
# NOTE: the status of this pipeline is the status of perl, so a failure
# of the trainer itself does not stop the script even under "set -e".
$action ./mailtrainer.crm ${v:+"$v"} ${s:+"$s"} ${r:+"$r"} \
             --spam="$Corpus_Top/Spam/"                    \
             --good="$Corpus_Top/Ham/"                     |
  perl -nle 'm/^\s+\S+|^Override|^\.CSS|^Running|\s+train|^Excel|^Finish|accuracy:/ && print'

# When validation was enabled, finish the report with a "Date:" line
# carrying the current UTC time.
# printf and "date -u" are used because "echo -n" is not defined for a
# POSIX /bin/sh and "--utc" is a GNU-only spelling of -u; the output is
# unchanged.
if [ -n "$v" ]; then
    printf 'Date:'
    date -u
fi



 # |  egrep -i '^( +|Finishing|.*train|Excell|Running)'