#! /bin/sh
#                               -*- Mode: Sh -*-
# re-learn-crm114.sh ---
# Author           : Manoj Srivastava ( srivasta@golden-gryphon.com )
# Created On       : Mon Jan  1 15:06:18 2007
# Created On Node  : glaurung.internal.golden-gryphon.com
# Last Modified By : Manoj Srivastava
# Last Modified On : Fri Nov 23 13:02:34 2007
# Last Machine Used: anzu.internal.golden-gryphon.com
# Update Count     : 15
# Status           : Unknown, Use with caution!
# HISTORY          :
# Description      : Re-run ./mailtrainer.crm over the Spam and Ham corpus
#                    under $Corpus_Top, after saving and re-initialising
#                    spam.css / nonspam.css in the current directory.
#                    A -p option selects how much of the corpus is passed
#                    to mailtrainer's --validate option.
#
#

# Make sure we abort on error
set -e

# Name used as a prefix in diagnostics and in the usage text.
progname=${0##*/}
# No version string is recorded in this file; honour one from the environment.
pversion=${pversion:-}

Corpus_Top="/backup/classify/Done"

# setq VARIABLE VALUE DOCSTRING
#   Assign VALUE to the variable named VARIABLE.  Exits with status 1 if
#   VALUE is empty; when $Verbose is non-empty the assignment is reported
#   on stdout.  DOCSTRING is only used in the messages.
setq() {
  if [ "x$2" = "x" ]; then
    echo >&2 "$progname: Unable to determine $3"
    exit 1
  else
    if [ ! "x$Verbose" = "x" ]; then
      echo "$progname: $3 is $2"
    fi
    # eval is confined to a variable name supplied by this script itself.
    eval "$1=\"\$2\""
  fi
}

# withecho COMMAND [ARGS...]
#   Print the command line on stderr (so it never pollutes a pipeline fed
#   from stdout), then run it.
withecho() {
  printf ' %s\n' "$*" >&2
  "$@"
}

# Print the version banner and usage summary on stderr.
usageversion() {
  cat >&2 <<END
Debian GNU/Linux $progname $pversion.
           Copyright (C) 2007 Manoj Srivastava.
This is free software; see the Artistic Licence for copying
conditions.  There is NO warranty.

Usage: $progname  [options]
Options:
  -h, --help     print this message
  -p, --percent  [ zero|one|two|four|ten|forty|hundred ]
END
}

#
# Long term variables, which may be set in the config file or the
# environment:
# DEBUG rootdir workdir (if all original sources are kept in one dir)
#
#
action='withecho'

# Four random digits; they are substituted into the --validate patterns
# below.  NOTE(review): presumably these select which corpus files are held
# back for validation by matching on file names -- confirm against the
# mailtrainer.crm documentation.
j=$(perl -le 'print int rand(10)')
k=$(perl -le 'print int rand(10)')
l=$(perl -le 'print int rand(10)')
m=$(perl -le 'print int rand(10)')

## This reserves 10%, and should only be used for mature css files
percent='ten'
r="--repeat=100"
opt_percent=''

# Command line
# Test getopt's own exit status: checking $? after "eval set" would only
# report on the eval.  "percent" takes an argument, hence the colon.
if ! TEMP=$(getopt -a -s sh -o hp: --long help,percent: -n 're-learn' -- "$@"); then
  echo "Terminating..." >&2
  exit 1
fi

# Note the quotes around `$TEMP': they are essential!
eval set -- "$TEMP"

while true; do
  case "$1" in
    -h|--help)
      usageversion
      exit 0
      ;;
    -p|--percent)
      opt_percent="$2"
      shift 2
      ;;
    --)
      shift
      break
      ;;
    '')
      break
      ;;
    *)
      echo >&2 "Internal error!($1)"
      usageversion
      exit 1
      ;;
  esac
done

if [ ! -x mailtrainer.crm ]; then
  echo >&2 "Could not find mailtrainer"
  exit 2
fi

# Count the corpus.  A missing directory would otherwise be silently counted
# as zero messages, because the pipeline status is that of wc.
for corpus_dir in "$Corpus_Top/Spam" "$Corpus_Top/Ham"; do
  if [ ! -d "$corpus_dir" ]; then
    echo >&2 "$progname: corpus directory $corpus_dir does not exist"
    exit 2
  fi
done
num_spam=$(ls -1 -- "$Corpus_Top/Spam" | wc -l)
num_good=$(ls -1 -- "$Corpus_Top/Ham" | wc -l)
count=$((num_spam + num_good))

# Handle percentages
# Reject unknown values first so they fall back to the default exactly as an
# omitted -p does.
case "$opt_percent" in
  ''|zero|one|two|four|ten|forty|hundred)
    ;;
  *)
    echo >&2 "Unknown percentage $opt_percent, using $percent% instead"
    opt_percent=''
    ;;
esac
percent=${opt_percent:-$percent}

# Each arm sets the --validate pattern and scales the streak length by the
# share of the corpus that is left after the validation hold-back.
case "$percent" in
  zero)
    v=''
    ;;
  one)
    v="--validate=[$j][$k][_][_]"
    count=$((count * 99 / 100))
    ;;
  two)
    v="--validate=[$j][$k][_][_]|[$l][$m][_][_]"
    count=$((count * 98 / 100))
    ;;
  four)
    v="--validate=[$j][$k][_][_]|[$l][$m][_][_]|[$k][$l][_][_]|[$m][$j][_][_]"
    count=$((count * 96 / 100))
    ;;
  ten)
    v="--validate=[$j][_][_]"
    count=$((count * 90 / 100))
    ;;
  forty)
    v="--validate=[$j][_][_]|[$k][_][_]|[$l][_][_]|[$m][_][_]"
    count=$((count * 60 / 100))
    ;;
  hundred)
    v="--validate=[_][_][_][_]"
    count=0
    r="--repeat=1"
    ;;
esac

# "hundred" passes no --streak at all; every other setting passes the
# (possibly scaled) count.
if [ "$percent" = 'hundred' ]; then
  s=''
else
  s="--streak=$count"
fi

# For the very first run, there should be no validation -- we need
# all the data points to initialize our css files
if [ ! -f spam.css ]; then
  cssutil -b -r -S 4194000 spam.css
  cssutil -b -r -S 4194000 nonspam.css
  percent='zero'
  v=''
else
  # Keep a copy of the current css files before they are touched again.
  cp -f spam.css spam.css.save
  if [ -f nonspam.css ]; then
    cp -f nonspam.css nonspam.css.save
  fi
  cssutil -b -r spam.css
  cssutil -b -r nonspam.css
fi

# ${var:+"$var"} drops an empty option entirely and passes a non-empty one
# as exactly one word; the [..] in $v must not be glob-expanded.
# NOTE: /bin/sh has no portable pipefail, so a failure of mailtrainer.crm is
# masked by the exit status of the perl filter.
$action ./mailtrainer.crm ${v:+"$v"} ${s:+"$s"} ${r:+"$r"} \
  --spam="$Corpus_Top/Spam/" \
  --good="$Corpus_Top/Ham/" |
  perl -nle 'm/^\s+\S+|^Override|^\.CSS|^Running|\s+train|^Excel|^Finish|accuracy:/ && print'

# Stamp validated runs with the UTC time they finished.
if [ -n "$v" ]; then
  printf 'Date:'
  date --utc
fi

# | egrep -i '^( +|Finishing|.*train|Excell|Running)'