#! /bin/zsh -f
#                               -*- Mode: Sh -*-
# train.sh ---
# Author           : Manoj Srivastava ( srivasta@glaurung.internal.golden-gryphon.com )
# Created On       : Wed Nov 22 12:33:55 2006
# Created On Node  : glaurung.internal.golden-gryphon.com
# Last Modified By : Manoj Srivastava
# Last Modified On : Thu Nov 23 01:36:07 2006
# Last Machine Used: glaurung.internal.golden-gryphon.com
# Update Count     : 44
# Status           : Unknown, Use with caution!
# HISTORY          :
# Description      :
#   Runs /usr/share/crm114/mailtrainer.crm repeatedly over samples of a
#   message corpus.  Each pass copies the most recently modified $limit
#   messages of every class from $Corpus_Top/{Spam,Ham} into ./Spam and
#   ./Ham, invokes the trainer on those copies, snapshots spam.css and
#   nonspam.css as <name>.<pass>, and then doubles the sample size (capped
#   at the size of the smaller class).
#
#   Must be run from the directory holding spam.css / nonspam.css; the
#   ./Spam and ./Ham directories there are DELETED and rebuilt on every pass.
#

set -e

# Root of the corpus; expected to contain Spam/ and Ham/ subdirectories.
Corpus_Top="/backup/classify/Done"
# Number of messages per class used on the first pass.
chunk=500
# First and last pass number (inclusive); also used as snapshot suffixes.
startiter=1
maxiter=6

# Echo a command line to stderr, then execute it.
# Arguments: the command and its arguments.
# Returns:   the exit status of the executed command.
withecho () {
    print -r -u2 -- " $*"
    "$@"
}

action='withecho'

here=$(pwd)

# Make sure the working sample directories exist even if no pass runs.
mkdir -p -- "$here/Ham" "$here/Spam"

# Fail early on a missing corpus: the status of `ls | wc -l` is that of
# wc, so `set -e` alone would let a missing directory count as 0 messages.
for type in Spam Ham; do
    if [[ ! -d "$Corpus_Top/$type" ]]; then
        print -r -u2 -- "train.sh: corpus directory $Corpus_Top/$type not found"
        exit 1
    fi
done

# $(( ... )) normalises any padding some wc implementations emit.
num_spam=$(( $(ls -1 -- "$Corpus_Top/Spam" | wc -l) ))
num_good=$(( $(ls -1 -- "$Corpus_Top/Ham"  | wc -l) ))

# The sample size may never exceed the smaller of the two classes, so
# both classes always contribute the same number of messages.
if (( num_spam < num_good )); then
    low_max=$num_spam
else
    low_max=$num_good
fi

limit=$chunk
for (( iteration = startiter; iteration <= maxiter; iteration++ )); do
    if (( limit > low_max )); then
        limit=$low_max
    fi

    for type in Spam Ham; do
        # Start every pass from an empty sample directory.
        rm -rf -- "$here/$type"
        mkdir -- "$here/$type"

        k=1
        # `ls -tr | tail` keeps the $limit most recently modified messages,
        # oldest of them first.  ${(f)"..."} splits the listing on newlines
        # only, so names containing blanks stay in one piece.
        for msg in ${(f)"$(ls -1tr -- "$Corpus_Top/$type/" | tail -n $limit)"}; do
            # The zero-padded sequence prefix preserves the mtime order
            # when the copies are listed by name.
            cp -- "$Corpus_Top/$type/$msg" \
               "$here/$type/msg.$(printf '%08d' $k)______$msg"
            k=$(( k + 1 ))
        done
    done

    $action /usr/share/crm114/mailtrainer.crm --spam="$here/Spam/" \
            --good="$here/Ham/" --repeat=100 --streak=$(( 10 + 2 * limit ))

    # Keep a per-pass snapshot of both statistics files.
    $action cp --sparse=always spam.css    "spam.css.$iteration"
    $action cp --sparse=always nonspam.css "nonspam.css.$iteration"
    cssdiff spam.css nonspam.css

    echo "Done with $iteration runs."
    limit=$(( 2 * limit ))
    #sleep 10
    #echo -n "Done with $iteration runs (chunk = $limit). Continue? "
    #read ans
done