#!/bin/bash
###############################################################################
# Name %n  : run-sub-boosted-classifier.sh
###############################################################################

# Exactly eight positional arguments are required. On misuse, print the usage
# line to stderr and stop with a non-zero status so calling scripts can tell
# that nothing was run.
if [[ $# -ne 8 ]]; then
  echo "Usage: $0 <models_dir> <fasta> <pssm> <ss_sa> <window_size> <output> <Device> <output_dir>" >&2
  exit 1
fi

# Positional arguments, in command-line order.
models_dir="$1"      # directory holding the model *.dat files and alphas.txt
fasta="$2"           # forwarded to create-features.pl
pssm="$3"            # forwarded to create-features.pl
ss_sa="$4"           # forwarded to create-features.pl
win_size="$5"        # window size forwarded to create-features.pl
output_fname="$6"    # file that receives the final pasted result
Device="$7"          # forwarded to run-cuda-classifier.py
Outdir="$8"          # directory in which the scratch files are created

# Number of feature rows placed in each subset file.
subset_size=20000

# Resolve the absolute directory that contains this script; the helper
# programs are invoked relative to it. Works when the script is executed
# (with or without a directory component in $0) but not when it is sourced.
# cd's output is discarded so a CDPATH hit cannot pollute the result.
BASEPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)
abspath=$BASEPATH/${0##*/}

#trap cleanup 1 2 3 6

# Point every scratch-file global at the name derived from numeric prefix $1.
# All names are built as /$Outdir/<prefix>_<role>.
set_scratch_names() {
  local id=$1
  FEAT_FILE=/$Outdir/${id}_feat
  DAT_FILE=/$Outdir/${id}_dat
  COMMENT_FILE=/$Outdir/${id}_comment
  OUTPUT_FILE=/$Outdir/${id}_output
  PROBS_FILE=/$Outdir/${id}_probs
  TMP_FILE=/$Outdir/${id}_tmp
  DUMMY_DIST=/$Outdir/${id}_dummy
  ROUND_PROBS=/$Outdir/${id}_round_probs
  SUBS_PREFIX=/$Outdir/${id}_sub
}

# Return 0 when any currently selected scratch name already exists on disk,
# including any per-subset file named "<SUBS_PREFIX>_*"; return non-zero when
# all of the names are free.
scratch_names_in_use() {
  local candidate
  for candidate in "$FEAT_FILE" "$DAT_FILE" "$COMMENT_FILE" "$OUTPUT_FILE" \
                   "$PROBS_FILE" "$TMP_FILE" "$DUMMY_DIST" "$ROUND_PROBS" \
                   "$SUBS_PREFIX"; do
    if [[ -e "$candidate" ]]; then
      return 0
    fi
  done
  # compgen -G succeeds only if the glob matches at least one existing path.
  compgen -G "${SUBS_PREFIX}_*" > /dev/null
}

# Start from this shell's PID and keep doubling the prefix until none of the
# derived names collides with something already present.
rand=$$
while true; do
  set_scratch_names "$rand"
  rand=$(( 2 * rand ))
  if ! scratch_names_in_use; then
    break
  fi
done

# Pre-create two empty scratch files: the table that the per-model
# probabilities are later pasted onto, and the placeholder file handed to
# create-features.pl. Quoted so paths containing spaces stay intact.
touch -- "$OUTPUT_FILE" "$DUMMY_DIST"
# Remove every scratch file this run may have created. Safe to call when some
# or all of the files are absent, and safe to call more than once.
# Globals read: FEAT_FILE, DAT_FILE, COMMENT_FILE, OUTPUT_FILE, TMP_FILE,
#               PROBS_FILE, ROUND_PROBS, DUMMY_DIST, SUBS_PREFIX
# Outputs:      names of the removed subset files (rm -v) on stdout
# Returns:      0
cleanup() {
  local scratch

  for scratch in "$FEAT_FILE" "$COMMENT_FILE" "$OUTPUT_FILE" "$TMP_FILE" \
                 "$PROBS_FILE" "$ROUND_PROBS" "$DUMMY_DIST"; do
    # Skip unset names so rm is never handed an empty operand.
    if [[ -n "$scratch" ]]; then
      /bin/rm -f -- "$scratch"
    fi
  done

  # The .npy companion is removed whether or not the bare DAT_FILE exists.
  if [[ -n "$DAT_FILE" ]]; then
    /bin/rm -f -- "$DAT_FILE" "$DAT_FILE.npy"
  fi

  # Per-subset files share SUBS_PREFIX. The guard keeps an empty prefix from
  # turning the glob into "_*" in the current directory.
  if [[ -n "$SUBS_PREFIX" ]]; then
    for scratch in "${SUBS_PREFIX}"_*; do
      # An unmatched glob stays literal; only act on paths that exist.
      if [[ -e "$scratch" ]]; then
        /bin/rm -v -- "$scratch"
      fi
    done
  fi

  return 0
}

# Print the zero-based index of the last subset needed to hold $1 rows in
# chunks of $2 rows. An exact multiple does not produce a trailing empty
# subset; zero rows still yields index 0 (a single, empty subset).
last_subset_index() {
  local rows=$1 chunk=$2
  if (( rows > 0 )); then
    echo $(( (rows - 1) / chunk ))
  else
    echo 0
  fi
}

# Generate the feature file. Lines starting with '#' are comment lines,
# everything else is a data row.
"$BASEPATH/create-features.pl" "$fasta" "$pssm" "$ss_sa" "$DUMMY_DIST" 8 "$win_size" > "$FEAT_FILE"

# Keep the comment lines on their own; they are used to label the output.
sed -ne '/^#/p' "$FEAT_FILE" > "$COMMENT_FILE"

# Count the data rows and derive the index of the last subset.
total=$(grep -vc '^#' -- "$FEAT_FILE")
SUBSET_CNT=$(last_subset_index "$total" "$subset_size")

echo "Num subset: $SUBSET_CNT";

for (( i = 0; i <= SUBSET_CNT; i++ )); do

  echo "Splitting subset $i"

  # 1-based, inclusive row range covered by subset i.
  str=$(( i * subset_size + 1 ))
  end=$(( (i + 1) * subset_size ))

  sed -e '/^#/d' "$FEAT_FILE" | sed -ne "${str},${end}p" > "${SUBS_PREFIX}_${i}.txt"
  wc -l "${SUBS_PREFIX}_${i}.txt"

  # Convert the text subset; the classifier loop later reads
  # ${SUBS_PREFIX}_${i}.dat.npy.
  python2 "$BASEPATH/feat-2-npy.py" "${SUBS_PREFIX}_${i}.txt" "${SUBS_PREFIX}_${i}.dat" > /dev/null

done

# Run through all of the classifiers...
# The number of models is the largest N found in a "model-N" file name among
# the *.dat files of the models directory.
model_cnt=0
for model_file in "$models_dir"/*.dat; do
  if [[ ${model_file##*/} =~ model-([0-9]+) ]]; then
    # 10# forces base 10 so a zero-padded number is not read as octal.
    model_num=$(( 10#${BASH_REMATCH[1]} ))
    if (( model_num > model_cnt )); then
      model_cnt=$model_num
    fi
  fi
done

echo "Number of models in this boosted classifier: $model_cnt";

if (( model_cnt < 1 )); then
  echo "No model-N*.dat files found in $models_dir" >&2
  cleanup
  exit 1
fi

for (( i = 1; i <= model_cnt; i++ )); do
  # Exactly one file must match; zero or several matches would shift the
  # argument list handed to the classifier.
  model_matches=( "$models_dir"/model-"${i}"_* )
  if [[ ${#model_matches[@]} -ne 1 || ! -e ${model_matches[0]} ]]; then
    echo "Expected exactly one file matching $models_dir/model-${i}_*" >&2
    cleanup
    exit 1
  fi
  model=${model_matches[0]}
  echo "Using model $model";

  # The classifier is given the model name with "_wghts.dat" stripped;
  # computed once per model rather than once per subset.
  model_prefix=${model/_wghts.dat/}

  # Start this model's probability column empty.
  : > "$ROUND_PROBS"

  for (( j = 0; j <= SUBSET_CNT; j++ )); do

    python2 "$BASEPATH/run-cuda-classifier.py" "${SUBS_PREFIX}_${j}.dat.npy" "$model_prefix" "$PROBS_FILE" "$Device"
    cat "$PROBS_FILE" >> "$ROUND_PROBS"

  done

  # Append this model's probabilities as a new space-separated column.
  paste -d' ' "$OUTPUT_FILE" "$ROUND_PROBS" > "$TMP_FILE"
  mv "$TMP_FILE" "$OUTPUT_FILE"

done

# Combine classifiers using alphas
"$BASEPATH/list-boosted-n-scaled-output.pl" "$OUTPUT_FILE" "$models_dir/alphas.txt" "$model_cnt" > "$TMP_FILE"

# Make list w/ output: take the third comma-separated field of every comment
# line, strip its leading blanks, and paste the combined scores next to it.
# The result file is written exactly once, here.
cut -d',' -f 3 "$COMMENT_FILE" | sed -e 's/^ \s*//' > "$OUTPUT_FILE"
paste -d' ' "$OUTPUT_FILE" "$TMP_FILE" > "$output_fname"

cleanup


