#!/bin/bash

# Written by Karolis Uziela in 2015

# ------------------------------- Reading in arguments ------------------------------- #

SCRIPT_NAME=`basename $0`
PROQ3_DIR=`dirname $0`

source $PROQ3_DIR/paths.sh       # Read in paths 

function usage {
    echo "
NOTE: It is easier and recommended to run ProQ3/ProQ3D using run_proq3.sh script. If you are running this script manually, make sure you know what you are doing.

Usage: $SCRIPT_NAME [parameters]

-m/--model               [pdb-model]
                             PDB model file to be evaluated
-d/--deep                [yes/no] default=no
                            Should we use Deep Learning (Theano) instead of SVM? Runs ProQ3D
-r/--repack              [yes/no] default=yes
                             Should we perform the side chain repacking step?  
-t/--target_length       [Number_of_residues] default='model_length'
                             The global score is calculated as sum_of_local/target_length
-k/--keep_files          [yes/no] default=no 
                             Should we keep the repacked models, svm input files and psiblast files?
-l/--local_output        [output_file_local] default=[pdb-model].proq3.local
                             The output file for local predictions
-g/--global_output       [output_file_global] default=[pdb-model].proq3.global
                             The output file for global predictions
-o/--rosetta_log         [rosetta_log_file] default=[pdb-model].rosetta.log
                             The output file with Rosetta logs
--debug_mode             [yes/no] default=no 
                             Debug mode keeps all temporary files
"
   exit 1 
}

if [[ $# -lt 2 ]] ; then
    echo "ERROR: too few suplied arguments"
    usage
fi

while [[ $# > 1 ]]
do
key="$1"

case $key in
    -m|--model)
    MODEL="$2"
    shift # past argument
    ;;
    -d|--deep)
    DEEP="$2"
    shift # past argument
    ;;
    -r|--repack)
    REPACK="$2"
    shift # past argument
    ;;
    -t|--target_length)
    TARGET_LENGTH="$2"
    shift # past argument
    ;;
    -k|--keep)
    KEEP="$2"
    shift # past argument
    ;;
    -g|--global_output)
    GLOBAL_OUT="$2"
    shift # past argument
    ;;
    -l|--local_output)
    LOCAL_OUT="$2"
    shift # past argument
    ;;
    -o|--rosetta_log)
    ROSETTA_LOG="$2"
    shift # past argument
    ;;
    -d|--debug_mode)
    DEBUG="$2"
    shift # past argument
    ;;
    *)
    echo "ERROR: Unknown argument: $1"
    usage
    ;;
esac
shift # past argument or value
done

if [[ -n $1 ]]; then
    echo "ERROR: Argument without any supplied value: $1"
    usage
fi

if [[ $MODEL == "" ]] ; then
    echo "ERROR: -m/--model parameter is not set"
    usage
elif [[ ! -f $MODEL  ]] ; then
    echo "ERROR: model file $MODEL does not exist"
    usage
elif [[  ${MODEL: -4} != ".pdb" ]] ; then
    echo "WARNING: input file does not have .pdb extension. Creating a copy of input file with .pdb extension..."
    cp $MODEL $MODEL.pdb
    for ext in psi mtx ss2 acc ; do
        if [[ -f $MODEL.$ext && ! -f $MODEL.pdb.$ext ]] ; then
            cp $MODEL.$ext $MODEL.pdb.$ext
        fi
    done
    MODEL=$MODEL.pdb
fi

if [[ $TARGET_LENGTH != "" ]] ; then
    re='^[0-9]+$'
    if ! [[  $TARGET_LENGTH =~ $re ]] ; then
        echo "ERROR: -t/--target_length must be a number. You supplied: $TARGET_LENGTH"
        usage
    fi
fi

if [[ $GLOBAL_OUT == "" ]] ; then
    GLOBAL_OUT=$MODEL.proq3.global
fi

if [[ $LOCAL_OUT == "" ]] ; then
    LOCAL_OUT=$MODEL.proq3.local
fi

if [[ -f $GLOBAL_OUT ]] ; then
    echo "ERROR: The output file $GLOBAL_OUT already exists. Remove the file before (re)running ProQ3."
    exit 1
fi

if [[ -f $LOCAL_OUT ]] ; then
    echo "ERROR: The output file $LOCAL_OUT already exists. Remove the file before (re)running ProQ3."
    exit 1
fi

if [[ $DEEP == "no" ]] || [[ $DEEP == "No" ]] || [[ $DEEP == "NO" ]] || [[ $DEEP == "" ]] ; then
    DEEP="no"
elif [[ $DEEP == "yes" ]] || [[ $DEEP == "Yes" ]] ||  [[ $DEEP == "YES" ]] ; then
    DEEP="yes"
    if [[ "$DEEP_INSTALLED" == "no" ]] ; then
        echo "ERROR: You are trying to run the deep learning version of predictor (ProQ3D), but you don't have python with keras and numpy packages installed"
        exit 1
    fi
else
    echo "ERROR: -d/--deep $DEEP value is not recognized (should be yes or no)"
    usage
fi

if [[ $REPACK == "yes" ]] || [[ $REPACK == "Yes" ]] || [[ $REPACK == "YES" ]] || [[ $REPACK == "" ]] ; then
    R=".repacked"
    M=".minimized"
elif [[ $REPACK == "no" ]] || [[ $REPACK == "No" ]] ||  [[ $REPACK == "NO" ]] ; then
    R=""
    M=""
else
    echo "ERROR: -r/--reppack $REPACK value is not recognized"
    usage
fi

if [[ $ROSETTA_LOG == "" ]] ; then
    MODEL_PATH=`readlink -f $MODEL`
    ROSETTA_LOG="${MODEL_PATH}.rosetta.log"
fi

if [[ $KEEP == "no" ]] || [[ $KEEP == "No" ]] || [[ $KEEP == "NO" ]] || [[ $KEEP == "" ]] ; then
    KEEP="no"
elif [[ $KEEP == "yes" ]] || [[ $KEEP == "Yes" ]] ||  [[ $KEEP == "YES" ]] ; then
    KEEP="yes"
else
    echo "ERROR: -k/--keep $KEEP value is not recognized (should be yes or no)"
    usage
fi

if [[ $DEBUG == "no" ]] || [[ $DEBUG == "No" ]] || [[ $DEBUG == "NO" ]] || [[ $DEBUG == "" ]] ; then
    DEBUG="no"
elif [[ $DEBUG == "yes" ]] || [[ $DEBUG == "Yes" ]] ||  [[ $DEBUG == "YES" ]] ; then
    DEBUG="yes"
else
    echo "ERROR: --debug_mode $DEBUG value is not recognized (should be yes or no)"
    usage
fi

if [[ ! -f $MODEL.ss2 || ! -f $MODEL.acc || ! -f $MODEL.psi || ! -f $MODEL.mtx ]] ; then
    echo "ERROR: .ss2 .acc .psi or .mtx file is missing. Run <ProQ3_dir>/bin/run_all_external.pl first. Check README for details."
    exit 1
fi

# Filter out HETATM records
hetatm=`grep "HETATM" $MODEL | wc -l`
if [[ "$hetatm" != "0" ]] ; then
    grep -v "HETATM" $MODEL > ${MODEL}.no_hetatm.pdb
    for ext in psi mtx ss2 acc ; do
        cp $MODEL.$ext ${MODEL}.no_hetatm.pdb.$ext
    done
    MODEL=${MODEL}.no_hetatm.pdb
fi

if [[ $R == ".repacked" ]] ; then
    if [ ! -f $MODEL$R.ss2 ] ; then
        cp $MODEL.ss2 $MODEL$R.ss2
        cp $MODEL.acc $MODEL$R.acc
        cp $MODEL.psi $MODEL$R.psi
        cp $MODEL.mtx $MODEL$R.mtx
    fi
fi


# -------------------------------- Main script --------------------------------------- #

MODEL_DIR=`dirname $MODEL`
CURRENT_DIR=`pwd`
BASE=`basename $MODEL .pdb`
REPACK_ITERATIONS=1

if [[ ! -f $MODEL$R ]] ; then
    echo "------------------- Repacking the side chains -----------------"
    $ROSETTA_BIN/relax.linuxgccrelease -database $ROSETTA_DB -in:file:fullatom -out:file:silent_struct_type binary -nstruct $REPACK_ITERATIONS -relax:script $PROQ3_DIR/ProQ3_scripts/repack_min.script -in:file:s $MODEL -out:file:silent $MODEL$R.silent -relax:constrain_relax_to_start_coords -ignore_zero_occupancy F >>$ROSETTA_LOG
    if [[ -f $MODEL$R.silent ]] ; then
        to_extract=`grep ^SCORE $MODEL$R.silent | tail -n +2 | sort -n -k2 | head -n 1 | awk '{print $(NF)}'`
        $ROSETTA_BIN/extract_pdbs.linuxgccrelease -database $ROSETTA_DB -in:file:silent $MODEL$R.silent -in::file::tags $to_extract -out::prefix $MODEL_DIR/ -ignore_zero_occupancy F >>$ROSETTA_LOG
        mv $MODEL_DIR/$to_extract.pdb $MODEL$R
        if [[ $DEBUG == "no" ]] ; then
            rm $MODEL$R.silent
        fi
    else
        echo "WARNING: Repack failed. Using the original model instead of repacked."
        cp $MODEL $MODEL$R
    fi
fi

if [[ ! -f $MODEL$R.features.proq2.$$.temp ]] ; then
    echo "------------------- Extracting ProQ2 features -------------------"
    $ROSETTA_BIN/score.linuxgccrelease -database $ROSETTA_DB -in:file:fullatom -ProQ:basename $MODEL$R -in:file:s $MODEL$R -score:weights ProQ2 -ProQ:output_feature_vector -ignore_zero_occupancy F > $MODEL$R.features.$$.temp 
    sed '/^Outfile:/q' $MODEL$R.features.$$.temp > $MODEL$R.features.$$.temp2
    grep ^SVM $MODEL$R.features.$$.temp2 > $MODEL$R.features.proq2.$$.temp
    if [[ $DEBUG == "no" ]] ; then
        rm $MODEL$R.features.$$.temp $MODEL$R.features.$$.temp2
    fi
fi

if [[ ! -f $MODEL$R$M ]] ; then
    echo "------------------- Minimizing energies -------------------"
    cd $MODEL_DIR
    /bin/ls $BASE.pdb$R > $BASE.pdb$R.list
    $ROSETTA_BIN/minimize_with_cst.linuxgccrelease -in:file:l $BASE.pdb$R.list -in:file:fullatom -ddg::out_pdb_prefix minimized_${BASE}_$$ -database $ROSETTA_DB -ddg:min_cst -ignore_zero_occupancy F >>$ROSETTA_LOG # Different Rosetta releases name output files differently. Better be careful and add a long prefix to be sure
    mv minimized_${BASE}_$$* $BASE.pdb$R$M    
    if [[ $DEBUG == "no" ]] ; then
        rm $BASE.pdb$R.list
    fi
    cd $CURRENT_DIR
fi

if [[ ! -f $MODEL$R$M.score.$$.temp ]] ; then
    echo "------------------- Calculating Rosetta energies -------------------"
    $ROSETTA_BIN/per_residue_energies.linuxgccrelease -in:file:s $MODEL$R$M -database $ROSETTA_DB -out:file:silent $MODEL$R$M.score.$$.temp -score:weights $PROQ3_DIR/weights/talaris2013.wts -ignore_zero_occupancy F >>$ROSETTA_LOG
    $ROSETTA_BIN/per_residue_energies.linuxgccrelease -in:file:s $MODEL$R$M -database $ROSETTA_DB -out:file:silent $MODEL$R$M.score.$$.temp2 -score:weights $PROQ3_DIR/weights/some_centroid.wts -in:file:residue_type_set centroid -ignore_zero_occupancy F >>$ROSETTA_LOG
    $ROSETTA_BIN/score.linuxgccrelease -in:file:s $MODEL$R$M -database $ROSETTA_DB -out:file:silent $MODEL$R$M.score.$$.temp3 -score:weights $PROQ3_DIR/weights/global_centroid.wts -in:file:residue_type_set centroid -ignore_zero_occupancy F >>$ROSETTA_LOG
fi

if [[ ! -f $MODEL$R.proq3.svm ]] ; then
    echo "------------------- Generating SVM input -------------------"
    $R_SCRIPT $PROQ3_DIR/ProQ3_scripts/extract-per-residue-features.R $MODEL$R$M.score.$$.temp $MODEL$R$M.features.highres.$$.temp
    $R_SCRIPT $PROQ3_DIR/ProQ3_scripts/extract-per-residue-centroid-features.R $MODEL$R$M.score.$$.temp2 $MODEL$R$M.features.lowres.$$.temp
    feat_len=`cat $MODEL$R$M.features.highres.$$.temp | wc -l`
    cat $MODEL$R$M.score.$$.temp3 | grep "^SCORE:" > $MODEL$R$M.score.$$.temp4
    $R_SCRIPT $PROQ3_DIR/ProQ3_scripts/extract-per-residue-centroid-features-global-normalized.R $MODEL$R$M.score.$$.temp4 $MODEL$R$M.features.lowres_global.$$.temp $feat_len
    $PROQ3_DIR/ProQ3_scripts/svm_to_txt $MODEL$R.features.proq2.$$.temp $MODEL$R.features.proq2.$$.temp2
    cut -f 3-176 -d " " $MODEL$R.features.proq2.$$.temp2 > $MODEL$R.features.proq2.$$.temp3
    cut -f 1 -d " " $MODEL$R.features.proq2.$$.temp3 > $MODEL$R.features.target.$$.temp
    cut -f 152-154,155-157,171-172 -d " " $MODEL$R.features.proq2.$$.temp3 > $MODEL$R.features.rsa_ss.$$.temp
    paste -d " " $MODEL$R.features.target.$$.temp $MODEL$R.features.proq2.$$.temp3 $MODEL$R$M.features.highres.$$.temp $MODEL$R$M.features.lowres.$$.temp $MODEL$R$M.features.lowres_global.$$.temp > $MODEL$R.proq3
    paste -d " " $MODEL$R.features.target.$$.temp $MODEL$R.features.proq2.$$.temp3 > $MODEL$R.proq2
    paste -d " " $MODEL$R.features.target.$$.temp $MODEL$R.features.rsa_ss.$$.temp $MODEL$R$M.features.highres.$$.temp > $MODEL$R.highres
    paste -d " " $MODEL$R.features.target.$$.temp $MODEL$R.features.rsa_ss.$$.temp $MODEL$R$M.features.lowres.$$.temp $MODEL$R$M.features.lowres_global.$$.temp > $MODEL$R.lowres
    for txt_file in $MODEL$R.proq3 $MODEL$R.proq2 $MODEL$R.highres $MODEL$R.lowres ; do
        $PROQ3_DIR/ProQ3_scripts/txt_to_svm $txt_file $txt_file.svm
    done
    if [[ $DEBUG == "no" ]] ; then
        rm $MODEL$R$M.features.highres.$$.temp $MODEL$R$M.features.lowres.$$.temp $MODEL$R$M.features.lowres_global.$$.temp $MODEL$R.features.proq2.$$.temp $MODEL$R.features.proq2.$$.temp2 $MODEL$R.features.proq2.$$.temp3 $MODEL$R.features.target.$$.temp $MODEL$R$M.score.$$.temp $MODEL$R$M.score.$$.temp2 $MODEL$R$M.score.$$.temp3 $MODEL$R$M.score.$$.temp4 $MODEL$R.features.rsa_ss.$$.temp
    fi
fi

if [[ ! -f $GLOBAL_OUT ]] ; then
    echo "------------------- Making predictions -------------------"
    if [[ "$DEEP" == "no" ]] ; then
        $PROQ3_DIR/apps/svm_light/svm_classify $MODEL$R.proq2.svm $PROQ3_DIR/svm_models/ProQ2$R.model $MODEL$R.proq2.pred.$$.temp 
        $PROQ3_DIR/apps/svm_light/svm_classify $MODEL$R.lowres.svm $PROQ3_DIR/svm_models/lowres$R.model $MODEL$R.lowres.pred.$$.temp 
        $PROQ3_DIR/apps/svm_light/svm_classify $MODEL$R.highres.svm $PROQ3_DIR/svm_models/highres$R.model $MODEL$R.highres.pred.$$.temp 
        $PROQ3_DIR/apps/svm_light/svm_classify $MODEL$R.proq3.svm $PROQ3_DIR/svm_models/ProQ3$R.model $MODEL$R.proq3.pred.$$.temp 
        echo "ProQ2 ProQRosCen ProQRosFA ProQ3" > $LOCAL_OUT
        echo "ProQ2 ProQRosCen ProQRosFA ProQ3" > $GLOBAL_OUT
    elif [[ "$DEEP" == "yes" ]] ; then
        KERAS_BACKEND=theano $PYTHON_BIN $PROQ3_DIR/ProQ3_scripts/predict_theano.py $MODEL$R.proq2 $MODEL$R.proq2.pred.$$.temp $PROQ3_DIR/theano_models/casp9-and-casp10${R}.proq2_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_model.json $PROQ3_DIR/theano_models/casp9-and-casp10${R}.proq2_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_weights.h5
        KERAS_BACKEND=theano $PYTHON_BIN $PROQ3_DIR/ProQ3_scripts/predict_theano.py $MODEL$R.lowres $MODEL$R.lowres.pred.$$.temp $PROQ3_DIR/theano_models/casp9-and-casp10${R}.lowres_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_model.json $PROQ3_DIR/theano_models/casp9-and-casp10${R}.lowres_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_weights.h5
        KERAS_BACKEND=theano $PYTHON_BIN $PROQ3_DIR/ProQ3_scripts/predict_theano.py $MODEL$R.highres $MODEL$R.highres.pred.$$.temp $PROQ3_DIR/theano_models/casp9-and-casp10${R}.highres_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_model.json $PROQ3_DIR/theano_models/casp9-and-casp10${R}.highres_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_weights.h5
        KERAS_BACKEND=theano $PYTHON_BIN $PROQ3_DIR/ProQ3_scripts/predict_theano.py $MODEL$R.proq3 $MODEL$R.proq3.pred.$$.temp $PROQ3_DIR/theano_models/casp9-and-casp10${R}.proq3_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_model.json $PROQ3_DIR/theano_models/casp9-and-casp10${R}.proq3_shuffled_ONLY_L2_ADADELTA_model_500_1e-11_weights.h5
        echo "ProQ2D ProQRosCenD ProQRosFAD ProQ3D" > $LOCAL_OUT
        echo "ProQ2D ProQRosCenD ProQRosFAD ProQ3D" > $GLOBAL_OUT
    else
        echo "ERROR: invalid --deep parameter"
    fi    
    paste -d " " $MODEL$R.proq2.pred.$$.temp $MODEL$R.lowres.pred.$$.temp $MODEL$R.highres.pred.$$.temp $MODEL$R.proq3.pred.$$.temp >> $LOCAL_OUT
    if [[ $TARGET_LENGTH == "" ]] ; then
        TARGET_LENGTH=`cat $MODEL$R.proq2.pred.$$.temp | wc -l`
    fi
    tail -n +2 $LOCAL_OUT | awk -v my_length="$TARGET_LENGTH" '{ sum1 += $1 ; sum2 += $2 ; sum3 += $3 ; sum4 += $4 } END { if (my_length > 0) print sum1 / my_length, sum2 / my_length, sum3 / my_length, sum4 / my_length }' >> $GLOBAL_OUT
    if [[ $DEBUG == "no" ]] ; then
        rm $MODEL$R.proq2.pred.$$.temp $MODEL$R.lowres.pred.$$.temp $MODEL$R.highres.pred.$$.temp $MODEL$R.proq3.pred.$$.temp $MODEL$R.proq3 $MODEL$R.proq2 $MODEL$R.highres $MODEL$R.lowres
    fi
fi

if [[ "$hetatm" != "0" ]] ; then
    rm ${MODEL} # remove <model>.pdb.no_hetatm.pdb
fi

if [[ $KEEP == "no" ]] && [[ $DEBUG == "no" ]] ; then
    if [[ $R == ".repacked" ]] ; then
        rm $MODEL$R $MODEL$R$M $MODEL$R.ss2 $MODEL$R.acc $MODEL$R.psi $MODEL$R.mtx
    fi
    rm $MODEL$R.proq3.svm $MODEL$R.proq2.svm $MODEL$R.highres.svm $MODEL$R.lowres.svm $MODEL.ss2 $MODEL.acc $MODEL.psi $MODEL.mtx $MODEL.rosetta.log
elif [[ "$hetatm" != "0" ]] ; then
    for i in ${MODEL}* ; do
        newname=`echo $i | sed -e 's/.no_hetatm.pdb//'` # move <model>.pdb.no_hetatm.pdb.repacked to <model>.pdb.repacked and so on
        mv $i $newname
    done
fi