#!/usr/bin/perl -w
#####################################################
#
#  conpro2casp.pl: Convert from CONPRO to CASP format
#
#  Author: Mike Sweredoski
#  Date: 4/12/2004
#
#  Usage: conpro2casp.pl author target_name model_num threshold input_file output_file method_id
#  Modification by Jianlin Cheng, 4/16/2004: add method id parameter, ignore pair with negative probability, let
#  index of aa start from 1.  
#  Method id: 1: full probability matrix, 2: fixed threshold, 3: sum of prob 4: regression 5: band with fixed threshold
#
# Copied from pspro, add one method type 5
# Jianlin Cheng, 4/07/2005
#####################################################
use strict;

# Positional command-line arguments, in order:
#   author       value written on the AUTHOR line
#   target_name  value written on the TARGET line
#   model_num    value written on the MODEL line
#   threshold    value written as the fourth column of every contact line
#   input_file   CONPRO prediction file to read
#   output_file  CASP file to create
#   method_id    selects the METHOD/REMARK text (validated further below)
#
# All seven are required. Checking here, before any file is opened, avoids
# creating or truncating the output file when the invocation is incomplete.
my $usage = "Usage: $0 author target_name model_num threshold "
          . "input_file output_file method_id\n";
die $usage if @ARGV < 7;

# splice (rather than a plain copy) consumes the arguments from @ARGV,
# exactly as the individual shifts used to; extra arguments stay in @ARGV.
my ($author, $target, $model_num, $threshold,
    $input_file, $output_file, $method_id) = splice(@ARGV, 0, 7);

# Read the whole CONPRO file into memory.  Layout as consumed by this script:
#   line 1   protein name
#   line 2   amino-acid sequence (echoed into the CASP file, and its length
#            bounds the residue indices)
#   line 3   stored in $ss  (presumably secondary structure -- not used below)
#   line 4   stored in $acc (presumably solvent accessibility -- not used below)
#   line 5+  one whitespace-separated row of the contact matrix per line
#
# Three-argument open keeps characters in the file name from being parsed as
# part of the open mode, and $! reports why an open failed.
open(CONPRO, '<', $input_file)
    or die "Couldn't open CONPRO file: $input_file ($!)\n";
my @data = <CONPRO>;
close(CONPRO);

# The four header lines are mandatory; without them the variables below
# would be undefined and every later step would misbehave.
die "CONPRO file $input_file is incomplete: expected at least 4 lines, got "
    . scalar(@data) . "\n"
    if @data < 4;

# Strip the line terminator, including the carriage return of CRLF files,
# so that length($aa) is the true number of residues.
s/\r?\n\z// for @data;

my ($protein_name, $aa, $ss, $acc, @contact_lines) = @data;

# Open the output only after the input has been validated, so a bad input
# file does not leave behind an empty or truncated CASP file.
open(CASP, '>', $output_file)
    or die "Couldn't open CASP file: $output_file ($!)\n";

# One array reference per matrix row.  split ' ' skips leading whitespace
# and treats runs of whitespace as a single separator, so columns stay
# aligned even if the rows are padded.  A blank line still yields an (empty)
# row, which keeps the row index equal to the residue index.
my @contacts = map { [ split ' ', $_ ] } @contact_lines;

# Lookup table for the METHOD/REMARK text, one entry per supported method id:
#   [ method id, METHOD description, text appended to the REMARK ]
# Entries are tried in order and the first numeric match wins.
my @method_table = (
    [ 1, "cmappro: 2D recurrent neural network, optimized for far contacts", "" ],
    [ 2, "cmappro_band: 2D recurrent neural network, predict contacts from band to band", "" ],
    [ 3, "cmappro: 2D recurrent neural network", "" ],
    [ 4, "cmappro_band: 2D recurrent neural network, predict from band to band, optimized for far contacts", "" ],
    [ 5, "cmappro_con: a consensus approach from band and full map prediction", "" ],
    [ 6, "Augment contact map with beta-residue pairs",
         "\nREMARK J. Cheng and P. Baldi Bioinformatics 2005:21(suppl):i75-84" ],
);

# Every method shares this base remark; method 6 adds a citation line to it.
my $remark = "Sequence separation of predicted contacts >= 6.";
my $method = "";

my $method_found = 0;
for my $entry (@method_table) {
    my ($id, $description, $extra_remark) = @$entry;
    next unless $method_id == $id;

    $method       = $description;
    $remark      .= $extra_remark;
    $method_found = 1;
    last;
}

# An id outside the table is a fatal usage error.
die "wrong method id.\n" unless $method_found;

# Header of the CASP RR file: format tag, target, author and remark.
print CASP "PFRMAT RR\n";
print CASP "TARGET $target\n";
print CASP "AUTHOR $author\n";
print CASP "REMARK $remark\n";

# The method description is wrapped into pieces of at most 60 characters,
# one METHOD line per piece.  An empty description still produces a single
# (empty) METHOD line.
my @method_pieces = $method =~ /.{1,60}/gs;
push @method_pieces, "" unless @method_pieces;
print CASP "METHOD $_\n" for @method_pieces;

print CASP "MODEL $model_num\n";

# The sequence is written 50 characters per line; an empty sequence still
# produces a single empty line.
my @sequence_pieces = $aa =~ /.{1,50}/gs;
push @sequence_pieces, "" unless @sequence_pieces;
print CASP "$_\n" for @sequence_pieces;


# Write one line per predicted contact, then the END record.
#
# Residue indices are 1-based in the output while @contacts is 0-based.
# Only the upper triangle (i < j) is scanned, and only pairs whose sequence
# separation is at least 6 and whose matrix value is positive are written.
# Each line is: i, j, the literal 0, $threshold, and the matrix value
# (in the CASP RR format the 0 and the threshold are the distance bounds).
my $seq_length     = length($aa);
my $min_separation = 6;    # same filter as the former abs($i-$j) > 5 test

for my $i (1 .. $seq_length) {
    # A sequence longer than the matrix has no row here; skip it instead of
    # autovivifying an empty row and comparing undefined values.
    my $row = $contacts[$i - 1];
    next unless defined $row;

    # Starting at $i + $min_separation enforces the separation filter; the
    # range is empty when that start lies past the end of the sequence.
    for my $j ($i + $min_separation .. $seq_length) {
        my $prob = $row->[$j - 1];

        # Missing or empty cells are treated as "no contact".
        next unless defined $prob && length $prob;
        next unless $prob > 0;

        printf CASP "%3u %3u 0 %2u %4.3f\n", $i, $j, $threshold, $prob;
    }
}

print CASP "END\n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close is checked to avoid a silently truncated file.
close(CASP)
    or die "Couldn't finish writing CASP file: $output_file ($!)\n";

