#!/usr/bin/perl -w
###############################################################################
# Combine the contact map prediction with beta pairings
# input: pspro dir, betapro dir, fasta file, output dir, type(1:8A,2:12A)
# output: .comb(combined map), .csp(cafasp format), .eva(eva format). prefix
# output: .bcomb (combined with band), .bcsp(band cafasp), .beva(band eva format)
# name is fasta file name (with . replace with _)
# Can be combined with 12A or 8A 
# Author: Jianlin Cheng
# Starting: 3/28/2005
###############################################################################

if (@ARGV != 4)
{
	die "need four parameters: pspro dir, betapro dir, fasta file, output dir"; 
}

$pspro_dir = shift @ARGV;
$beta_dir = shift @ARGV;
$fasta_file = shift @ARGV;
$output_dir = shift @ARGV;
#$type = shift @ARGV;

 -d $pspro_dir || die "pspro dir doesn't exist.\n";
 -d $beta_dir || die "betapro dir doesn't exist.\n";
 -d $output_dir || die "output dir doesn't exist.\n";
 -f $fasta_file || die "can't read fasta file.\n";

#if ($type != 1 && $type != 2)
#{
#	die "supported type: 1: 8A, 2:12A\n";
#}

#get the output file name for pspro
$pos = rindex($fasta_file, "/");
if ($pos >= 0)
{
	$filename = substr($fasta_file, $pos + 1);
}
else
{
	$filename = $fasta_file;
}
$filename =~ s/\./_/g;

#make contact map prediction
system("$pspro_dir/bin/predict_homo_cm.sh $fasta_file $output_dir");

#choose contact map for combination
@map_files = (); 
#if ($type == 1)
#{
$cmap = "$output_dir/$filename";
if ( -f "$cmap.cm8a") #use full map first
{
	$cmap1 =  "$cmap.cm8a";
	push @map_files, $cmap1; 
}
if ( -f "$cmap.bcm8a") #then band map
{
	$cmap2 = "$cmap.bcm8a";
	push @map_files, $cmap2; 
}
if (-f  "$cmap.con8a" ) #then combination
{
	$cmap3 = "$cmap.con8a";
	push @map_files, $cmap3; 
}
#	else
#	{
#		die "fail to predict contact map for the sequence.\n";
#	}
#}
#elsif ($type == 2)
#{
if (-f "$cmap.cm12a")
{
	$cmap4 = "$cmap.cm12a";
	push @map_files, $cmap4; 
}
#	else
#	{
#		die "fail to predict 12A contact map for the sequence.\n";
#	}
#}

#predict the beta pairings
$beta_file = "$output_dir/$filename.beta"; 
print "Predict beta-residue pairings...";
system("$beta_dir/bin/predict_beta_fasta.sh $fasta_file $beta_file");
print "done\n"; 

$use_beta = 1; 
open(BETA, $beta_file) || die "can't read the predictions of beta pairings.\n"; 
@beta = <BETA>;
close BETA;
if (@beta <= 4)
{
	print "less than two strands, no beta pairings are generated.\n";
	$use_beta = 0; 
}

foreach $cmap (@map_files)
{

#combinations
$comb_map = "$cmap.comb";
if ($use_beta == 0)
{
	#use the full contact map
	`cp $cmap $comb_map`; 
	$name1 = $beta[0]; 
}
else
{
	#how to combine	
	#simple combine by values (if the predicted value of 
	#beta-pairs is bigger, replace the old ones)

	#read the full contact map
	open(MAP, $cmap) || die "can't read contact map file.\n"; 
	@map = <MAP>;
	$name1 = shift @map; 
	$seq1 = shift @map;
	$ss1 = shift @map;
	$sa1 = shift @map;
	close MAP;

	open(BETA, $beta_file) || die "can't read the predictions of beta pairings.\n"; 
	@beta = <BETA>;
	close BETA;
	$name2 = ""; 
	$name2 = shift @beta;
	$seq2 = shift @beta;
	$ss2 = shift @beta;
	shift @beta; shift @beta;

	#consistency checking
	if ($seq1 ne $seq2 || $ss1 ne $ss2)
	{
		die "sequence or secondary structure doesn't match.\n"; 
	}

	chomp $seq2; 
	$length = length($seq2); 
	print "length of sequence: $length\n"; 
	#read the contact map
	if ($length != @map)
	{
		die "the length of sequence doesn't match with contact map size.\n";
	}
	@conmap = ();
	for ($i = 0; $i < $length; $i++)
	{
		$line = $map[$i]; 
		for ($j = 0; $j < $length; $j++)
		{
			@cons = split(/\s+/, $line);
			$conmap[$i][$j] = $cons[$j]; 
		}
	}

	#create a map from beta-residues to their positions
	%b2p = ();
	$idx = 0; 
	for($i = 0; $i < $length; $i++)
	{
		$sec = substr($ss1, $i, 1);
		if ($sec eq "E" || $sec eq "B")
		{
			$b2p{$idx} = $i; 				
			$idx++; 
		}
	}

	#read beta-residue contacts
	#@bmap = (); 
	$beta_num = $idx; 
	print "number of beta-residues: $beta_num\n"; 
	if ($beta_num != @beta)
	{
		die "the number of beta-residues doesn't match with the size of the beta pairings matrix.\n"; 
	}

	for ($i = 0; $i < $beta_num; $i++)
	{
		$line = shift @beta;
		for ($j = 0; $j < $beta_num; $j++)
		{
			@probs = split(/\s+/, $line); 
			#$bmap[$i][$j] = $probs[$j]; 

			#copy the value to full contact map if necessary
			#we might want to weigth them(coef for beta is 1.4)
			#currently we set weight to 1	
			$prob = $probs[$j];
			$weight = 1; 
			$prob *= $weight;  

			$m = $b2p{$i};
			$n = $b2p{$j}; 
			#0.25 <-> 55% accuracy, 0.30<->60%, 0.35<->65%
			#$prob_th = 0.30; 
			#set to 0.15 for server
			#$prob_th = 0.15; 
			#if ($prob > $conmap[$m][$n] && $prob >= $prob_th)
			if ($prob > $conmap[$m][$n])
			{
				$conmap[$m][$n] = $prob; 
			}
		}
	}
	open(MAP, ">$comb_map") || die "can't create combine contact map.\n";
	print MAP "$name1$seq1$ss1$sa1";
	for ($i = 0; $i < $length; $i++)
	{
		for ($j = 0; $j < $length; $j++)
		{
			$prob = $conmap[$i][$j];
			print MAP "$prob ";
		}
		print MAP "\n";
	}
	close MAP; 
}

#use threshold 0.1 to select contacts. 
$prefix = "$output_dir/$filename";
#to selected map
system("$pspro_dir/script/conpro2fixmap.pl $comb_map $comb_map.1 0.1 1"); 
#to casp format
chomp $name1; 
system("$beta_dir/script/conpro2casp.pl BETApro $name1 1 8 $comb_map.1 $comb_map.csp 6");
# to eva format
system("$pspro_dir/script/casp2eva.pl $comb_map.csp $comb_map.eva"); 

#generate maps for cm8a too
system("$pspro_dir/script/conpro2fixmap.pl $cmap $cmap.1 0.1 1"); 
system("$beta_dir/script/conpro2casp.pl BETApro $name1 1 8 $cmap.1 $cmap.csp 6");
system("$pspro_dir/script/casp2eva.pl $cmap.csp $cmap.eva"); 

}

`rm $prefix*.pxml $prefix*c.rr? $prefix*.map`; 
#remove temporary files





