#!/usr/bin/perl -w

##############################################################################
# COPY from predict_greedy_pair.pl 
#Given a set of H-bond bonding matrices, predict and evaluate beta-strand pairs,
#and compute the pairing distance and the sequence separation between
#adjacent strands.
#Matrix Format: Name, Sequence, SS, BP1, BP2, matrix
#Input Parameters: base pair program, input directory, output file  
#Output file format:  pairing_dist: seq_dist1, seq_dist2,.....,  
#to the standard output: overall precision and recall.  

#Generate  sequence distance and pairing distance

#Author: Jianlin Cheng
#Date: 9/26/2004
#############################################################################

# ---------------------------------------------------------------------------
# Setup: validate the command line, list the input directory, open the output
# file and initialise the accumulators used by the rest of the script.
# ---------------------------------------------------------------------------
if (@ARGV != 3)
{
	die "need 3 params:base pair program, input directory of h-bond probability file, output file\n";
}

my $predictor   = shift @ARGV;
my $input_dir   = shift @ARGV;
my $output_file = shift @ARGV;

if (! -f $predictor)
{
	die "can't find base pair program: $predictor\n";
}

if (! -d $input_dir)
{
	die "input directory doesn't exist.\n";
}

# Make sure the directory ends with "/" so that file names can be appended
# to it directly.
$input_dir .= "/" unless $input_dir =~ m{/\z};

opendir(my $dir_handle, $input_dir) || die "can't open input directory: $!\n";
# readdir returns entries in an unspecified order; sorting makes the order of
# the values written to the output file reproducible between runs.
my @file_list = sort readdir($dir_handle);
closedir($dir_handle);

# Three-argument open: characters in the file name can never be taken for an
# open mode. The bareword handle OUTPUT is kept because the report code at the
# end of the script prints to it.
open(OUTPUT, '>', $output_file) || die "can't create output file: $!\n";

# Running totals over all input files.
my $true_total = 0;
my $pre_total  = 0;
my $corr_total = 0;

my $naive_pre_total  = 0;
my $naive_corr_total = 0;

# One slot per pairing distance: slot i collects the sequence distances seen
# for pairing distance i+1. Slots 0..49 cover distances 1..50; slot 50
# (distance 51) is used for "no pairing connection".
my @distances = ("") x 51;


# ---------------------------------------------------------------------------
# Run the predictor on every regular file of the input directory and fold its
# report into the running totals and the @distances table.
#
# Predictor output, as consumed here (1-based line numbers):
#   line  2      strand list (echoed to standard output)
#   lines 7-11   "label: value" counts, in this order: correct, predicted,
#                true, naive predicted, naive correct
#   line  14     whitespace separated "pairing_dist:seq_dist" constraints
# All other lines are ignored.
# ---------------------------------------------------------------------------
foreach my $file (@file_list)
{
	my $filename = $input_dir . $file;

	# readdir also returns ".", ".." and sub-directories; only regular files
	# are predictor inputs.
	next unless -f $filename;

	# Only the first line (the name) of the input file is used by this script.
	open(my $input, '<', $filename) || die "can't read input file $filename: $!\n";
	my $name = <$input>;
	close($input);
	$name = "" unless defined $name;
	chomp $name;

	# Run the predictor through a list-form pipe: no shell is involved, so
	# spaces or metacharacters in the paths are harmless, and no temporary
	# file has to be created in (and removed from) the current directory.
	my $pipe;
	unless (open($pipe, '-|', $predictor, $filename))
	{
		print "error happens in process file $filename\n";
		next;
	}
	my @report = <$pipe>;
	close($pipe);    # waits for the predictor and stores its exit status in $?
	if ($? != 0)
	{
		print "error happens in process file $filename\n";
	}

	# A report that stops before the last count line cannot be evaluated;
	# skip it instead of adding undefined values to the totals.
	if (@report < 11)
	{
		print "error: incomplete result for file $filename, skipped\n";
		next;
	}

	my $strand_list = $report[1];

	# Lines 7-11 are "label: value"; keep the value part of each.
	my @counts = ();
	foreach my $line (@report[6 .. 10])
	{
		chomp(my $text = $line);
		my (undef, $value) = split(/: /, $text);
		push @counts, $value;
	}
	if (grep { !defined $_ } @counts)
	{
		print "error: malformed result for file $filename, skipped\n";
		next;
	}
	my ($corr_num, $pre_num, $true_num, $naive_pre, $naive_corr) = @counts;

	# process constraints
	# Lines 12 and 13 (detailed pair list, title of constraints) are skipped.
	my $constraint = defined $report[13] ? $report[13] : "";
	chomp $constraint;

	# split(' ') ignores leading whitespace, so an indented constraint line
	# does not produce an empty first pair.
	foreach my $pair (split(' ', $constraint))
	{
		my ($pair_dist, $seq_dist) = split(/:/, $pair);
		if (!defined $pair_dist || !defined $seq_dist)
		{
			die "$name: malformed constraint: $pair\n";
		}
		if ($pair_dist < 1)
		{
			die "$name: paring distance is less than 1.\n";
		}
		if ($seq_dist < 1)
		{
			die "$name: seq distance is less than 1.\n";
		}
		if ($pair_dist > 50)
		{
			if ($pair_dist == 100000) #no pairing connection
			{
				$pair_dist = 51;
			}
			else
			{
				die "pair distance is bigger than 50: $name, $pair_dist\n";
			}
		}
		$distances[$pair_dist - 1] .= ",$seq_dist";
	}
	print "$name\n$strand_list$constraint\n\n";

	# calculate accuracy
	$corr_total += $corr_num;
	$pre_total += $pre_num;
	$true_total += $true_num;
	$naive_pre_total += $naive_pre;
	$naive_corr_total += $naive_corr;
}

#output the distance constraints

# ---------------------------------------------------------------------------
# Report: write one "pairing_dist: ,seq_dist,seq_dist,..." line per slot of
# @distances to the output file, then print the overall precision and recall
# to standard output.
# ---------------------------------------------------------------------------
foreach my $slot (0 .. 50)
{
	print OUTPUT $slot + 1, ": $distances[$slot]\n";
}

# Buffered write errors only surface at close, so report them.
close(OUTPUT) || warn "can't close output file: $!\n";

# Divide two totals, returning "N/A" instead of dying with "Illegal division
# by zero" when the denominator is 0 (for example with an empty input
# directory or when nothing was predicted).
my $ratio = sub
{
	my ($numerator, $denominator) = @_;
	return $denominator ? $numerator / $denominator : "N/A";
};

print "pair precision: ", $ratio->($corr_total, $pre_total), "\n";
print "pair recall: ", $ratio->($corr_total, $true_total), "\n";

print "naive pair precision: ", $ratio->($naive_corr_total, $naive_pre_total), "\n";
print "naive pair recall: ", $ratio->($naive_corr_total, $true_total), "\n";








