#!/usr/bin/perl -w

##############################################################################
#Given a set of H-Bond bonding matrice, count the relation between strand num
#and pair number
#Matrix Format: Name, Sequence, SS, BP1, BP2, matrix
#Input Parameters: base pair program, input directory, output file , (probability)threshold
#Output file format: strand_num pair_num 
#Author: Jianlin Cheng
#Date: 9/20/2004
#############################################################################

if (@ARGV != 4)
{
	die "need 4 params:base pair program, input directory of h-bond probability file, output file, probability threshold\n"; 
}

$predictor = shift @ARGV;
$input_dir = shift @ARGV;
$output_file = shift @ARGV; 
$threshold = shift @ARGV; 

if (! -f $predictor)
{
	die "can't find base pair program: $predictor\n";  
}

if (! -d $input_dir)
{
	die "input directory doesn't exist.\n"; 
}

if ( substr($input_dir, length($input_dir) - 1, 1) ne "/" )
{
        $input_dir .= "/";
}

opendir(DIR, $input_dir) || die "can't open input directory.\n";
@file_list = readdir(DIR);
closedir(DIR); 

open(OUTPUT, ">$output_file") || die "can't create output file.\n"; 

$true_total = 0;
$pre_total = 0; 
$corr_total = 0; 
print OUTPUT "strand_num pair_num\n"; 
foreach $file(@file_list)
{
	if ($file eq "." || $file eq "..")
	{
		next; 
	}
	$filename = $input_dir . $file;
	open(INPUT, "$filename") || die "can't read input file $filename\n"; 
	$name = <INPUT>;
	$seq = <INPUT>;
	$ss = <INPUT>;
	$bp1 = <INPUT>;
	$bp2 = <INPUT>; 
	close INPUT; 

	$tmp = $file . ".tmp"; 
	$res = system("$predictor $filename $threshold > $tmp"); 
	if ($res != 0)
	{
		print "error happens in process file $filename\n"; 
	}
	open(RES, "$tmp") || die "can't read result file, $tmp\n";
	<RES>;
	$strand_list = <RES>;
	@strands = split(/, /, $strand_list); 
	$strand_num = @strands; 
	<RES>;
	$pre_list = <RES>; 
	<RES>;
	$true_list = <RES>;

	$corr_num = <RES>;
	chomp $corr_num; 
	($other, $corr_num) = split(/: /, $corr_num); 
	$pre_num = <RES>;
	($other, $pre_num) = split(/: /, $pre_num); 
	chomp $pre_num; 
	$true_num = <RES>;
	chomp $true_num; 
	($other, $true_num) = split(/: /, $true_num); 

	$corr_total += $corr_num;
	$pre_total += $pre_num;
	$true_total += $true_num; 
	print OUTPUT "$strand_num $true_num\n"; 
	`rm $tmp`; 
}
close OUTPUT; 

print "pair precision: ", $corr_total / $pre_total, "\n"; 
print "pair recall: ", $corr_total / $true_total, "\n"; 








