#!/usr/bin/perl -w
###################################################################################
# COPY from predict_greedy_all.pl 
#Input: a beta-pairing matrix
#Matrix Format: Name, Sequence, SS, BP1(not used), BP2(not used), matrix
#Predict the beta-strand pairing
#Output: output.str(human readable strand pairing infor), output.gviz(strand pairing 
#graph for graphviz)

#Input Parameters:strand pair program(span_pair_pre or greedy_pair_pre), input file, prefix of output file  
#Author: Jianlin Cheng
#Date: 3/18/2005
#####################################################################################
sub round
{
	my ($value) = @_;
	$value *= 100;
	$value += 0.5;
	$value = int($value);
	$value /= 100;
}
sub round3
{
	my ($value) = @_;
	$value *= 1000;
	$value += 0.5;
	$value = int($value);
	$value /= 1000;
}

if (@ARGV != 3)
{
	die "need 3 params:strand pairing program, beta-residue pairing matrix, the prefix of output files\n"; 
}

$predictor = shift @ARGV;
$input_file = shift @ARGV;
$output_file = shift @ARGV; 

if (! -f $predictor)
{
	die "can't find strand pairing program: $predictor\n";  
}

if (! -f $input_file)
{
	die "input file doesn't exist.\n"; 
}

open(OUTSTR, ">$output_file.str") || die "can't create strand output file.\n"; 
open(OUTVIZ, ">$output_file.viz") || die "can't create graphviz file.\n"; 

open(INPUT, "$input_file") || die "can't read input file $input_file\n"; 
$name = <INPUT>;
$seq = <INPUT>;
$ss = <INPUT>;
$bp1 = <INPUT>;
$bp2 = <INPUT>; 
@beta_matrix = <INPUT>;
close INPUT; 
print OUTSTR "Name:\n$name\n";
print OUTSTR "Sequence:\n$seq\n";
print OUTSTR "Secondary Structure(H:helix, E:Beta Strand/Bridge, C:Coil):\n$ss\n";

#predict strand from beta-residue pairing matrix
$tmp = $input_file . ".tmp"; 
$res = system("$predictor $input_file > $tmp"); 
if ($res != 0)
{
	print "error happens in strand pairing prediction\n"; 
}

open(RES, "$tmp") || die "can't read result file, $tmp\n";
<RES>;
$strand_list = <RES>;
chomp $strand_list;
@strands = split(/, /, $strand_list);
<RES>;
#predict strand pairs without alignments
$pre_list = "";
$pre_list = <RES>; 
#alignment of predicted pairs
<RES>;
$ali_pre_pair = <RES>;
#print $ali_pre_pair; 
chomp $ali_pre_pair;
<RES>;
@energy_matrix = <RES>;


#generate strand list
@id_list = ();
#start positions
@spos = ();
#end positions
@epos = ();


print OUTSTR "The predicted beta-strands:\n";
print OUTSTR "Format: id:[a-b]:[c-d].\nid: the index of strand;\na-b: the starting and ending positions of strand in the sequence;\n";
print OUTSTR "c-d: the indices of the first and last beta-residue of strand. All the beta-residues in the sequence are ordered sequentially and assigned an index starting from 1.\n";
$pre_id = -1;
%id2pos = ();
$order = 1; 
foreach $strand(@strands)
{
	if ($strand =~ /^\((\d+),(\d+),(\d+)\)$/)
	{
		if ($1 < $pre_id)
		{
			die "assume the strand id always increases.\n";
		}
		$pre_id = $1;
		push @id_list, $1;
		push @spos, $2;
		push @epos, $3;
		
		for ($i = $2; $i <= $3; $i++)
		{
			if ($i == $2)
			{
				$first = $order;
			}
			if ($i == $3)
			{
				$last = $order;
			}
			$id2pos{$order} = $i;
			$order++;
		}
		print OUTSTR "    $1:\[$2-$3\]:\[$first-$last\]\n"; 
	}
	else
	{
		die "strand list format error.\n";
	}
}

print OUTSTR "\nThe predicted strand pairs and strand alignments:\n";
print OUTSTR "Format: x--y:r:[a-b:c-d]:w\n";
print OUTSTR "x: the index of strand 1, y: the index of strand 2. x and y are paired.\n";
print OUTSTR "r: pairing direction (A:antiparallel, P: parallel, B: bridge)\n";
print OUTSTR "[a-b:c-d] specifies the alignment. The amino acids between position a and b of strand 1";
print OUTSTR " are paired with the amino acids between c and d of strand2 one by one.\n";
print OUTSTR "w: the pseudo-binding energy between strand 1 and 2, an indicator of confidence.\n";


print OUTVIZ "graph G {\n";
print OUTVIZ "\tnode[shape=circle, color=blue];\n";
@ali_pre = split(/, /, $ali_pre_pair);
for ($i = 0; $i < @ali_pre; $i++)
{
	$rec = $ali_pre[$i]; 
	#vars: 1:id1,2:pos11,3:pos12,4:id2,5:pos21,6:pos22,7:energy,8:direction,9:start1,10:end1,11:start2,12:end2 
	#if ($rec =~ /^\[\((\d+),(\d+),(\d+)\),\((\d+),(\d+),(\d+)\),.+,<(\d+),(\d+),(\d+),(\d+),(\d+)>\]$/)
	if ($rec =~ /^\[\((\d+),(\d+),(\d+)\),\((\d+),(\d+),(\d+)\),(.+),<(\d+),(\d+),(\d+),(\d+),(\d+)>\]$/)
	{
		if ($1 > $4 || $9 > $10)
		{
			die "assumption violation in the alignment."; 
		}
		#round the weight
		$weight = $7;
		$weight *= 100;
		$weight += 0.5;
		$weight = int($weight);
		$weight /= 100;

		print OUTSTR "    $1--$4:";
		if ($8 == 0)
		{
			print OUTVIZ "\t $1 -- $4 [label=\"$weight\", color=black];\n";
			print OUTSTR "B:";
		}
		elsif ($8 == 1)
		{
		;	#antiparallel
			print OUTVIZ "\t $1 -- $4 [label=\"$weight\", color=blue];\n";
			print OUTSTR "A:";
		}
		else
		{
			print OUTVIZ "\t $1 -- $4 [label=\"$weight\", color=red];\n";
			print OUTSTR "P:";
		;	#parallel
		}
		print OUTSTR "\[$id2pos{$9}-$id2pos{$10}:$id2pos{$11}-$id2pos{$12}\]:$weight\n";
	}
}

print OUTVIZ "}\n";
close OUTVIZ;

#############construct predicted sheets#######################
@psheets = ();
#including isolated bridge 
@pre_pairs = @ali_pre; 
while(@pre_pairs)
{
	@sheet = ();
	$seed = shift @pre_pairs; 
	push @sheet, $seed; 
	while (@pre_pairs)
	{
		#check if any pair is connected with the sheet	
		$found = 0;
		@temp_pairs = (); 
		foreach $rec1(@pre_pairs)
		{
			#vars: 1:id1,2:pos11,3:pos12,4:id2,5:pos21,6:pos22,7:direction,8:start1,9:end1,10:start2,11:end2 
			$rec1 =~ /^\[\((\d+),(\d+),(\d+)\),\((\d+),(\d+),(\d+)\),.+,<(\d+),(\d+),(\d+),(\d+),(\d+)>\]$/;
			$id1 = $1; $id2 = $4;
			$flag = 0; 
			foreach $rec2 (@sheet)
			{
				$rec2 =~ /^\[\((\d+),(\d+),(\d+)\),\((\d+),(\d+),(\d+)\),.+,<(\d+),(\d+),(\d+),(\d+),(\d+)>\]$/;
				if ($id1 == $1 || $id1 == $4 || $id2 == $1 || $id2 == $4)
				{
					$found = 1; 
					$flag = 1; 
				}
			}
			if ($flag == 1)
			{
				push @sheet, $rec1; 
			}
			else
			{
				push @temp_pairs, $rec1; 
			}
		}

		if ($found == 0 && @sheet > 0)
		{
			#put one sheet into a list
			push @psheets, join("*",@sheet); 
			last; 
		}
		@pre_pairs = @temp_pairs; 
	}
	if (@pre_pairs == 0 && @sheet > 0)
	{
		push @psheets, join("*",@sheet); 
	}
}

$psheet_num = @psheets;

print OUTSTR "\nSee the attached image file (strand_pair.png) for the visulized Strand Pairing Graph.\nblue edge:antiparallel, red edge:parallel strand pair, black edge:beta bridge.\n";


#print out beta-sheets
#printing out of beta-sheets is not finished. 
#first: format is not consistent with alignment output
#second: index of alignment is not converted to positions, or they can be removed.
print OUTSTR "\nThe predicted beta-sheets:\n";
print OUTSTR "   The number of beta-sheets: $psheet_num\n"; 
$num = 0;
foreach $pre_sheet(@psheets)
{
	$num++;
	print OUTSTR "   Sheet $num (strand pairs): ";
	@sheet_pairs = split(/\*+/, $pre_sheet);
	foreach $rec1(@sheet_pairs)
	{
		#vars: 1:id1,2:pos11,3:pos12,4:id2,5:pos21,6:pos22,7:direction,8:start1,9:end1,10:start2,11:end2 
		$rec1 =~ /^\[\((\d+),(\d+),(\d+)\),\((\d+),(\d+),(\d+)\),.+,<(\d+),(\d+),(\d+),(\d+),(\d+)>\]$/;
		print OUTSTR "$1--$4  "; 
	}
	print OUTSTR "\n";
}

print OUTSTR "\nThe pseudo-energy matrix of all strand pairs:\n";
#print OUTSTR @energy_matrix;
$size = @energy_matrix;
printf OUTSTR "%7s", "strand";
for ($i = 1; $i <= $size; $i++)
{
	printf OUTSTR "%5s", $i;
}
print OUTSTR "\n";

$num = 1;
foreach $line(@energy_matrix)
{
	printf OUTSTR "%7s", "$num   ";
	$num++;
	chomp $line;
	@energy = split(/\s+/, $line);
	foreach $val(@energy)
	{
		printf OUTSTR "%5s", &round($val);
	}
	print OUTSTR "\n";
}
print OUTSTR "\n";

print OUTSTR "See the attached image file (beta_residue_pair.png) for the visulized Residue Pairing Map.\n\n";

print OUTSTR "The predicted pairing probability matrix of all beta-residues:\n";
$size = @beta_matrix;
printf OUTSTR "%6s", "index";
for ($i = 1; $i <= $size; $i++)
{
	printf OUTSTR "%5s", $i;
}
print OUTSTR "\n";

$num = 1;
foreach $line(@beta_matrix)
{
	printf OUTSTR "%6s", "$num   ";
	$num++;
	chomp $line;
	@prob = split(/\s+/, $line);
	foreach $val(@prob)
	{
		printf OUTSTR "%5s", &round($val);
	}
	print OUTSTR "\n";
}
close OUTSTR;
`rm $tmp`;

