#!/usr/bin/perl -w
##########################################################################
#generate beta-residue, strand, sheet statistics including: 
# total number of residues, total number of beta residues, total number of strands, total number of strand pair, total number of partners
#strand length, the distance between pair strands, distance between pair residue
#beta-sheet nubmer? (not count, may do it in C++ code)
#Author: Jianlin Cheng
#Date: Oct. 19, 2004
##########################################################################

if (@ARGV != 2)
{
	die "need two parameters: input data set file(9-line with title), output file.\n"; 
}

$input_file = shift @ARGV;
$output_file = shift @ARGV;

open(INPUT, "$input_file") || die "can't open input file.\n";
open(OUTPUT, ">$output_file") || die "can't create output file.\n";

@content = <INPUT>;
close INPUT; 
shift @content;

$tot_aa = 0;
$tot_res = 0;
$tot_strand = 0;
$tot_partners = 0;
$tot_strand_pair = 0; 

@strand_dist = ();
@res_dist = (); 

@strand_num = ();
@strand_pair_num = ();
@res_num = ();
@res_part_num = (); 
@strand_length = (); 

while(@content)
{
	$name = shift @content;
	$length = shift @content;
	chomp $length; 
	$seq = shift @content;
	chomp $seq; 
	@seq = split(/\s+/, $seq); 
	$ss = shift @content;
	chomp $ss; 
	@ss = split(/\s+/, $ss); 
	$bp1 = shift @content;
	chomp $bp1; 
	@bp1 = split(/\s+/, $bp1); 
	$bp2 = shift @content;
	chomp $bp2; 
	@bp2 = split(/\s+/, $bp2); 
	shift @content;
	shift @content;
	shift @content; 

	#error check
	if ($length != @seq || $length != @ss || $length != @bp1 || $length != @bp2 )
	{
		die "length not match.\n"; 
	}
	$tot_aa += $length; 

	$seq_res_num = 0; 
	$seq_part_num = 0; 

	$in_strand = 0; 
	@strand_start = ();
	@strand_end = ();
	for ($i = 0; $i < $length; $i++)
	{
		$sec = $ss[$i]; 

		#count partner number and distance
		if ($bp1[$i] > 0 && ($sec eq "E" || $sec eq "B") )
		{
			$tot_partners++; 
			$seq_part_num++; 
			push @res_dist, abs($bp1[$i] - $i - 1); 
		}
		if ($bp2[$i] > 0 && ($sec eq "E" || $sec eq "B"))
		{
			$tot_partners++; 
			$seq_part_num++; 
			push @res_dist, abs($bp2[$i] - $i - 1); 
		}

		#generate strand
		if ( $sec eq "E" || $sec eq "B")
		{
			$tot_res++; 
			$seq_res_num++; 
			if ($in_strand == 0)
			{
				push @strand_start, $i; 
				$in_strand = 1; 
			}

			if ($i == $length - 1) #at the end, just in case, shouldn't happen. 
			{
				push @strand_end, $i; 
			}
		}
		else #not beta-residue
		{
			if ($in_strand == 1)
			{
				push @strand_end, $i - 1; 
				$in_strand = 0; 
			}
		}
	}
	push @res_num, $seq_res_num; 
	#number of beta-residue pairs are counted twice
	push @res_part_num, $seq_part_num / 2; 
	$st_num = @strand_start; 
	if ($st_num != @strand_end)
	{
		die "size of strand_start doesn't equal size of strand_end: $name"; 
	}
	push @strand_num, $st_num; 
	for ($i = 0; $i < @strand_start; $i++)
	{
		#generate strand length
		push @strand_length, $strand_end[$i] - $strand_start[$i] + 1; 
	}

	#cout strand pair number and distance
	$size = @strand_start;
	$tot_strand += $size; 
	$st_bind_num = 0; 
	for ($i = 0; $i < $size; $i++)
	{
		$a_start = $strand_start[$i];
		$a_end = $strand_end[$i]; 
		for ($j = $i+1; $j < $size; $j++)
		{
			$b_start = $strand_start[$j]; 
			$b_end = $strand_end[$j]; 
			#check if two strands are paired (partner index start from 1, so add 1)
			$range1 = $b_start + 1; 
			$range2 = $b_end + 1; 
			$paired = 0; 
			for ($k = $a_start; $k <= $a_end; $k++)
			{
				if ( ($range1 <= $bp1[$k] && $bp1[$k] <= $range2) || ($range1 <= $bp2[$k] && $bp2[$k] <= $range2) )
				{
					$paired = 1; 
				}
				
			}
			if ($paired == 1)
			{
				$tot_strand_pair++; 
				$st_bind_num++; 
				#calculate the distance between two strands
				$sdist = $strand_start[$j] - $strand_end[$i]; 
				push @strand_dist, $sdist; 
			}
		}
	}
	push @strand_pair_num, $st_bind_num; 
}

$tot_partners /= 2; 

#output statistics


print OUTPUT "tot_aa = $tot_aa\n";
print OUTPUT "tot_beta_res = $tot_res\n";
print OUTPUT "tot_strand_num = $tot_strand\n";
print OUTPUT "tot_beta_pair = $tot_partners\n";
print OUTPUT "tot_strand_pair = $tot_strand_pair\n";
print OUTPUT "strand_pair_dist = c(", join(",", @strand_dist), ")\n";
print OUTPUT "beta_res_dist = c(", join(",", @res_dist), ")\n"; 
print OUTPUT "strand_num = c(", join(",", @strand_num), ")\n";
print OUTPUT "strand_pair_num = c(", join(",", @strand_pair_num), ")\n";
print OUTPUT "beta_res_num = c(", join(",", @res_num), ")\n";
print OUTPUT "res_partner_num = c(", join(",", @res_part_num), ")\n";
print OUTPUT "strand_length = c(", join(",", @strand_length), ")\n"; 

close OUTPUT; 
