#!/usr/bin/perl -w

##############################################################################
#Given a 9-line sequence, predict a H-Bond bonding matrix for each sequence
#Format: 1: matrix only,  2: Name, Sequence, SS, BP1, BP2, matrix
#Input Parameters: 9-line sequence (with title), output file 
#Author: Jianlin Cheng
#
#Copy from predict_set_emap.pl, make it predict the h-map for one single sequence
#03/14/2005
#############################################################################

if (@ARGV != 7)
{
	die "need 7 params: 1.beta map predictor predictor(predict_beta_full.sh), 2.sequence in 9-line format (with a title), 3.output file, 4.alignment file, 5.probability threshold, 6.format(1:matrix only, 2:name,seq,ss,bp1,bp2, matrix), 7.binary(1:binary, 0:probability(threshold won't affect anymore) )\n"; 
}

$predictor = shift @ARGV;
$input_file = shift @ARGV;
$output_file = shift @ARGV;
$align_file = shift @ARGV; 
$threshold = shift @ARGV; 
$format = shift @ARGV; 
$binary = shift @ARGV; 

if (! -f $predictor)
{
	die "can't find h-bond predictor: $predictor\n";  
}

open(INPUT, "$input_file") || die "can't open input file.\n"; 

$no_align = 0;
if (! -f $align_file)
{
	warn "alignment file doesn't exist.\n"; 
	$no_align = 1; 
}

@context = <INPUT>;
close INPUT; 

#shift the title line
shift @context; 

#read in the sequence information
$name = shift @context;
chomp $name;
$length = shift @context;
chomp $length; 
$seq = shift @context;
chomp $seq;
$seq = uc($seq);
$ss = shift @context;
chomp $ss;
$ss = uc($ss);
$bp1 = shift @context;
chomp $bp1;
$bp2 = shift @context; 
chomp $bp2;
$sa = shift @context;
chomp $sa;
$coor = shift @context;
chomp $coor;

#check if the sequence has two beta-strands
@ss_vec = split(/\s+/, $ss); 
$sec_str = join("", @ss_vec);
$more_strand = 0;
if ($sec_str =~ /[BE]+[^BE]+[BE]+/)
{
	$more_strand = 1;
}
if ($more_strand == 0)
{
	#only output three line: name, seq, sa, so the other script can know problem happens.
	@aa_vec = split(/\s+/, $seq);
	@ss_vec = split(/\s+/, $ss); 
	open(OUT, ">$output_file"); 
	print OUT "$name\n"; 
	print OUT join("", @aa_vec), "\n";
	print OUT join("", @ss_vec), "\n";
	close OUT;
	die "there are less than 2 beta-strands in the secondary structure.\n";
}


open(TMP, ">$input_file.tmp") || die "can't create tmp file.\n"; 
print TMP "1 20 3\n$name\n$length\n$seq\n$ss\n$bp1\n$bp2\n$sa\n$coor\n\n"; 
close TMP; 

if ($format == 1)
{
#	print("$predictor $input_file.tmp $align_file $threshold $binary  $output_file\n");  
	system("$predictor $input_file.tmp $align_file $threshold $binary $output_file");  
}

if ($format == 2)
{
	@aa_vec = split(/\s+/, $seq);
	@ss_vec = split(/\s+/, $ss); 
	open(OUT, ">$output_file"); 
	print OUT "$name\n"; 
	print OUT join("", @aa_vec), "\n";
	print OUT join("", @ss_vec), "\n";
	print OUT "$bp1\n$bp2\n"; 
#	print("$predictor $input_file.tmp $align_file $threshold $binary  $output_file.tmp\n");  
	system("$predictor $input_file.tmp $align_file $threshold $binary $output_file.tmp");  

	#fix the problem of "an extra line "alignment file is not found" added to the results
	open(RES, "$output_file.tmp") || die "can't read result file";
	@res = <RES>;
	close RES;
	if ($no_align == 1)
	{
		#`cat $output_file.tmp >> $output_file`;
		shift @res;
	}
	print OUT @res;
	close OUT;
	`rm $output_file.tmp`; 
}

`rm $input_file.tmp`; 
