#!/usr/bin/perl -w

use strict;
use warnings;
use Carp;
use Cwd 'abs_path';
use File::Basename;
use Cwd qw();
use List::Util qw[min max];

# Pipeline driver. Positional command-line arguments:
#   1. directory containing scripts/hhblitsdb.pl and bin/hhblits
#   2. job id, used as the prefix of "<id>-mya3m/", "<id>-mydb/" and "<id>.a3m"
#   3. extension of the final output file "<id>.<ext>"
my $hhblits2 = shift;
my $id = shift;
my $ext = shift;
die "Usage: $0 <hhblits_dir> <id> <output_ext>\n" if not defined $ext;

# Files whose concatenated sequence text exceeds this many characters are
# deleted before the database is built.
# NOTE(review): the reason for this particular limit is not visible here;
# presumably it guards against inputs hhblitsdb/hhblits cannot handle -- confirm.
my $max_length = 15000;

foreach my $file (glob("$id-mya3m/*")){
	print $file."\n";
	my $seq_length = length(seq_fasta($file));
	print $seq_length."\n";
	next if $seq_length <= $max_length;
	print $file." ".$seq_length."\n";
	# unlink instead of a shell "rm": no shell parsing, so file names with
	# spaces or metacharacters are removed correctly.
	unlink($file) or confess "ERROR! Could not remove $file: $!";
}

# Run hhblitsdb.pl over the remaining files in <id>-mya3m, output prefix <id>-mydb/mydb.
system_cmd("perl $hhblits2/scripts/hhblitsdb.pl -cpu 8 -o $id-mydb/mydb -ia3m $id-mya3m > /dev/null");
# Run hhblits against that database; note <id>.a3m is both the input (-i) and the output (-oa3m).
system_cmd("$hhblits2/bin/hhblits -i $id.a3m -d $id-mydb/mydb -oa3m $id.a3m -e 1e-3 -n 3 -cpu 8 -diff inf -id 99 -cov 50");
# Drop header lines (starting with ">") and delete every lowercase letter,
# writing what is left to <id>.<ext>.
system_cmd("egrep -v \"^>\" $id.a3m | sed 's/[a-z]//g' > $id.$ext");

####################################################################################################
# Return the sequence text of a FASTA-style file as a single string.
#
# Every line that does not start with ">" is appended to the result after its
# line ending (and any carriage return) is removed. If the file holds several
# records, their sequences are all concatenated into the one returned string.
#
# Argument: path to the file.
# Dies (confess, with stack trace) if the path is not a regular file or cannot
# be opened.
sub seq_fasta{
	my $file_fasta = shift;
	confess "ERROR! Fasta file $file_fasta does not exist!" if not -f $file_fasta;
	my $seq = "";
	# Three-argument open with a lexical handle: the path is taken literally
	# (no mode characters or whitespace stripping as with two-argument open)
	# and the handle does not leak into a package global.
	open(my $fh, '<', $file_fasta) or confess "ERROR! Cannot open fasta file $file_fasta: $!";
	while (my $line = <$fh>){
		next if (substr($line,0,1) eq ">");
		chomp $line;
		$line =~ tr/\r//d; # chomp does not remove \r
		$seq .= $line;
	}
	close $fh;
	return $seq;
}

####################################################################################################
# Run a shell command and die (confess, with stack trace) if it fails.
#
# Arguments:
#   $command - command line, passed to the shell via system()
#   $log     - optional path; when given, the command's stdout and stderr are
#              both redirected to this file
#
# A command shorter than 5 characters that starts with "rm" is refused, as a
# guard against running a truncated delete command.
sub system_cmd{
	my $command = shift;
	my $log = shift;
	confess "EXECUTE [$command]?\n" if (length($command) < 5  and $command =~ m/^rm/);
	if(defined $log){
		# Portable redirection: "&>" is a bash extension, and system() runs
		# /bin/sh, where "cmd &> file" can mean "run cmd in the background".
		system("$command > $log 2>&1");
	}
	else{
		system($command);
	}
	# Capture both right away; later calls could overwrite them.
	my ($status, $os_error) = ($?, "$!");
	return if $status == 0;

	# $! is only meaningful when the command could not be started ($? == -1);
	# otherwise decode the wait status into a signal or an exit code.
	my $reason;
	if($status == -1){
		$reason = "failed to start: $os_error";
	}
	elsif($status & 127){
		$reason = sprintf("died with signal %d", $status & 127);
	}
	else{
		$reason = sprintf("exited with code %d", $status >> 8);
	}
	confess "ERROR!! Could not execute [$command]! \nError message: [$reason]";
}
