"""Generate alignments, run CCMpred on them and report contact precision.

Pipeline, for every ``*.fasta`` file in the fasta folder:

1. run ``hhjack_hhmsearch3.sh`` (from the scripts folder) to build an ``.aln``;
2. run ``CCMpred/bin/ccmpred`` (from the tool folder) to build a ``.mat``;
3. convert every ``.mat`` into an ``.rr`` contact file;
4. run ``evaluation_pdb.pl`` against the pdb folder and print the precision
   table found in ``rr/results.txt``.

Usage:
    python run_aln_parallel.py SCRIPTS_DIR TOOL_DIR DB_DIR FASTA_DIR PDB_DIR OUT_DIR
"""
import os,sys
import pathlib
import shutil
import subprocess
import numpy as np
from multiprocessing import Pool
from time import sleep

os.environ["LIBC_FATAL_STDERR_"] = "1"

USAGE = ("Usage: python run_aln_parallel.py "
         "SCRIPTS_DIR TOOL_DIR DB_DIR FASTA_DIR PDB_DIR OUT_DIR")

# Number of fasta files processed concurrently.
NUM_WORKERS = 2

# Run-time configuration. Filled in by _configure(), both in the parent
# process and in every pool worker, so run_process() can read it no matter
# which multiprocessing start method is in use.
src = None
tool_dir = None
DB_dir = None
fasta_dir = None
pdb_dir = None
out_dir = None


def _configure(script_path, tool_path, db_path, fasta_path, pdb_path, out_path):
    """Store the six pipeline folders in the module-level settings."""
    global src, tool_dir, DB_dir, fasta_dir, pdb_dir, out_dir
    src = script_path
    tool_dir = tool_path
    DB_dir = db_path
    fasta_dir = fasta_path
    pdb_dir = pdb_path
    out_dir = out_path


def _run_command(args, log_path=None, cwd=None):
    """Run an external command without going through a shell.

    args     -- command and arguments as a list (safe for paths with spaces).
    log_path -- file receiving stdout+stderr; output is discarded when None.
    cwd      -- working directory for the command.

    Returns the exit status, or None when the command could not be started.
    """
    try:
        if log_path is None:
            return subprocess.call(args, cwd=cwd,
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
        with open(log_path, 'w') as log:
            return subprocess.call(args, cwd=cwd, stdout=log,
                                   stderr=subprocess.STDOUT)
    except OSError as err:
        print("Cannot run "+str(args[0])+": "+str(err), file=sys.stderr)
        return None


def _copy_into(file_path, dest_dir):
    """Copy file_path into dest_dir; report (do not raise) on failure."""
    try:
        shutil.copy(file_path, dest_dir)
    except OSError as err:
        # Best effort, like the shell "cp" this replaces: one failing copy
        # must not abort the whole worker pool.
        print("Cannot copy "+file_path+" to "+dest_dir+": "+str(err),
              file=sys.stderr)


def _require_dir(path, label):
    """Exit with the usage message unless path is an existing folder."""
    if not os.path.isdir(path):
        print(USAGE)
        print("Cannot find "+label+" folder path:"+path)
        sys.exit(1)


def Display_precision(result):
    """Print per-domain and average precision read from a results file.

    result -- path of the results file. Lines containing '.rr (precision)'
              and lines containing '.pdb (precision)' are collected and
              paired in order of appearance; whitespace-separated fields
              2..7 of the rr line are reported as Top5, TopL/10, TopL/5,
              TopL/2, TopL and Top2L.

    Exits with status 1 when the number of rr and pdb lines differ.
    """
    rr = []
    pdb = []
    with open(result, "r") as handle:
        for line in handle:
            if '.rr (precision)' in line:
                rr.append(line.strip())
            if '.pdb (precision)' in line:
                pdb.append(line.strip())

    if len(rr) != len(pdb):
        print("Contact files are not consistent with pdb files! Please check "+result)
        sys.exit(1)

    # One list per reported column: Top5, TopL/10, TopL/5, TopL/2, TopL, Top2L
    columns = [[] for _ in range(6)]
    for rr_out, pdb_out in zip(rr, pdb):
        arr_rr = rr_out.split()
        if len(arr_rr) < 8:
            print("Skipping malformed result line: "+rr_out, file=sys.stderr)
            continue
        fl_id = os.path.splitext(arr_rr[0])[0]
        dm_id = os.path.splitext(pdb_out.split()[0])[0]
        # Only report pairs whose domain id belongs to the rr target.
        if dm_id.startswith(fl_id):
            scores = arr_rr[2:8]
            print(' '.join([dm_id] + scores))
            for column, score in zip(columns, scores):
                column.append(score)

    print('Total '+str(len(rr))+' domains')
    print('PRECISION Top5 TopL/10 TopL/5 TopL/2 TopL Top2L')
    print('Avg '+' '.join(Average(column) for column in columns))


def Average(lst):
    """Return the mean of lst formatted with two decimals.

    lst -- iterable of numbers or numeric strings.

    Returns the string "nan" for an empty input instead of dividing by zero.
    """
    values = [float(item) for item in lst]
    if not values:
        return "nan"
    return "{0:.2f}".format(sum(values) / len(values))


def cmap2rr(fasta_dir, cmap_dir, rr_dir):
    """Convert contact-map matrices into .rr files inside rr_dir.

    fasta_dir -- folder holding '<id>.fasta' for every matrix.
    cmap_dir  -- iterable of matrix file paths (the stem is used as id).
    rr_dir    -- output folder; it also becomes the current working
                 directory, as callers run the evaluation from there.

    Each '<id>.rr' holds the non-header lines of the fasta file followed by
    one 'i j 0 8 value' line for every pair i < j (1-based indices).
    """
    # Resolve everything before changing directory so relative paths given
    # by the caller keep pointing at the same files.
    fasta_dir = os.path.abspath(fasta_dir)
    rr_dir = os.path.abspath(rr_dir)
    matrices = [os.path.abspath(str(path)) for path in cmap_dir]
    os.chdir(rr_dir)

    for matrix_path in matrices:
        target = pathlib.Path(matrix_path).stem
        # ndmin=2 keeps single-value / single-row files two-dimensional.
        cmap = np.loadtxt(matrix_path, dtype='float32', ndmin=2)
        L = cmap.shape[0]

        fasta_path = os.path.join(fasta_dir, target+'.fasta')
        try:
            with open(fasta_path, 'r') as fasta:
                sequence = [row.rstrip('\n') for row in fasta
                            if not row.startswith('>')]
        except OSError as err:
            print("Cannot read "+fasta_path+": "+str(err), file=sys.stderr)
            sequence = []

        with open(os.path.join(rr_dir, target+'.rr'), 'w') as out:
            out.writelines(row+"\n" for row in sequence)
            for i in range(0, L):
                out.writelines(
                    str(i+1)+" "+str(j+1)+" 0 8 "+str(cmap[i][j])+"\n"
                    for j in range(i+1, L))


def run_process(process):
    """Build the alignment and the CCMpred matrix for one fasta file.

    process -- path of the fasta file; its stem names every output.

    Reads the module-level out_dir, tool_dir, DB_dir and src settings.
    Results are copied to '<out_dir>/aln' and '<out_dir>/ccmpred'; the
    per-target working folder is removed once the matrix is available.
    """
    fasta_path = pathlib.Path(process)
    filename = fasta_path.stem
    work_dir = os.path.join(out_dir, filename)
    aln_file = os.path.join(work_dir, filename+'.aln')
    mat_file = os.path.join(work_dir, filename+'.mat')

    print("Generating ALN for "+filename)
    if os.path.exists(aln_file):
        _copy_into(aln_file, os.path.join(out_dir, 'aln'))
        print(filename+"....generated....skipped.....")
    else:
        # The script lives in (and is run from) the scripts folder.
        _run_command(
            [os.path.join(src, 'hhjack_hhmsearch3.sh'), str(fasta_path),
             work_dir, tool_dir, DB_dir],
            log_path=os.path.join(out_dir, filename+'.log'),
            cwd=src)
        if os.path.exists(aln_file):
            _copy_into(aln_file, os.path.join(out_dir, 'aln'))
        else:
            print(filename+" ALN failed")

    print("Generating CCMpred for "+filename+".aln")
    if not os.path.exists(mat_file):
        _run_command(
            [os.path.join(tool_dir, 'CCMpred', 'bin', 'ccmpred'),
             aln_file, mat_file])
    if os.path.exists(mat_file):
        _copy_into(mat_file, os.path.join(out_dir, 'ccmpred'))
        shutil.rmtree(work_dir, ignore_errors=True)
    else:
        print(filename+" CCMpred failed")


def main(argv=None):
    """Validate the command line and run the whole pipeline.

    argv -- argument vector including the program name; defaults to
            sys.argv. Exits with status 1 on bad arguments, on a missing
            input folder or when the evaluation produced no results file.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 7:
        print('please input the right parameters')
        print(USAGE)
        sys.exit(1)

    for label, path in zip(('scripts', 'tool', 'DB', 'fasta', 'pdb'), args[1:6]):
        _require_dir(path, label)

    # Absolute paths: the pipeline changes directory and hands these paths
    # to external tools, so relative ones would stop resolving.
    settings = tuple(os.path.abspath(path) for path in args[1:7])
    _configure(*settings)

    if not os.path.isdir(out_dir):
        print("The output folder path: doesn't exist, Creating..."+out_dir)
    os.makedirs(out_dir, exist_ok=True)
    # Start every run from empty result folders.
    for sub in ('aln', 'ccmpred', 'rr'):
        sub_path = os.path.join(out_dir, sub)
        if os.path.isdir(sub_path):
            shutil.rmtree(sub_path)
        os.makedirs(sub_path)

    processes = sorted(pathlib.Path(fasta_dir).glob('*.fasta'))
    # The initializer hands the settings to each worker explicitly.
    with Pool(processes=NUM_WORKERS, initializer=_configure,
              initargs=settings) as pool:
        pool.map(run_process, processes)

    print("Evaluating alignment by CCMpred....")
    ccmpred_dir = os.path.join(out_dir, 'ccmpred')
    rr_dir = os.path.join(out_dir, 'rr')
    cmap_dir = sorted(pathlib.Path(ccmpred_dir).glob('*.mat'))
    cmap2rr(fasta_dir, cmap_dir, rr_dir)

    results = os.path.join(rr_dir, 'results.txt')
    if os.path.exists(results):
        os.remove(results)
    _run_command(['perl', os.path.join(src, 'evaluation_pdb.pl'), rr_dir, pdb_dir],
                 cwd=rr_dir)
    if not os.path.isfile(results):
        print("Evaluation did not produce "+results, file=sys.stderr)
        sys.exit(1)
    Display_precision(results)

    shutil.rmtree(ccmpred_dir, ignore_errors=True)
    shutil.rmtree(rr_dir, ignore_errors=True)


if __name__ == "__main__":
    main()