import sys import os import subprocess from pathlib import Path sys.path.append(str(Path(__file__).resolve().parents[1])) from config import para_json docstring = """JGImod.py datadir tmpdir hhblitsdb jackhmmerdb hhblits3db [Q] Combine JGIsearch result at datadir/JGI into datadir/JGI/{q3,q4,Deep}JGI.{a3m,aln} tmpdir is the temporary folder hhblitsdb jackhmmerdb hhblits3db are dummy path to hhblits2, hmmer and hhblits3 databases for DeepMSA and qMSA. They can be used so long as their format are correct. Q is the QOS for sbatch (default, urgent, casp) """ if len(sys.argv) < 6: print(docstring) exit() datadir = os.path.abspath(sys.argv[1]) tmpdir = os.path.abspath(sys.argv[2]) hhblitsdb = os.path.abspath(sys.argv[3]) jackhmmerdb = sys.argv[4] hhblits3db = sys.argv[5] Q = "normal" if len(sys.argv) > 6: Q = sys.argv[6] bindir = os.path.dirname(os.path.abspath(__file__)) HHLIB = f"{para_json['qMSApkg']}" DMSALIB = f"{para_json['dMSApkg']}" recorddir = f"{datadir}/record" python3=para_json['python_DeepPotential'] os.system(f"mkdir -p {tmpdir}") os.chdir(tmpdir) qMSA = "qMSA" if not os.path.isfile(f"{datadir}/MSA/{qMSA}.hhba3m"): print(f"WARNING! No such file {datadir}/MSA/{qMSA}.hhba3m") qMSA = "protein" if not os.path.isfile(f"{datadir}/MSA/{qMSA}.hhba3m"): print(f"ERROR! No such file {datadir}/MSA/{qMSA}.hhba3m") exit() a3m_list = [ f"{datadir}/MSA/{qMSA}.hmsa3m", f"{datadir}/MSA/{qMSA}.hh3a3m", f"{datadir}/MSA/{qMSA}.jaca3m", f"{datadir}/MSA/{qMSA}.hhba3m" ] for a3m in a3m_list: if os.path.isfile(a3m): os.system(f"cat {a3m} | {HHLIB}/bin/unaligna3m - {tmpdir}/DB.fasta.fseqs") break os.system(f"cat {datadir}/JGI/DB.fasta.*.cdhit >> {tmpdir}/DB.fasta.fseqs") cmd = f"{HHLIB}/bin/esl-sfetch --index {tmpdir}/DB.fasta.fseqs" print(f"Make index - {cmd}\n") content = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True).stdout.decode("utf-8") print(content) if not os.path.isfile(f"{datadir}/MSA/{qMSA}.hmsa3m") and not os.path.isfile(f"{datadir}/MSA/dMSA.hmsa3m"): jackhmmerdb = f"{tmpdir}/DB.fasta.fseqs" # make q4JGI.{a3m, aln} #### os.system(f"ln -s ../MSA/{qMSA}.fasta {datadir}/JGI/q4JGI.fasta") os.system(f"ln -s ../MSA/{qMSA}.hhbaln {datadir}/JGI/q4JGI.hhbaln") os.system(f"ln -s ../MSA/{qMSA}.hhba3m {datadir}/JGI/q4JGI.hhba3m") if os.path.exists(f"{datadir}/MSA/{qMSA}.jacaln"): os.system(f"ln -s ../MSA/{qMSA}.jacaln {datadir}/JGI/q4JGI.jacaln") if os.path.exists(f"{datadir}/MSA/{qMSA}.jaca3m"): os.system(f"ln -s ../MSA/{qMSA}.jaca3m {datadir}/JGI/q4JGI.jaca3m") if os.path.exists(f"{datadir}/MSA/{qMSA}.hh3aln"): os.system(f"ln -s ../MSA/{qMSA}.hh3aln {datadir}/JGI/q4JGI.hh3aln") if os.path.exists(f"{datadir}/MSA/{qMSA}.hh3a3m"): os.system(f"ln -s ../MSA/{qMSA}.hh3a3m {datadir}/JGI/q4JGI.hh3a3m") cmd = f"{python3} {HHLIB}/scripts/qMSA2.py -hhblitsdb={hhblitsdb} -jackhmmerdb={jackhmmerdb} -hhblits3db={hhblits3db} -hmmsearchdb={tmpdir}/DB.fasta.fseqs -tmpdir={tmpdir}/MSA {datadir}/JGI/q4JGI.fasta" if not os.path.isfile(f"{datadir}/JGI/q4JGI.a3m"): print(f"first qMSA2 - {cmd}") content = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True).stdout.decode("utf-8") print(content) # make q3JGI.{a3m, aln} ## os.system(f"ln -s ../MSA/{qMSA}.fasta {datadir}/JGI/q3JGI.fasta") os.system(f"ln -s ../MSA/{qMSA}.hhbaln {datadir}/JGI/q3JGI.hhbaln") os.system(f"ln -s ../MSA/{qMSA}.hhba3m {datadir}/JGI/q3JGI.hhba3m") if os.path.exists(f"{datadir}/MSA/{qMSA}.jacaln"): os.system(f"ln -s ../MSA/{qMSA}.jacaln {datadir}/JGI/q3JGI.jacaln") if os.path.exists(f"{datadir}/MSA/{qMSA}.jaca3m"): os.system(f"ln -s ../MSA/{qMSA}.jaca3m {datadir}/JGI/q3JGI.jaca3m") cmd = f"{python3} {HHLIB}/scripts/qMSA2.py -hhblitsdb={hhblitsdb} -jackhmmerdb={jackhmmerdb} -hmmsearchdb={tmpdir}/DB.fasta.fseqs -tmpdir={tmpdir}/MSA {datadir}/JGI/q3JGI.fasta" if not os.path.isfile(f"{datadir}/JGI/q3JGI.a3m"): print(f"second qMSA2 - {cmd}") content = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True).stdout.decode("utf-8") print(content) # make DeepJGI.{a3m, aln} if os.path.exists(f"{datadir}/MSA/dMSA.jacaln"): os.system(f"ln -s ../MSA/dMSA.fasta {datadir}/JGI/DeepJGI.fasta") os.system(f"ln -s ../MSA/dMSA.hhbaln {datadir}/JGI/DeepJGI.hhbaln") os.system(f"ln -s ../MSA/dMSA.hhba3m {datadir}/JGI/DeepJGI.hhba3m") if os.path.exists(f"{datadir}/MSA/dMSA.jacaln"): os.system(f"ln -s ../MSA/dMSA.jacaln {datadir}/JGI/DeepJGI.jacaln") if os.path.exists(f"{datadir}/MSA/dMSA.jaca3m"): os.system(f"ln -s ../MSA/dMSA.jaca3m {datadir}/JGI/DeepJGI.jaca3m") cmd = f"{python3} {DMSALIB}/scripts/build_MSA.py -hhblitsdb={hhblitsdb} -jackhmmerdb={jackhmmerdb} -hmmsearchdb={tmpdir}/DB.fasta.fseqs -tmpdir={tmpdir}/MSA {datadir}/JGI/DeepJGI.fasta" if not os.path.isfile(f"{datadir}/JGI/DeepJGI.a3m"): print(f"first dMSA2 - {cmd}") content = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True).stdout.decode("utf-8") print(content) if not os.path.isfile(f"{datadir}/JGI/DeepJGI.a3m"): if os.path.isfile(f"{datadir}/JGI/DeepJGI.hmsa3m"): os.system(f"cp {datadir}/JGI/DeepJGI.hmsa3m {datadir}/JGI/DeepJGI.a3m") elif os.path.isfile(f"{datadir}/JGI/DeepJGI.jaca3m"): os.system(f"cp {datadir}/JGI/DeepJGI.jaca3m {datadir}/JGI/DeepJGI.a3m") else: os.system(f"cp {datadir}/JGI/DeepJGI.hhba3m {datadir}/JGI/DeepJGI.a3m") else: os.system(f"ln -s ../MSA/dMSA.fasta {datadir}/JGI/DeepJGI.fasta") os.system(f"ln -s ../MSA/dMSA.hhbaln {datadir}/JGI/DeepJGI.hhbaln") os.system(f"ln -s ../MSA/dMSA.hhba3m {datadir}/JGI/DeepJGI.hhba3m") if os.path.exists(f"{datadir}/MSA/dMSA.hhbaln"): os.system(f"ln -s ../MSA/dMSA.hhbaln {datadir}/JGI/DeepJGI.jacaln") if os.path.exists(f"{datadir}/MSA/dMSA.hhba3m"): os.system(f"ln -s ../MSA/dMSA.hhba3m {datadir}/JGI/DeepJGI.jaca3m") cmd = f"{python3} {DMSALIB}/scripts/build_MSA.py -hhblitsdb={hhblitsdb} -jackhmmerdb={jackhmmerdb} -hmmsearchdb={tmpdir}/DB.fasta.fseqs -tmpdir={tmpdir}/MSA {datadir}/JGI/DeepJGI.fasta" if not os.path.isfile(f"{datadir}/JGI/DeepJGI.a3m"): print(f"first dMSA2 - {cmd}") content = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True).stdout.decode("utf-8") print(content) if not os.path.isfile(f"{datadir}/JGI/DeepJGI.a3m"): if os.path.isfile(f"{datadir}/JGI/DeepJGI.hmsa3m"): os.system(f"cp {datadir}/JGI/DeepJGI.hmsa3m {datadir}/JGI/DeepJGI.a3m") elif os.path.isfile(f"{datadir}/JGI/DeepJGI.jaca3m"): os.system(f"cp {datadir}/JGI/DeepJGI.jaca3m {datadir}/JGI/DeepJGI.a3m") else: os.system(f"cp {datadir}/JGI/DeepJGI.hhba3m {datadir}/JGI/DeepJGI.a3m") print(f"tmpdir = {tmpdir}") if os.path.exists(tmpdir): os.system(f"rm -rf {tmpdir}") print(f"JGImod.py ended!\n")