import glob
import os
import subprocess
import sys

# Human-readable description of the script; printed once at start-up.
docstring = '''
Download_lib.py

This program will download the DeepMSA2 sequence databases and AlphaFold2 databases.

Including:
    uniclust30
    uniref90
    metaclust
    UniRef30
    BFD
    MGnify
    JGIclust collected from IMG/M
    MetaSourceDB
    TaraDB
    PDB70 and sequences for PDB70
    MMCIF from PDB
    AlphaFold2 parameters
    Uniprot
    Small_BFD
'''

# Remote directory that hosts every archive fetched by this script.
BASE_URL = "https://zhanggroup.org/ftp/data"

# Remote directory holding the JGIclust file list and its .xz members.
JGI_URL = BASE_URL + "/JGIclust30"

# Databases published as <name>.zip under BASE_URL.
ZIP_DB_LIST = [
    "uniclust30_2017_04",  # DeepMSA2
    "uniref90",
    "metaclust",
    "UniRef30_2022_02",
    "mgnify",
    "params",  # AF2
    "pdb70",
    "pdb_mmcif",
    "pdb_seqres",  # AF2-Multimer
    "uniprot",
    "small_bfd",
]

# Databases published as <name>.tar.gz under BASE_URL (DeepMSA2).
TARGZ_DB_LIST = [
    "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt",
]


def parse_name_list(lines):
    """Return the non-empty entries of *lines* with surrounding whitespace removed.

    Blank lines are dropped so that they never turn into a request for a
    nameless ``.xz`` file, and stray ``\\r`` characters are stripped.
    """
    stripped = (line.strip() for line in lines)
    return [name for name in stripped if name]


def run_command(args):
    """Run the external command *args* (a list of strings) without a shell.

    Passing an argument vector instead of a shell string means names read
    from downloaded files cannot be interpreted as shell syntax.

    Returns True when the command exits with status 0.  On any failure
    (including the program not being installed) a warning is written to
    stderr and False is returned; no exception is raised.
    """
    try:
        status = subprocess.call(args)
    except OSError as err:
        sys.stderr.write("WARNING: could not run %s: %s\n" % (args[0], err))
        return False
    if status != 0:
        sys.stderr.write(
            "WARNING: '%s' exited with status %d\n" % (" ".join(args), status)
        )
        return False
    return True


def ensure_dir(path):
    """Create directory *path* (and missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def fetch_and_unpack(url, unpack_args, label, failures):
    """Download *url* with ``wget -c`` and then run *unpack_args*.

    *label* is used in the progress messages.  Decompression is skipped
    when the download fails.  Every failed step is appended to *failures*.
    Returns True only when both steps succeeded.
    """
    print("download %s" % label)
    if not run_command(["wget", "-c", url]):
        failures.append("download %s" % url)
        return False
    print("decompress %s" % label)
    if not run_command(unpack_args):
        failures.append("decompress %s" % unpack_args[-1])
        return False
    return True


def report_failures(failures):
    """Write a summary of failed steps to stderr; silent when nothing failed."""
    if not failures:
        return
    sys.stderr.write("WARNING: %d step(s) failed:\n" % len(failures))
    for item in failures:
        sys.stderr.write("    %s\n" % item)


def main():
    """Download and unpack every database into ./database.

    Individual failures do not abort the run: the remaining databases are
    still attempted and a summary is printed at the end.  The run stops
    early only when the JGIclust file list cannot be obtained.
    """
    print(docstring)
    failures = []

    ensure_dir("database")
    os.chdir("database")

    for db in ZIP_DB_LIST:
        archive = "%s.zip" % db
        fetch_and_unpack(
            "%s/%s" % (BASE_URL, archive),
            ["unzip", "-u", archive],
            "db %s" % db,
            failures,
        )

    for db in TARGZ_DB_LIST:
        archive = "%s.tar.gz" % db
        fetch_and_unpack(
            "%s/%s" % (BASE_URL, archive),
            ["tar", "-zvxf", archive],
            "db %s" % db,
            failures,
        )
        if db.startswith("bfd_"):
            ensure_dir("bfd")
            # NOTE(review): the pattern also matches the .tar.gz itself, so
            # the archive ends up in bfd/ and a later run's `wget -c` will
            # presumably start from scratch -- confirm whether intended.
            matches = sorted(glob.glob("bfd_metaclust*"))
            if matches and not run_command(["mv"] + matches + ["bfd/"]):
                failures.append("move bfd_metaclust* into bfd/")

    # JGI download (JGIclust, MetaSourceDB and TaraDB are included in here).
    ensure_dir("JGIclust")
    os.chdir("JGIclust")
    if not run_command(["wget", "-c", "%s/list" % JGI_URL]):
        failures.append("download %s/list" % JGI_URL)
    if not os.path.isfile("list"):
        report_failures(failures)
        sys.exit("ERROR: JGIclust file list is unavailable; cannot continue")

    with open("list", "r") as handle:
        jgi_names = parse_name_list(handle)

    for jgidb in jgi_names:
        # Each entry comes as a data file plus a matching .ssi index file.
        for kind, suffix in (("jgi db", ".xz"), ("jgi ssi db", ".ssi.xz")):
            archive = jgidb + suffix
            fetch_and_unpack(
                "%s/%s" % (JGI_URL, archive),
                ["xz", "-dvf", archive],
                "%s %s" % (kind, jgidb),
                failures,
            )

    report_failures(failures)


if __name__ == "__main__":
    main()