#######################################
# emboss.standard definition file
#######################################

#######################################
# This file is included in the EMBOSS distribution
# and installed automatically.
#
# The definitions are required by EMBOSS applications
# to use the EDAM annotations of ACD files and
# public data resources, and to make a set of
# public data servers available without the
# need for additional configuration.
#
# Layout: one attribute per line. A '#' starts a comment
# that runs to the end of the line, so definitions must
# never share a line with a comment.
############################################

#######################################
# Standard databases
#######################################

#######################################
# EBI Chemical Ontology (CHEBI)
#######################################

DBNAME chebi [
   comment: "Chemical Entities of Biological Interest"
   release: "93"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "none"
   edamtpc: "0154 ! Small molecules"
   edamdat: "0962 ! Small molecule report"
   edamid: "1174 ! ChEBI ID"
   edamfmt: "2196 ! OBO format"
]

#######################################
# Evidence codes ontology (ECO)
# used to define sequence database evidence
#######################################

DBNAME eco [
   comment: "Evidence code ontology"
   release: "2.02"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "none"
   edamfmt: "2196 ! OBO format"
]

#######################################
# EDAM ontology (EMBRACE Data And Methods)
# used to annotate all ACD files
# and the Data Resource Catalogue
#
# The edam_* entries below all carry dbalias: "edam"
# and differ only in their namespace: value.
#######################################

DBNAME edam [
   comment: "EMBRACE Data and Methods ontology"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME edam_data [
   comment: "EMBRACE Data and Methods ontology (data)"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   dbalias: "edam"
   namespace: "data|identifier"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME edam_format [
   comment: "EMBRACE Data and Methods ontology (formats)"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   dbalias: "edam"
   namespace: "format"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME edam_identifier [
   comment: "EMBRACE Data and Methods ontology (identifiers)"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   dbalias: "edam"
   namespace: "identifier"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME edam_operation [
   comment: "EMBRACE Data and Methods ontology (operations)"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   dbalias: "edam"
   namespace: "operation"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME edam_topic [
   comment: "EMBRACE Data and Methods ontology (topics)"
   release: "1.1"
   type: "obo"
   format: "obo"
   method: "emboss"
   dbalias: "edam"
   namespace: "topic"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data"
#  fields: "id acc nam isa des ns hasattr hasin hasout
#           isid isfmt issrc"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   field: "hasattr ! identifier(s) from has_attribute relation(s)"
   field: "hasin ! identifier(s) from has_input relation(s)"
   field: "hasout ! identifier(s) from has_output relation(s)"
   field: "isid ! identifier(s) from is_identifier_of relation(s)"
   field: "isfmt ! identifier(s) from is_format_of relation(s)"
   field: "issrc ! identifier(s) from is_source_of relation(s)"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

############################################
# Gene ontology (GO)
# used to annotate public sequence databases
#
# NOTE(review): unlike the edam_* subsets, the go_*
# subsets below set namespace: but no dbalias: "go".
# Confirm whether that is intentional.
############################################

DBNAME go [
   comment: "Gene Ontology"
   release: "1.1.3300"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des ns"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   taxon: "1 all"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME go_component [
   comment: "Gene Ontology (cellular components)"
   release: "1.1.3300"
   type: "obo"
   format: "obo"
   method: "emboss"
   namespace: "cellular_component"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des ns"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   taxon: "1 all"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME go_function [
   comment: "Gene Ontology (molecular functions)"
   release: "1.1.3300"
   type: "obo"
   format: "obo"
   method: "emboss"
   namespace: "molecular_function"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des ns"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   taxon: "1 all"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

DBNAME go_process [
   comment: "Gene Ontology (biological processes)"
   release: "1.1.3300"
   type: "obo"
   format: "obo"
   method: "emboss"
   namespace: "biological_process"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des ns"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   field: "ns ! namespace"
   taxon: "1 all"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

#######################################
# Pathways ontology (PW)
#######################################

DBNAME pw [
   comment: "Pathways ontology"
   release: "12:06:2012 11:03"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "1 all"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

#######################################
# Relations ontology (RO)
# used to define OBO file relations
#######################################

DBNAME ro [
   comment: "Relations ontology"
   release: "20:03:2009 11:58"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

#######################################
# Sequence ontology (SO)
# used to define sequence features
#######################################

DBNAME so [
   comment: "Sequence ontology"
   release: "02:05:2012 11:08"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

#######################################
# Software ontology (SWO)
# used to define general software
# in collaboration with EDAM
#######################################

DBNAME swo [
   comment: "Software ontology"
   release: "12-jul-2012"
   type: "obo"
   format: "obo"
   method: "emboss"
   indexdirectory: "$emboss_standard/index"
   directory: "$emboss_standard/data/OBO"
#  fields: "id acc nam isa des"
   field: "id ! identifier without the prefix"
   field: "acc ! full name and any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "isa ! parent identifier from is_a relation(s)"
   field: "des ! words in the description"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
   edamdat: "0966 ! Ontology term"
   edamid: "1176 ! GO term ID"
   edamfmt: "2196 ! OBO format"
]

#############################################
# Data Resource Catalogue DRCAT.dat
# lists all available public data resources
# with example queries and annotation
# using EDAM ontology terms to describe the
# resource, and also the data type, format
# and identifier for each query
#############################################

DBNAME drcat [
   comment: "Data Resource Catalogue"
   release: "alpha 1 21-jun-2011"
   type: "resource"
   format: "drcat"
   method: "emboss"
   indexdirectory: "$emboss_standard/index/"
   directory: "$emboss_standard/data/"
#  fields: "id acc nam des url cat edat efmt eid etpc xref
#           qout qfmt qin qurl cc stat"
   field: "id ! identifier without the prefix"
   field: "acc ! any alternate identifier(s)"
   field: "nam ! words in the name"
   field: "des ! words in the description"
   field: "url ! words in the main resource URL"
   field: "cat ! category"
   field: "edat ! EDAM data term(s) for query results"
   field: "efmt ! EDAM format term(s) for query results"
   field: "eid ! EDAM data term(s) for query identifier(s)"
   field: "etpc ! EDAM topic term(s)"
#  xref is declared once only, matching the single xref
#  entry in the drcatresource fields list.
   field: "xref ! Database cross-reference"
   field: "qout ! Query output data type"
   field: "qfmt ! Query format name"
   field: "qin ! Query identifier name(s)"
   field: "qurl ! Query URLs"
   field: "cc ! words in the comment(s)"
   field: "rest ! REST service"
   field: "soap ! SOAP service"
   field: "stat ! status"
   field: "taxid ! NCBI taxonomy identifier"
   taxon: "none"
   edamtpc: "0089 ! Ontologies"
]

#############################################
# NCBI taxonomy nodes.dmp and names.dmp
# lists all taxons in the public sequence databases
#############################################

DBNAME taxon [
   comment: "NCBI taxonomy"
   release: "July 2012"
   type: "taxonomy"
   format: "ncbi"
   method: "embosstax"
   indexdirectory: "$emboss_standard/index/"
   directory: "$emboss_standard/data/TAXONOMY"
#  fields: "id acc tax rnk up gc mgc"
   field: "id ! identifier without the prefix"
   field: "acc ! any alternate identifier(s)"
   field: "tax ! taxon name"
   field: "rnk ! taxon rank name"
   field: "up ! parent taxon id"
   field: "gc ! genetic code"
   field: "mgc ! mitochondrial genetic code"
#  NOTE(review): other entries in this file write "1 all"
#  without the '!' separator - confirm which form is intended.
   taxon: "1 ! all"
   edamtpc: "0637 ! Taxonomy"
   edamid: "1179 ! NCBI taxonomy ID"
   edamdat: "3028 ! Taxonomic data"
]

################################################
# Servers providing standard sets of databases
################################################

######################
# SRS server at EBI
######################

SERVER srs [
   method: "srswww"
   type: "unknown"
   url: "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz"
   comment: "SRS from EBI"
   serverversion: "7.1.3"
   cachefile: "server.srs"
]

SERVER dkfz [
   method: "srswww"
   type: "sequence features text obo"
#  url: "http://www.dkfz-heidelberg.de/srs/srs7131bin/cgi-bin/wgetz"
   url: "http://www.dkfz.de/menu/cgi-bin/srs7.1.3.1/wgetz"
   comment: "SRS from DKFZ Heidelberg"
   serverversion: "7.1.3"
   cachefile: "server.dkfz"
]

#######################
# ENSEMBL
#######################

SERVER ensembl [
   method: "ensembl"
   type: "sequence"
   cachefile: "server.ensembl"
   caseidmatch: "N"
   comment: "Public Ensembl MySQL instance, use for small-scale queries only."
   field: "id ! identifier"
   field: "eid ! Ensembl Exon stable identifier"
   field: "pid ! Ensembl Translation stable identifier"
   field: "tid ! Ensembl Transcript stable identifier"
   field: "grp ! Ensembl Database Adaptor group"
   format: "ensembl"
   hasaccession: "N"
   identifier: "id"
   url: "mysql://anonymous@ensembldb.ensembl.org:5306/"
]

SERVER ensemblgenomes [
   method: "ensembl"
   type: "sequence"
   cachefile: "server.ensemblgenomes"
   caseidmatch: "N"
   comment: "Public Ensembl Genomes MySQL instance, use for small-scale queries only."
   field: "id ! identifier"
   field: "eid ! Ensembl Exon stable identifier"
   field: "pid ! Ensembl Translation stable identifier"
   field: "tid ! Ensembl Transcript stable identifier"
   field: "grp ! Ensembl Database Adaptor group"
   format: "ensembl"
   hasaccession: "N"
   identifier: "id"
   url: "mysql://anonymous@mysql.ebi.ac.uk:4157/"
]

#######################
# DAS
#######################

SERVER das [
   method: "das"
   type: "sequence, features"
   url: "http://www.dasregistry.org/das/"
   comment: "DAS sequence/feature sources listed on the DAS registry"
   cachefile: "server.dasregistry"
]

#######################
# BioMart
#######################

SERVER biomart [
   method: "biomart"
   type: "sequence"
   url: "http://www.biomart.org:80/biomart/martservice"
   cachefile: "server.biomart"
]

#######################
# MRS
#######################

SERVER mrs [
   methodquery: "mrs"
   type: "unknown"
   url: "http://mrs.cmbi.ru.nl/mrs-5/search"
   cachefile: "server.mrs"
   serverversion: "MRS-5"
]

SERVER mrs4 [
   methodquery: "mrs4"
   type: "unknown"
   url: "http://mrs.cmbi.ru.nl/mrs-web/plain.do"
   cachefile: "server.mrs4"
   serverversion: "MRS-4"
]

SERVER mrs3 [
   methodquery: "mrs3"
   type: "unknown"
   url: "http://mrs.cmbi.ru.nl/mrs-3/plain.do"
   cachefile: "server.mrs3"
   serverversion: "MRS-3"
]

#######################
# Entrez at NCBI
#######################

SERVER entrez [
   methodquery: "entrez"
   type: "unknown"
   cachefile: "server.entrez"
]

# The two servers inside this IFDEF/ENDIF pair are only
# defined when EMBOSS_AXIS2C is set.
IFDEF EMBOSS_AXIS2C

#############################
# EBI's EB-eye search service
#############################

SERVER ebeye [
   methodquery: "ebeye"
   type: "text"
   url: "http://www.ebi.ac.uk/ebisearch/service.ebi"
   comment: "EB-eye text search service that allows searching across EBI public databases"
   cachefile: "server.ebeye"
]

###########################
# EBI's WsDbFetch service
###########################

SERVER wsdbfetch [
   methodentry: "wsdbfetch"
   type: "sequence, features, text, obo"
   url: "http://www.ebi.ac.uk/ws/services/WSDbfetchDoclit"
   comment: "EBI dbfetch webservices(SOAP)"
   cachefile: "server.wsdbfetch"
]

ENDIF

###########################
# EBI's DbFetch service
###########################

SERVER dbfetch [
   methodentry: "dbfetch"
   type: "sequence, features, text, obo"
   url: "http://www.ebi.ac.uk/Tools/dbfetch/"
   comment: "EBI dbfetch webservices(REST)"
   cachefile: "server.dbfetch"
]

##################################################################
# Resource definitions for the dbx indexing system.
# Every definition uses the full RESOURCE keyword for consistency.
##################################################################

RESOURCE dbxresource [
   type: "Index"
   idlen: "15"
   acclen: "15"
   svlen: "15"
   deslen: "25"
   orglen: "15"
   keylen: "15"
]

RESOURCE taxresource [
   type: "Index"
   fields: "id acc tax rnk up gc mgc"
   rnklen: "16"
   idlen: "7"
   uplen: "7"
   taxlen: "120"
   gclen: "2"
   mgclen: "2"
]

RESOURCE emblresource [
   type: "Index"
   idlen: "15"
   acclen: "15"
   svlen: "15"
   keylen: "80"
   deslen: "75"
   orglen: "75"
]

RESOURCE swissresource [
   type: "Index"
   idlen: "12"
   acclen: "6"
   svlen: "10"
   deslen: "125"
   keylen: "75"
   orglen: "75"
   despagesize: "4096"
   keypagesize: "2048"
   orgpagesize: "2048"
   cachesize: "20000"
   seccachesize: "100000"
]

RESOURCE pirresource [
   type: "Index"
   idlen: "6"
   acclen: "6"
   svlen: "10"
   keylen: "25"
   deslen: "15"
   orglen: "35"
]

RESOURCE chebiresource [
   type: "Index"
   fields: "id acc nam isa des ns"
   acclen: "400"
   namlen: "125"
   deslen: "50"
   nslen: "20"
   accpagesize: "16384"
   nampagesize: "8192"
   despagesize: "8192"
]

RESOURCE oboresource [
   type: "Index"
   fields: "id acc nam isa des ns"
   acclen: "200"
   namlen: "70"
   deslen: "50"
   nslen: "20"
   accpagesize: "16384"
   nampagesize: "16384"
   despagesize: "4096"
]

RESOURCE sworesource [
   type: "Index"
   fields: "id acc nam isa des ns"
   idlen: "60"
   acclen: "100"
   namlen: "150"
   deslen: "50"
   nslen: "20"
   isalen: "50"
   idpagesize: "16384"
   accpagesize: "16384"
   nampagesize: "16384"
   despagesize: "4096"
   isapagesize: "16384"
]

RESOURCE edamresource [
   type: "Index"
   fields: "id acc nam isa des ns hasattr hasin hasout isid isfmt issrc"
   acclen: "80"
   namlen: "32"
   deslen: "30"
   isalen: "20"
   accpagesize: "8192"
   despagesize: "4096"
]

RESOURCE drcatresource [
   type: "Index"
   fields: "id acc nam des url cat taxid edat efmt eid etpc xref qout qfmt qin qurl rest soap cc stat"
   idlen: "32"
   acclen: "18"
   qfmtlen: "30"
   qinlen: "65"
   qoutlen: "90"
   qurllen: "30"
   namlen: "20"
   deslen: "20"
   urllen: "25"
   restlen: "20"
   cclen: "40"
   catlen: "60"
   taxidlen: "6"
]

# Pagesize - this is the size of disc page blocks and is
# required by the 'dbx' indexing programs and 'method: emboss'
# We recommend a value of 2048

SET PAGESIZE 2048
SET SECPAGESIZE 512

# Cachesize - a cache of disc pages to be used by the 'dbx' indexing
# system and is required for 'method emboss'. It is the number of
# PAGESIZE or SECPAGESIZE blocks to cache and the initial size for the
# primary and secondary caches. We recommend a value of 20000. The
# secondary cache can be set separately as SECCACHESIZE. There is
# little to gain by reducing the values, but heavy I/O loads can be
# avoided by increasing them without exceeding the available physical
# memory for the indexing runs and other system load.

SET CACHESIZE 20000