--$Revision: 6.0 $
--**********************************************************************
--
--  asn.all
--    this file contains all NCBI ASN.1 specifications together
--
--  by James Ostell, 1990
--
--**********************************************************************

--$Revision: 6.5 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS
    Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
--    Nlm_Handle str;                  /* the string piece */
--    Nlm_Int2 len_avail,
--        len;
--    struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
--    Nlm_Int4 seekptr,       /* current position */
--        totlen,             /* total stored data length in bytes */
--        chain_offset;       /* offset in ByteStore of first byte in curchain */
--    Nlm_BSUnitPtr chain,    /* chain of elements */
--        curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--   of ASN.1
--   It stores only a date
--

Date ::= CHOICE {
    str VisibleString ,              -- for those unparsed dates
    std Date-std                     -- use this if you can
}

Date-std ::= SEQUENCE {              -- NOTE: this is NOT a unix tm struct
    year INTEGER ,                   -- full year (including 1900)
    month INTEGER OPTIONAL ,         -- month (1-12)
    day INTEGER OPTIONAL ,           -- day of month (1-31)
    season VisibleString OPTIONAL ,  -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,          -- hour of day (0-23)
    minute INTEGER OPTIONAL ,        -- minute of hour (0-59)
    second INTEGER OPTIONAL          -- second of minute (0-59)
}

-- Dbtag is generalized for tagging
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString ,               -- name of database or system
    tag Object-id                    -- appropriate tag
}

-- Object-id can tag or name anything
--

Object-id ::= CHOICE {
    id INTEGER ,
    str VisibleString
}

-- Person-id is to define a std element for people
--

Person-id ::= CHOICE {
    dbtag Dbtag ,                    -- any defined database tag
    name Name-std ,                  -- structured name
    ml VisibleString ,               -- MEDLINE name (semi-structured)
                                     --    eg. "Jones RM"
    str VisibleString,               -- unstructured name
    consortium VisibleString         -- consortium name
}

Name-std ::= SEQUENCE {              -- Structured names
    last VisibleString ,
    first VisibleString OPTIONAL ,
    middle VisibleString OPTIONAL ,
    full VisibleString OPTIONAL ,    -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL, -- first + middle initials
    suffix VisibleString OPTIONAL ,  -- Jr, Sr, III
    title VisibleString OPTIONAL     -- Dr., Sister, etc
}

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

Int-fuzz ::= CHOICE {
    p-m INTEGER ,                    -- plus or minus fixed amount
    range SEQUENCE {                 -- max to min
        max INTEGER ,
        min INTEGER
    } ,
    pct INTEGER ,                    -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                 -- some limit value
        unk (0) ,                    -- unknown
        gt (1) ,                     -- greater than
        lt (2) ,                     -- less than
        tr (3) ,                     -- space to right of position
        tl (4) ,                     -- space to left of position
        circle (5) ,                 -- artificial break at origin of circle
        other (255)                  -- something else
    } ,
    alt SET OF INTEGER               -- set of alternatives for the integer
}

--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,   -- endeavor which designed this object
    type Object-id ,                 -- type of object within class
    data SEQUENCE OF User-field      -- the object itself
}

User-field ::= SEQUENCE {
    label Object-id ,                -- field label
    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss
    data CHOICE {                    -- field contents
        str VisibleString ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,         -- for using other definitions
        strs SEQUENCE OF VisibleString ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,
        objects SEQUENCE OF User-object
    }
}

END

--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS
    Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
    Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;

IMPORTS
    Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

ArticleId ::= CHOICE {         -- can be many ids for an article
    pubmed PubMedId ,          -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag                -- generic catch all
}

PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER         -- Id from MEDLINE
DOI ::= VisibleString          -- Document Object Identifier
PII ::= VisibleString          -- Controlled Publisher Identifier
PmcID ::= INTEGER              -- PubMed Central Id
PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString        -- Publisher Id supplied to PubMed

ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

PubStatus ::= INTEGER {        -- points of publication
    received (1) ,             -- date manuscript received for review
    accepted (2) ,             -- accepted for publication
    epublish (3) ,             -- published electronically by publisher
    ppublish (4) ,             -- published in print by publisher
    revised (5) ,              -- article revised by publisher/author
    pmc (6) ,                  -- article first appeared in PubMed Central
    pmcr (7) ,                 -- article revision in PubMed Central
    pubmed (8) ,               -- article citation first appeared in PubMed
    pubmedr (9) ,              -- article citation revision in PubMed
    aheadofprint (10),         -- epublish, but will be followed by print
    premedline (11),           -- date into PreMedline status
    medline (12),              -- date made a MEDLINE record
    other (255)
}

PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date                  -- time may be added later
}

PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc
    } ,
    ids ArticleIdSet OPTIONAL           -- lots of ids
}

Cit-jour ::= SEQUENCE {                 -- Journal citation
    title Title ,                       -- title of journal
    imp Imprint
}

Cit-book ::= SEQUENCE {                 -- Book citation
    title Title ,                       -- Title of book
    coll Title OPTIONAL ,               -- part of a collection
    authors Auth-list,                  -- authors
    imp Imprint
}

Cit-proc ::= SEQUENCE {                 -- Meeting proceedings
    book Cit-book ,                     -- citation to meeting
    meet Meeting                        -- time and location of meeting
}

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
    app-date Date OPTIONAL ,            -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,     -- Applicants
    assignees Auth-list OPTIONAL ,      -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL     -- abstract of patent
}

Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date                           -- date of application
}

Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString        -- Patent Doc Appl Number
    } ,
    doc-type VisibleString OPTIONAL     -- Patent Doc Type
}

Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3)
    } OPTIONAL
}

    -- NOTE: this is just to cite a
    -- direct data submission, see NCBI-Submit
    -- for the form of a sequence submission

Cit-sub ::= SEQUENCE {                  -- citation for a direct submission
    authors Auth-list ,                 -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,              -- this only used to get date.. will go
    medium ENUMERATED {                 -- medium of submission
        paper (1) ,
        tape (2) ,
        floppy (3) ,
        email (4) ,
        other (255)
    } OPTIONAL ,
    date Date OPTIONAL ,                -- replaces imp, will become required
    descr VisibleString OPTIONAL        -- description of changes for public view
}

Cit-gen ::= SEQUENCE {                  -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,        -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,             -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,    -- for GenBank style references
    title VisibleString OPTIONAL ,      -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL              -- PubMed Id
}

    -- Authorship Group

Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,        -- full citations
        ml SEQUENCE OF VisibleString ,  -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString   -- free for all
    } ,
    affil Affil OPTIONAL                -- author affiliation
}

Author ::= SEQUENCE {
    name Person-id ,                    -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2)
    } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4)
    } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL            -- TRUE if corresponding author
}

Affil ::= CHOICE {
    str VisibleString ,                 -- unparsed string
    std SEQUENCE {                      -- std representation
        affil VisibleString OPTIONAL ,  -- Author Affiliation, Name
        div VisibleString OPTIONAL ,    -- Author Affiliation, Division
        city VisibleString OPTIONAL ,   -- Author Affiliation, City
        sub VisibleString OPTIONAL ,    -- Author Affiliation, County Sub
        country VisibleString OPTIONAL , -- Author Affiliation, Country
        street VisibleString OPTIONAL , -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL
    }
}

    -- Title Group
    -- Valid for = A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
                                                 -- Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,    -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,    -- Title, Subordinate       A B
    trans VisibleString ,   -- Title, Translated        AJB
    jta VisibleString ,     -- Title, Abbreviated        J
    iso-jta VisibleString , -- specifically ISO jta      J
    ml-jta VisibleString ,  -- specifically MEDLINE jta  J
    coden VisibleString ,   -- a coden                   J
    issn VisibleString ,    -- ISSN                      J
    abr VisibleString ,     -- Title, Abbreviated         B
    isbn VisibleString      -- ISBN                       B
}

Imprint ::= SEQUENCE {                  -- Imprint group
    date Date ,                         -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                 -- publisher, required for book
    cprt Date OPTIONAL,                 -- copyright date, "    "   "
    part-sup VisibleString OPTIONAL ,   -- part/sup of volume
    language VisibleString DEFAULT "ENG" , -- put here for simplicity
    prepub ENUMERATED {                 -- for prepublication citations
        submitted (1) ,                 -- submitted, not accepted
        in-press (2) ,                  -- accepted, not published
        other (255)
    } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,  -- part/sup on issue
    retract CitRetract OPTIONAL ,       -- retraction info
    pubstatus PubStatus OPTIONAL ,      -- current status of this publication
    history PubStatusDateSet OPTIONAL   -- dates for this record
}

CitRetract ::= SEQUENCE {
    type ENUMERATED {                   -- retraction of an entry
        retracted (1) ,                 -- this citation retracted
        notice (2) ,                    -- this citation is a retraction notice
        in-error (3) ,                  -- an erratum was published about this
        erratum (4)                     -- this is a published erratum
    } ,
    exp VisibleString OPTIONAL          -- citation and/or explanation
}

Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL
}

END

--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS
    Medline-entry, Medline-si;

IMPORTS
    Cit-art, PubMedId FROM NCBI-Biblio
    Date FROM NCBI-General;

                                -- a MEDLINE or PubMed entry
Medline-entry ::= SEQUENCE {
    uid INTEGER OPTIONAL ,      -- MEDLINE UID, sometimes not yet available if from PubMed
    em Date ,                   -- Entry Month
    cit Cit-art ,               -- article citation
    abstract VisibleString OPTIONAL ,
    mesh SET OF Medline-mesh OPTIONAL ,
    substance SET OF Medline-rn OPTIONAL ,
    xref SET OF Medline-si OPTIONAL ,
    idnum SET OF VisibleString OPTIONAL ,  -- ID Number (grants, contracts)
    gene SET OF VisibleString OPTIONAL ,
    pmid PubMedId OPTIONAL ,    -- MEDLINE records may include the PubMedId
    pub-type SET OF VisibleString OPTIONAL, -- may show publication types (review, etc)
    mlfield SET OF Medline-field OPTIONAL , -- additional Medline field types
    status INTEGER {
        publisher (1) ,         -- record as supplied by publisher
        premedline (2) ,        -- premedline record
        medline (3)             -- regular medline record
    } DEFAULT medline
}

Medline-mesh ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point (*)
    term VisibleString ,                -- the MeSH term
    qual SET OF Medline-qual OPTIONAL   -- qualifiers
}

Medline-qual ::= SEQUENCE {
    mp BOOLEAN DEFAULT FALSE ,          -- TRUE if main point
    subh VisibleString                  -- the subheading
}

Medline-rn ::= SEQUENCE {               -- medline substance records
    type ENUMERATED {                   -- type of record
        nameonly (0) ,
        cas (1) ,                       -- CAS number
        ec (2)                          -- EC number
    } ,
    cit VisibleString OPTIONAL ,        -- CAS or EC number if present
    name VisibleString                  -- name (always present)
}

Medline-si ::= SEQUENCE {               -- medline cross reference records
    type ENUMERATED {                   -- type of xref
        ddbj (1) ,                      -- DNA Data Bank of Japan
        carbbank (2) ,                  -- Carbohydrate Structure Database
        embl (3) ,                      -- EMBL Data Library
        hdb (4) ,                       -- Hybridoma Data Bank
        genbank (5) ,                   -- GenBank
        hgml (6) ,                      -- Human Gene Map Library
        mim (7) ,                       -- Mendelian Inheritance in Man
        msd (8) ,                       -- Microbial Strains Database
        pdb (9) ,                       -- Protein Data Bank (Brookhaven)
        pir (10) ,                      -- Protein Identification Resource
        prfseqdb (11) ,                 -- Protein Research Foundation (Japan)
        psd (12) ,                      -- Protein Sequence Database (Japan)
        swissprot (13) ,                -- SwissProt
        gdb (14)                        -- Genome Data Base
    } ,
    cit VisibleString OPTIONAL          -- the citation/accession number
}

Medline-field ::= SEQUENCE {
    type INTEGER {                      -- Keyed type
        other (0) ,                     -- look in line code
        comment (1) ,                   -- comment line
        erratum (2)                     -- retracted, corrected, etc
    } ,
    str VisibleString ,                 -- the text
    ids SEQUENCE OF DocRef OPTIONAL     -- pointers relevant to this text
}

DocRef ::= SEQUENCE {                   -- reference to a document
    type INTEGER {
        medline (1) ,
        pubmed (2) ,
        ncbigi (3)
    } ,
    uid INTEGER
}

END

--$Revision: 6.0 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS
    Pubmed-entry, Pubmed-url;

IMPORTS
    PubMedId FROM NCBI-Biblio
    Medline-entry FROM NCBI-Medline;

Pubmed-entry ::= SEQUENCE {             -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,

    -- Medline entry information
    medent Medline-entry OPTIONAL,

    -- Publisher name
    publisher VisibleString OPTIONAL,

    -- List of URL to publisher cite
    urls SET OF Pubmed-url OPTIONAL,

    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL,    -- Location code
    url VisibleString                   -- Selected URL for location
}

END

--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS
    Medlars-entry, Medlars-record;

IMPORTS
    PubMedId FROM NCBI-Biblio;

Medlars-entry ::= SEQUENCE {            -- a MEDLARS entry
    pmid PubMedId,                      -- All entries in PubMed must have it
    muid INTEGER OPTIONAL,              -- Medline(OCCS) id
    recs SET OF Medlars-record          -- List of Medlars records
}

Medlars-record ::= SEQUENCE {
    code INTEGER,                       -- Unit record field type integer form
    abbr VisibleString OPTIONAL,        -- Unit record field type abbreviation form
    data VisibleString                  -- Unit record data
}

END

--$Revision: 6.0 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  This is the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS
    Pub, Pub-set, Pub-equiv;

IMPORTS
    Medline-entry FROM NCBI-Medline
    Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
    Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

Pub ::= CHOICE {
    gen Cit-gen ,               -- general or generic unparsed
    sub Cit-sub ,               -- submission
    medline Medline-entry ,
    muid INTEGER ,              -- medline uid
    article Cit-art ,
    journal Cit-jour ,
    book Cit-book ,
    proc Cit-proc ,             -- proceedings of a meeting
    patent Cit-pat ,
    pat-id Id-pat ,             -- identify a patent
    man Cit-let ,               -- manuscript, thesis, or letter
    equiv Pub-equiv,            -- to cite a variety of ways
    pmid PubMedId               -- PubMedId
}

Pub-equiv ::= SET OF Pub        -- equivalent identifiers for same citation

Pub-set ::= CHOICE {
    pub SET OF Pub ,
    medline SET OF Medline-entry ,
    article SET OF Cit-art ,
    journal SET OF Cit-jour ,
    book SET OF Cit-book ,
    proc SET OF Cit-proc ,      -- proceedings of a meeting
    patent SET OF Cit-pat
}

END

--$Revision: 6.5 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS
    Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
    Na-strand, Giimport-id;

IMPORTS
    Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
    Id-pat FROM NCBI-Biblio
    Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

Seq-id ::= CHOICE {
    local Object-id ,            -- local use
    gibbsq INTEGER ,             -- Geninfo backbone seqid
    gibbmt INTEGER ,             -- Geninfo backbone moltype
    giim Giimport-id ,           -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
    general Dbtag ,              -- for other databases
    gi INTEGER ,                 -- GenInfo Integrated Database
    ddbj Textseq-id ,            -- DDBJ
    prf Textseq-id ,             -- PRF SEQDB
    pdb PDB-seq-id ,             -- PDB sequence
    tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
    tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
    tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id -- Internal named annotation tracking ID
}

Seq-id-set ::= SET OF Seq-id

Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,              -- number of sequence in patent
    cit Id-pat                   -- patent citation
}

Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL
}

Giimport-id ::= SEQUENCE {
    id INTEGER ,                      -- the id to use here
    db VisibleString OPTIONAL ,       -- dbase used in
    release VisibleString OPTIONAL    -- the release
}

PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id ,             -- the molecule name
    chain INTEGER DEFAULT 32 ,   -- a single ASCII character, chain id
    rel Date OPTIONAL            -- release date, month and year
}

PDB-mol-id ::= VisibleString     -- name of mol, 4 chars

--*** Sequence locations **********************************
--*

Seq-loc ::= CHOICE {
    null NULL ,                  -- not placed
    empty Seq-id ,               -- to NULL one Seq-id in a collection
    whole Seq-id ,               -- whole sequence
    int Seq-interval ,           -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv ,        -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id                 -- indirect, through a Seq-feat
}

Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                  -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL
}

Packed-seqint ::= SEQUENCE OF Seq-interval

Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,                  -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL
}

Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER
}

Na-strand ::= ENUMERATED {       -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) ,                   -- in forward orientation
    both-rev (4) ,               -- in reverse orientation
    other (255)
}

Seq-bond ::= SEQUENCE {          -- bond between residues
    a Seq-point ,                -- connection to at least one residue
    b Seq-point OPTIONAL         -- other end may not be available
}

Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything

Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations

END

--$Revision: 6.24 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

-- Each exported symbol is listed exactly once (the list previously named
-- Seqdesc twice; the redundant entry was dropped, nothing else changed).
EXPORTS
    Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
    Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
    Seq-hist, Seq-inst, Seq-literal, Delta-ext, Seq-gap;

IMPORTS
    Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
    Seq-align FROM NCBI-Seqalign
    Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
    Seq-graph FROM NCBI-Seqres
    Pub-equiv FROM NCBI-Pub
    Org-ref FROM NCBI-Organism
    BioSource FROM NCBI-BioSource
    Seq-id, Seq-loc FROM NCBI-Seqloc
    GB-block FROM GenBank-General
    PIR-block FROM PIR-General
    EMBL-block FROM EMBL-General
    SP-block FROM SP-General
    PRF-block FROM PRF-General
    PDB-block FROM PDB-General
    Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,               -- equivalent identifiers
    descr Seq-descr OPTIONAL ,       -- descriptors
    inst Seq-inst ,                  -- the sequence data
    annot SET OF Seq-annot OPTIONAL
}

--*** Descriptors *****************************
--*

Seq-descr ::= SET OF Seqdesc

Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,      -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo ,            -- info on the molecule and techniques
    modelev ModelEvidenceSupport -- model evidence for XM records
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,        -- other genetic material
        genomic-mRNA (10) ,        -- reported a mix of genomic and cdna sequence
        cRNA (11) ,                -- viral RNA genome copy intermediate
        snoRNA (12) ,              -- small nucleolar RNA
        transcribed-RNA (13) ,     -- transcribed RNA other than existing classes
        ncRNA (14) ,
        tmRNA (15) ,
        other (255)
    } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,             -- standard sequencing
        est (2) ,                  -- Expressed Sequence Tag
        sts (3) ,                  -- Sequence Tagged Site
        survey (4) ,               -- one-pass genomic sequence
        genemap (5) ,              -- from genetic mapping techniques
        physmap (6) ,              -- from physical mapping techniques
        derived (7) ,              -- derived from other data, not a primary entity
        concept-trans (8) ,        -- conceptual translation
        seq-pept (9) ,             -- peptide was sequenced
        both (10) ,                -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) ,    -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,      -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,     -- conceptual transl. supplied by author
        htgs-1 (14) ,              -- unordered High Throughput sequence contig
        htgs-2 (15) ,              -- ordered High Throughput sequence contig
        htgs-3 (16) ,              -- finished High Throughput sequence
        fli-cdna (17) ,            -- full length insert cDNA
        htgs-0 (18) ,              -- single genomic reads for coordination
        htc (19) ,                 -- high throughput cDNA
        wgs (20) ,                 -- whole genome shotgun sequencing
        barcode (21) ,             -- barcode of life project
        composite-wgs-htgs (22) ,  -- composite of WGS and HTGS
        tsa (23) ,                 -- transcriptome shotgun assembly
        other (255)                -- use Source.techexp
    } DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    --
    -- Completeness is not indicated in most records.  For genomes, assume
    -- the sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    --
    completeness INTEGER {
        unknown (0) ,
        complete (1) ,             -- complete biological entity
        partial (2) ,              -- partial but no details given
        no-left (3) ,              -- missing 5' or NH3 end
        no-right (4) ,             -- missing 3' or COOH end
        no-ends (5) ,              -- missing both ends
        has-left (6) ,             -- 5' or NH3 end present
        has-right (7) ,            -- 3' or COOH end present
        other (255)
    } DEFAULT unknown ,
    gbmoltype VisibleString OPTIONAL   -- identifies particular ncRNA
}

GIBB-mol ::= ENUMERATED {          -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,                 -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,            -- other genetic material
    genomic-mRNA (10) ,            -- reported a mix of genomic and cdna sequence
    other (255)
}

GIBB-mod ::= ENUMERATED {          -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,                 -- subject of mutagenesis ?
    natmut (13) ,                  -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,                 -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,                -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,                     -- expressed sequence tag
    sts (21) ,                     -- sequence tagged site
    survey (22) ,                  -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,                 -- is a genetic map
    restmap (25) ,                 -- is an ordered restriction map
    physmap (26) ,                 -- is a physical map (not ordered restriction map)
    other (255)
}

GIBB-method ::= ENUMERATED {       -- sequencing methods
    concept-trans (1) ,            -- conceptual translation
    seq-pept (2) ,                 -- peptide was sequenced
    both (3) ,                     -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) ,         -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,           -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,          -- conceptual transl. supplied by author
    other (255)
}

Numbering ::= CHOICE {             -- any display numbering system
    cont Num-cont ,                -- continuous numbering
    enum Num-enum ,                -- enumerated names for residues
    ref Num-ref ,                  -- by reference to another sequence
    real Num-real                  -- supports mapping to a float system
}

Num-cont ::= SEQUENCE {            -- continuous display numbering system
    refnum INTEGER DEFAULT 1,          -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,   -- 0 used?
    ascending BOOLEAN DEFAULT TRUE     -- ascending numbers?
}

Num-enum ::= SEQUENCE { -- any tags to residues num INTEGER , -- number of tags to follow names SEQUENCE OF VisibleString } -- the tags Num-ref ::= SEQUENCE { -- by reference to other sequences type ENUMERATED { -- type of reference not-set (0) , sources (1) , -- by segmented or const seq sources aligns (2) } , -- by alignments given below aligns Seq-align OPTIONAL } Num-real ::= SEQUENCE { -- mapping to floating point system a REAL , -- from an integer system used by Bioseq b REAL , -- position = (a * int_position) + b units VisibleString OPTIONAL } Pubdesc ::= SEQUENCE { -- how sequence presented in pub pub Pub-equiv , -- the citation(s) name VisibleString OPTIONAL , -- name used in paper fig VisibleString OPTIONAL , -- figure in paper num Numbering OPTIONAL , -- numbering from paper numexc BOOLEAN OPTIONAL , -- numbering problem with paper poly-a BOOLEAN OPTIONAL , -- poly A tail indicated in figure? maploc VisibleString OPTIONAL , -- map location reported in paper seq-raw StringStore OPTIONAL , -- original sequence from paper align-group INTEGER OPTIONAL , -- this seq aligned with others in paper comment VisibleString OPTIONAL, -- any comment on this pub in context reftype INTEGER { -- type of reference in a GenBank record seq (0) , -- refers to sequence sites (1) , -- refers to unspecified features feats (2) , -- refers to specified features no-target (3) } -- nothing specified (EMBL) DEFAULT seq } Heterogen ::= VisibleString -- cofactor, prosthetic group, inhibitor, etc --*** Instances of sequences ******************************* --* Seq-inst ::= SEQUENCE { -- the sequence data itself repr ENUMERATED { -- representation class not-set (0) , -- empty virtual (1) , -- no seq data raw (2) , -- continuous sequence seg (3) , -- segmented sequence const (4) , -- constructed sequence ref (5) , -- reference to another sequence consen (6) , -- consensus sequence or pattern map (7) , -- ordered map of any kind delta (8) , -- sequence made by changes (delta) to others 
other (255) } ,
    mol ENUMERATED {               -- molecule class in living organism
        not-set (0) ,              --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                   -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,      -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
    topology ENUMERATED {          -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,               -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {            -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                   -- single strand
        ds (2) ,                   -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,   -- the sequence
    ext Seq-ext OPTIONAL ,         -- extensions for special types
    hist Seq-hist OPTIONAL }       -- sequence history

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

Seq-ext ::= CHOICE {
    seg Seg-ext ,        -- segmented sequences
    ref Ref-ext ,        -- hot link to another sequence (a view)
    map Map-ext ,        -- ordered map of markers
    delta Delta-ext }

Seg-ext ::= SEQUENCE OF Seq-loc

Ref-ext ::= Seq-loc

Map-ext ::= SEQUENCE OF Seq-feat

Delta-ext ::= SEQUENCE OF Delta-seq

Delta-seq ::= CHOICE {
    loc Seq-loc ,             -- point to a sequence
    literal Seq-literal }     -- a piece of sequence

Seq-literal ::= SEQUENCE {
    length INTEGER ,               -- must give a length in residues
    fuzz Int-fuzz OPTIONAL ,       -- could be unsure
    seq-data Seq-data OPTIONAL }   -- may have the data

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
replaces Seq-hist-rec OPTIONAL ,        -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
    deleted CHOICE {
        bool BOOLEAN ,
        date Date } OPTIONAL }

Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids SET OF Seq-id }

--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

Seq-data ::= CHOICE {              -- sequence representations
    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,              -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,              -- amino acid probabilities
    ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
    gap Seq-gap                    -- gap types
}

Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown(0),
        fragment(1),               -- Deprecated. Used only for AGP 1.1
        clone(2),                  -- Deprecated. Used only for AGP 1.1
        short-arm(3),
        heterochromatin(4),
        centromere(5),
        telomere(6),
        repeat(7),
        contig(8),
        scaffold(9),
        other(255)
    },
    linkage INTEGER {
        unlinked(0),
        linked(1),
        other(255)
    } OPTIONAL,
    linkage-evidence SET OF Linkage-evidence OPTIONAL
}

Linkage-evidence ::= SEQUENCE {
    type INTEGER {
        paired-ends(0),
        align-genus(1),
        align-xgenus(2),
        align-trnscpt(3),
        within-clone(4),
        clone-contig(5),
        map(6),
        strobe(7),
        unspecified(8),
        other(255)
    }
}

IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- This is a replica of Textseq-id
-- This is specific for annotations, and exists to maintain a semantic
-- difference between IDs assigned to annotations and IDs assigned to
-- sequences
Textannot-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

Annot-id ::= CHOICE {
    local Object-id ,
    ncbi INTEGER ,
    general Dbtag,
    other Textannot-id }

Annot-descr ::= SET OF Annotdesc

Annotdesc ::= CHOICE {
    name VisibleString ,         -- a short name for this collection
    title VisibleString ,        -- a title for this collection
    comment VisibleString ,      -- a more extensive comment
    pub Pubdesc ,                -- a reference to the publication
    user User-object ,           -- user defined object
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    src Seq-id ,                 -- source sequence from which annot came
    align Align-def,             -- definition of the SeqAligns
    region Seq-loc }             -- all contents cover this region

Align-def ::= SEQUENCE {
    align-type INTEGER {         -- class of align Seq-annot
        ref (1) ,                -- set of alignments to the same sequence
        alt (2) ,                -- set of alternate alignments of the same seqs
        blocks (3) ,             -- set of aligned blocks in the same seqs
        other (255) } ,
    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now

Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,
    db INTEGER {                 -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir (4) ,
        sp (5) ,
        bbone (6) ,
        pdb (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,-- source if "other" above
    desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
    data CHOICE {
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,      -- used for communication between tools
        locs SET OF Seq-loc ,    -- used for communication between tools
        seq-table Seq-table } }  -- features in table form

END

--$Revision: 6.6 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

Bioseq-set ::= SEQUENCE {      -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,          -- to identify a collection
    level INTEGER OPTIONAL ,       -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for 2 or 3
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
        eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        wgs-set (18) ,              -- whole genome shotgun project
        named-annot (19) ,          -- named annotation set
        named-annot-prod (20) ,     -- with instantiated mRNA+protein
        read-set (21) ,             -- set from a single read
        paired-end-reads (22) ,     -- paired sequences within a read-set
        small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,
    annot SET OF Seq-annot OPTIONAL }

Seq-entry ::= CHOICE {
    seq Bioseq ,
    set Bioseq-set }

END

--$Revision: 6.0 $
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves are defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--  and increase continuously.  So IUPAC codes, which are upper case
--  letters will always have 65 0 cells before the codes begin.  This
--  allows all codes to do indexed lookups for things
--
--  Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                   display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--     probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

Seq-code-type ::= ENUMERATED {      -- sequence representations
    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
    ncbi2na (3) ,              -- 2 bit nucleic acid code
    ncbi4na (4) ,              -- 4 bit nucleic acid code
    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
    ncbipna (6) ,              -- nucleic acid probabilities
    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
    ncbipaa (9) ,              -- amino acid probabilities
    iupacaa3 (10) ,            -- 3 letter code only for display
    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25

Seq-map-table ::= SEQUENCE {   -- for tables of sequence mappings
    from Seq-code-type ,           -- code to map from
    to Seq-code-type ,             -- code to map to
    num INTEGER ,                  -- number of rows in table
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF INTEGER }    -- table of values, in from-to order

Seq-code-table ::= SEQUENCE {  -- for names of coded values
    code Seq-code-type ,           -- name of code
    num INTEGER ,                  -- number of rows in table
    one-letter BOOLEAN ,           -- symbol is ALWAYS 1 letter?
start-at INTEGER DEFAULT 0 ,       -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString ,     -- the printed symbol or letter
            name VisibleString } ,     -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL }  -- pointers to complement nuc acid

Seq-code-set ::= SEQUENCE {    -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,
    maps SET OF Seq-map-table OPTIONAL }

END

--$Revision: 6.0 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl(0),
        genbank(1),
        ddbj(2),
        geninfo(3),
        medline(4),
        swissprot(5),
        pir(6),
        pdb(7),
        epd(8),
        ecd(9),
        tfd(10),
        flybase(11),
        prosite(12),
        enzyme(13),
        mim(14),
        ecoseq(15),
        hiv(16) ,
        other (255) } ,
    name VisibleString }

EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id SEQUENCE OF Object-id }

EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set(0),
        standard(1),
        unannotated(2),
        other(255) } DEFAULT standard,
    div ENUMERATED {
        fun(0),
        inv(1),
        mam(2),
        org(3),
        phg(4),
        pln(5),
        pri(6),
        pro(7),
        rod(8),
        syn(9),
        una(10),
        vrl(11),
        vrt(12),
        pat(13),
        est(14),
        sts(15),
        other (255) } OPTIONAL,
    creation-date Date,
    update-date Date,
    extra-acc SEQUENCE OF VisibleString OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL,
    xref SEQUENCE OF EMBL-xref OPTIONAL }

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;

SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,      -- conforms to all SWISSPROT checks
        prelim (2) ,        -- only seq and biblio checked
        other (255) } ,
    extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
    imeth BOOLEAN DEFAULT FALSE ,              -- seq known to start with Met
    plasnm SET OF VisibleString OPTIONAL,      -- plasmid names carrying gene
    seqref SET OF Seq-id OPTIONAL,             -- xref to other sequences
    dbref SET OF Dbtag OPTIONAL ,              -- xref to non-sequence dbases
    keywords SET OF VisibleString OPTIONAL ,   -- keywords
    created Date OPTIONAL ,                    -- creation date
    sequpd Date OPTIONAL ,                     -- sequence update
    annotupd Date OPTIONAL }                   -- annotation update

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

PIR-block ::= SEQUENCE {          -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,
    seq-raw VisibleString OPTIONAL ,    -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }     -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

GB-block ::= SEQUENCE {          -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,          -- replaces date
    div VisibleString OPTIONAL ,        -- GenBank division
    taxonomy VisibleString OPTIONAL }   -- continuation line of organism

END

--**********************************************************************
-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
--            Kyoto Univ., Japan
--
--**********************************************************************

PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

PRF-block ::= SEQUENCE {
    extra-src PRF-ExtraSrc OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL }

PRF-ExtraSrc ::= SEQUENCE {
    host VisibleString OPTIONAL,
    part VisibleString OPTIONAL,
    state VisibleString OPTIONAL,
    strain VisibleString OPTIONAL,
    taxon VisibleString OPTIONAL }

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

PDB-block ::= SEQUENCE {          -- PDB specific descriptions
    deposition Date ,                    -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL }       -- replacement history

PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString }      -- entry ids replaced by this one

END

--$Revision: 6.48 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        Variation-ref FROM NCBI-Variation
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        DOI, PubMedId FROM NCBI-Biblio
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM
NCBI-General;

--*** Feature identifiers ********************************
--*

Feat-id ::= CHOICE {
    gibb INTEGER ,            -- geninfo backbone
    giim Giimport-id ,        -- geninfo import
    local Object-id ,         -- for local software use
    general Dbtag }           -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,           -- the specific data
    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
    except BOOLEAN OPTIONAL ,     -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,    -- product of process
    location Seq-loc ,            -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,   -- for user defined label
    ext User-object OPTIONAL ,    -- user defined structure extension
    cit Pub-set OPTIONAL ,        -- citations for this feature
    exp-ev ENUMERATED {           -- evidence for existence of feature
        experimental (1) ,        -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL ,  -- set of extensions; will replace 'ext' field
    support SeqFeatSupport OPTIONAL     -- will replace /experiment, /inference, model-evidence
}

SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,              -- publication applies to this seq
    seq Seq-loc ,              -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,      -- named region (globin locus)
    comment NULL ,             -- just a comment
    bond ENUMERATED {
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,       -- restriction site  (for maps really)
    user User-object ,      -- user defined structure
    txinit Txinit ,         -- transcription initiation
    num Numbering ,         -- a numbering system
    psec-str ENUMERATED {   -- protein secondary structure
        helix (1) ,         -- any helix
        sheet (2) ,         -- beta sheet
        turn  (3) } ,       -- beta or gamma turn
    non-std-residue VisibleString ,  -- non-standard residue here in seq
    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource,
    clone Clone-ref,
    variation Variation-ref
}

SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both
    id Feat-id OPTIONAL ,        -- the feature copied
    data SeqFeatData OPTIONAL }  -- the specific data

SeqFeatSupport ::= SEQUENCE {
    experiment SET
OF ExperimentSupport OPTIONAL ,
    inference SET OF InferenceSupport OPTIONAL ,
    model-evidence SET OF ModelEvidenceSupport OPTIONAL
}

EvidenceCategory ::= INTEGER {
    not-set (0) ,
    coordinates (1) ,
    description (2) ,
    existence (3)
}

ExperimentSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    explanation VisibleString ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}

Program-id ::= SEQUENCE {
    name VisibleString ,
    version VisibleString OPTIONAL
}

EvidenceBasis ::= SEQUENCE {
    programs SET OF Program-id OPTIONAL ,
    accessions SET OF Seq-id OPTIONAL
}

InferenceSupport ::= SEQUENCE {
    category EvidenceCategory OPTIONAL ,
    type INTEGER {
        not-set (0) ,
        similar-to-sequence (1) ,
        similar-to-aa (2) ,
        similar-to-dna (3) ,
        similar-to-rna (4) ,
        similar-to-mrna (5) ,
        -- NOTE(review): 'similiar' below looks like a misspelling of
        -- 'similar'; the identifier is left untouched because renaming it
        -- would change the name used in text value notation
        similiar-to-est (6) ,
        similar-to-other-rna (7) ,
        profile (8) ,
        nucleotide-motif (9) ,
        protein-motif (10) ,
        ab-initio-prediction (11) ,
        alignment (12) ,
        other (255)
    } DEFAULT not-set ,
    other-type VisibleString OPTIONAL ,
    same-species BOOLEAN DEFAULT FALSE ,
    basis EvidenceBasis ,
    pmids SET OF PubMedId OPTIONAL ,
    dois SET OF DOI OPTIONAL
}

ModelEvidenceItem ::= SEQUENCE {
    id Seq-id ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

ModelEvidenceSupport ::= SEQUENCE {
    method VisibleString OPTIONAL ,
    mrna SET OF ModelEvidenceItem OPTIONAL ,
    est SET OF ModelEvidenceItem OPTIONAL ,
    protein SET OF ModelEvidenceItem OPTIONAL ,
    identification Seq-id OPTIONAL ,
    dbxref SET OF Dbtag OPTIONAL ,
    exon-count INTEGER OPTIONAL ,
    exon-length INTEGER OPTIONAL ,
    full-length BOOLEAN DEFAULT FALSE ,
    supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*

Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
frame ENUMERATED {
        not-set (0) ,                  -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,  -- reading frame
    conflict BOOLEAN OPTIONAL ,        -- conflict
    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
    code Genetic-code OPTIONAL ,       -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
    stops INTEGER OPTIONAL }           -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCT=4, etc
                    -- (index = 16*first + 4*second + third base value)
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,           -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,            -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }          -- start, indexed to NCBIstdaa

Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }          -- NCBIstdaa code

Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

Gb-qual ::= SEQUENCE {
    qual
VisibleString ,
    val VisibleString }

--*** Clone-ref ***********************************************
--*
--*  Specification of clone features
--*

Clone-ref ::= SEQUENCE {
    name VisibleString,                 -- Official clone symbol
    library VisibleString OPTIONAL,     -- Library name

    concordant BOOLEAN DEFAULT FALSE,   -- OPTIONAL?
    unique BOOLEAN DEFAULT FALSE,       -- OPTIONAL?
    placement-method INTEGER {
        end-seq (0),                    -- Clone placed by end sequence
        insert-alignment (1),           -- Clone placed by insert alignment
        sts (2),                        -- Clone placed by STS
        fish (3),
        fingerprint (4),
        end-seq-insert-alignment (5),   -- combined end-seq and insert align
        external (253),                 -- Placement provided externally
        curated (254),                  -- Human placed or approved
        other (255)
    } OPTIONAL,
    clone-seq Clone-seq-set OPTIONAL
}

Clone-seq-set ::= SET OF Clone-seq

Clone-seq ::= SEQUENCE {
    type INTEGER {
        insert (0),
        end (1),
        other (255)
    },
    confidence INTEGER {
        multiple (0),          -- Multiple hits
        na (1),                -- Unspecified
        nohit-rep (2),         -- No hits, end flagged repetitive
        nohitnorep (3),        -- No hits, end not flagged repetitive
        other-chrm (4),        -- Hit on different chromosome
        unique (5),
        virtual (6),           -- Virtual (hasn't been sequenced)
        multiple-rep (7),      -- Multiple hits, end flagged repetitive
        multiplenorep (8),     -- Multiple hits, end not flagged repetitive
        no-hit (9),            -- No hits
        other (255)
    } OPTIONAL,
    location Seq-loc,          -- location on sequence
    seq Seq-loc OPTIONAL,      -- clone sequence location
    align-id Dbtag OPTIONAL,   -- internal alignment identifier
    support INTEGER {
        prototype (0),         -- sequence used to place clone
        supporting (1),        -- sequence supports placement
        supports-other(2),     -- supports a different placement
        non-supporting (3)     -- does not support any placement
    } OPTIONAL
}

END

--*** Variation-ref ***********************************************
--*
--*  Specification of variation features
--*

NCBI-Variation DEFINITIONS ::=
BEGIN

EXPORTS Variation-ref, Variation-inst, VariantProperties,
        Population-data, Phenotype;

IMPORTS Int-fuzz, User-object, Object-id, Dbtag
FROM NCBI-General
        Seq-literal FROM NCBI-Sequence
        SubSource FROM NCBI-BioSource
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;

-- --------------------------------------------------------------------------
-- Historically, the dbSNP definitions document data structures used in the
-- processing and annotation of variations by the dbSNP group.  The intention
-- is to provide information to clients that reflect internal information
-- produced during the mapping of SNPs
-- --------------------------------------------------------------------------

VariantProperties ::= SEQUENCE {
    version INTEGER,

    -- NOTE:
    -- The format for most of these values is as an integer
    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
    -- sum) of the possible values, and as such, these values represent the
    -- specific bit flags that may be set for each of the possible attributes
    -- here.

    resource-link INTEGER {
        preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
        provisional      (2), -- Provisional Third Party Annotations (0x02)
        has3D            (4), -- Has 3D structure SNP3D table (0x04)
        submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
        clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
        genotypeKit     (32)  -- Marker exists on high density genotyping kit
                              -- (0x20)
    } OPTIONAL,

    gene-location INTEGER {
        in-gene        (1), -- Sequence intervals covered by a gene ID but not
                            -- having an aligned transcript (0x01)
        near-gene-5    (2), -- Within 2kb of the 5' end of a gene feature (0x02)
        near-gene-3    (4), -- Within 0.5kb of the 3' end of a gene feature (0x04)
        intron         (8), -- In Intron (0x08)
        donor         (16), -- In donor splice-site (0x10)
        acceptor      (32), -- In acceptor splice-site (0x20)
        utr-5         (64), -- In 5' UTR (0x40)
        utr-3        (128), -- In 3' UTR (0x80)
        in-start-codon(256), -- the variant is observed in a start codon
                             -- (0x100)
        in-stop-codon (512), -- the variant is observed in a stop codon
                             -- (0x200)
        intergenic   (1024), -- variant located between genes (0x400)
        conserved-noncoding(2048) -- variant is located in a conserved
                                  -- non-coding region (0x800)
    } OPTIONAL,

    effect INTEGER {
        no-change      (0), -- known to cause no functional changes
                            -- since 0 does not combine with any other bit
                            -- value, 'no-change' specifically implies that
                            -- there are no consequences
        synonymous     (1), -- one allele in the set does not change the encoded
                            -- amino acid (0x1)
        nonsense       (2), -- one allele in the set changes to STOP codon
                            -- (TER).  (0x2)
        missense       (4), -- one allele in the set changes protein peptide
                            -- (0x4)
        frameshift     (8), -- one allele in the set changes all downstream
                            -- amino acids (0x8)
        up-regulator  (16), -- the variant causes increased transcription
                            -- (0x10)
        down-regulator(32), -- the variant causes decreased transcription
                            -- (0x20)
        methylation   (64), -- (0x40)
        stop-gain    (128), -- reference codon is not stop codon, but the snp
                            -- variant allele changes the codon to a
                            -- terminating codon.  (0x80)
        stop-loss    (256)  -- reverse of STOP-GAIN: reference codon is a
                            -- stop codon, but a snp variant allele changes
                            -- the codon to a non-terminating codon.
} OPTIONAL,

    mapping INTEGER {
        has-other-snp         (1), -- Another SNP has the same mapped positions
                                   -- on reference assembly (0x01)
        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
                                   -- chromosomes on different assemblies (0x02)
        is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
    } OPTIONAL,

    -- map-weight captures specificity of placement
    -- NOTE: This is *NOT* a bitfield
    map-weight INTEGER {
        is-uniquely-placed(1),
        placed-twice-on-same-chrom(2),
        placed-twice-on-diff-chrom(3),
        many-placements(10)
    } OPTIONAL,

    frequency-based-validation INTEGER {
        is-mutation      (1), -- low frequency variation that is cited in
                              -- journal or other reputable sources (0x01)
        above-5pct-all   (2), -- >5% minor allele freq in each and all
                              -- populations (0x02)
        above-5pct-1plus (4), -- >5% minor allele freq in 1+ populations (0x04)
        validated        (8), -- Bit is set if the variant has a minor allele
                              -- observed in two or more separate chromosomes
                              -- (0x08)
        above-1pct-all  (16), -- >1% minor allele freq in each and all
                              -- populations (0x10)
        above-1pct-1plus(32)  -- >1% minor allele freq in 1+ populations (0x20)
    } OPTIONAL,

    genotype INTEGER {
        in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
        has-genotypes    (2)  -- SNP has individual genotype (0x02)
    } OPTIONAL,

    -- project IDs are IDs from BioProjects
    -- in order to report information about project relationships, we
    -- require projects to be registered
    -- This field in many ways duplicates dbxrefs; however, the
    -- intention of this field is to more adequately reflect
    -- ownership and data source
    --
    -- 11/9/2010: DO NOT USE
    -- This field was changed in the spec in a breaking way; using it will
    -- break clients.  We are officially suppressing / abandoning this field.
-- Clients who need to use this should instead place the data in
    -- Seq-feat.dbxref, using the db name 'BioProject'
    project-data SET OF INTEGER OPTIONAL,

    quality-check INTEGER {
        contig-allele-missing   (1), -- Reference sequence allele at the mapped
                                     -- position is not present in the SNP
                                     -- allele list, adjusted for orientation
                                     -- (0x01)
        withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
                                     -- (0x02)
        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
                                     -- submissions and these sets share no
                                     -- alleles in common (0x04)
        strain-specific         (8), -- Strain specific fixed difference (0x08)
        genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
    } OPTIONAL,

    confidence INTEGER {
        unknown         (0),
        likely-artifact (1),
        other           (255)
    } OPTIONAL,

    -- has this variant been validated?
    -- While a boolean flag offers no subtle distinctions of validation
    -- methods, occasionally it is only known as a single boolean value
    -- NOTE: this flag is redundant and should be omitted if more comprehensive
    -- validation information is present
    other-validation BOOLEAN OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    allele-origin INTEGER {
        unknown             (0),
        germline            (1),
        somatic             (2),
        inherited           (4),
        paternal            (8),
        maternal            (16),
        de-novo             (32),
        biparental          (64),
        uniparental         (128),
        not-tested          (256),
        tested-inconclusive (512),

        -- stopper - 2^30
        other               (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- NOTE: THIS IS NOT A BITFIELD!
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
is-ancestral-allele BOOLEAN OPTIONAL
}


-- Phenotype: optional source and term strings, optional Dbtag
-- cross-references, and an optional clinical-significance code.
Phenotype ::= SEQUENCE {
    source VisibleString OPTIONAL,
    term VisibleString OPTIONAL,
    xref SET OF Dbtag OPTIONAL,

    -- does this variant have known clinical significance?
    clinical-significance INTEGER {
        unknown (0),
        untested (1),
        non-pathogenic (2),
        probable-non-pathogenic (3),
        probable-pathogenic (4),
        pathogenic (5),
        drug-response (6),
        histocompatibility (7),
        other (255)
    } OPTIONAL
}


-- Population-data: observations for one assayed population; only
-- 'population' is mandatory, every other field is OPTIONAL.
Population-data ::= SEQUENCE {
    -- assayed population (e.g. HAPMAP-CEU)
    population VisibleString,
    genotype-frequency REAL OPTIONAL,
    chromosomes-tested INTEGER OPTIONAL,
    sample-ids SET OF Object-id OPTIONAL,
    allele-frequency REAL OPTIONAL,

    -- This field is an explicit bit-field
    -- Valid values should be a bitwise combination (= simple sum)
    -- of any of the values below
    flags INTEGER {
        is-default-population (1),
        is-minor-allele (2),
        is-rare-allele (4)
    } OPTIONAL
}


-- Ext-loc: a Seq-loc labelled with an Object-id.
Ext-loc ::= SEQUENCE {
    id Object-id,
    location Seq-loc
}


Variation-ref ::= SEQUENCE {
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown (0),
        bac-acgh (1),
        computational (2),
        curated (3),
        digital-array (4),
        expression-array (5),
        fish (6),
        flanking-sequence (7),
        maph (8),
        mcd-analysis (9),
        mlpa (10),
        oea-assembly (11),
        oligo-acgh (12),
        paired-end (13),
        pcr (14),
        qpcr (15),
        read-depth (16),
        roma
(17),
        rt-pcr (18),
        sage (19),
        sequence-alignment (20),
        sequencing (21),
        snp-array (22),
        -- NOTE(review): 'snp-genoytyping' looks like a misspelling of
        -- 'snp-genotyping'; the identifier is left unchanged because
        -- renaming it would presumably change the value name seen by
        -- existing data and code - confirm before touching it
        snp-genoytyping (23),
        southern (24),
        western (25),
        optical-mapping (26),
        other (255)
    } OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    -- DEPRECATED - do not use
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- has this variant been validated?
    -- DEPRECATED: new field = VariantProperties.other-validation
    validated BOOLEAN OPTIONAL,

    -- link-outs to GeneTests database
    -- DEPRECATED - do not use
    clinical-test SET OF Dbtag OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    -- DEPRECATED: new field = VariantProperties.allele-origin
    allele-origin INTEGER {
        unknown (0),
        germline (1),
        somatic (2),
        inherited (4),
        paternal (8),
        maternal (16),
        de-novo (32),
        biparental (64),
        uniparental (128),
        not-tested (256),
        tested-inconclusive (512),

        -- stopper - 2^30 (1073741824 is 2^30, not 2^31)
        other (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- DEPRECATED: new field = VariantProperties.allele-state
    allele-state INTEGER {
        unknown (0),
        homozygous (1),
        heterozygous (2),
        hemizygous (3),
        nullizygous (4),
        other (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    -- DEPRECATED: new field = VariantProperties.allele-frequency
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
    is-ancestral-allele BOOLEAN OPTIONAL,

    -- publication support.
-- Note: made this pub instead of pub-equiv, since -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like -- Pub is more often used as top-level container -- DEPRECATED - do not use; use Seq-feat.dbxref instead pub Pub OPTIONAL, data CHOICE { unknown NULL, note VisibleString, --free-form uniparental-disomy NULL, -- actual sequence-edit at feat.location instance Variation-inst, -- Set of related Variations. -- Location of the set equals to the union of member locations set SEQUENCE { type INTEGER { unknown (0), compound (1), -- complex change at the same location on the -- same molecule products (2), -- different products arising from the same -- variation in a precursor, e.g. r.[13g>a, -- 13_88del] haplotype (3), -- changes on the same allele, e.g -- r.[13g>a;15u>c] genotype (4), -- changes on different alleles in the same -- genotype, e.g. g.[476C>T]+[476C>T] mosaic (5), -- different genotypes in the same individual individual (6), -- same organism; allele relationship unknown, -- e.g. g.[476C>T(+)183G>C] population (7), -- population alleles (8), -- set represents a set of observed alleles package (9), -- set represents a package of observations at -- a given location, generally containing -- asserted + reference other (255) }, variations SET OF Variation-ref, name VisibleString OPTIONAL }, -- variant is a complex and undescribed change at the location -- This type of variant is known to occur in dbVar submissions complex NULL }, consequence SET OF CHOICE { unknown NULL, splicing NULL, --some effect on splicing note VisibleString, --freeform -- Describe resulting variation in the product, e.g. missense, -- nonsense, silent, neutral, etc in a protein, that arises from -- THIS variation. 
variation Variation-ref, -- see http://www.hgvs.org/mutnomen/recs-prot.html frameshift SEQUENCE { phase INTEGER OPTIONAL, x-length INTEGER OPTIONAL }, loss-of-heterozygosity SEQUENCE { -- In germline comparison, it will be reference genome assembly -- (default) or reference/normal population. In somatic mutation, -- it will be a name of the normal tissue. reference VisibleString OPTIONAL, -- Name of the testing subject type or the testing tissue. test VisibleString OPTIONAL } } OPTIONAL, -- Observed location, if different from the parent set or feature.location. -- DEPRECATED - do not use location Seq-loc OPTIONAL, -- reference other locs, e.g. mapped source -- DEPRECATED - do not use ext-locs SET OF Ext-loc OPTIONAL, -- DEPRECATED - do not use; use Seq-feat.exts instead ext User-object OPTIONAL, somatic-origin SET OF SEQUENCE { -- description of the somatic origin itself source SubSource OPTIONAL, -- condition related to this origin's type condition SEQUENCE { description VisibleString OPTIONAL, -- reference to BioTerm / other descriptive database object-id SET OF Dbtag OPTIONAL } OPTIONAL } OPTIONAL } Delta-item ::= SEQUENCE { seq CHOICE { literal Seq-literal, loc Seq-loc, this NULL --same location as variation-ref itself } OPTIONAL, -- Multiplier allows representing a tandem, e.g. ATATAT as AT*3 -- This allows describing CNV/SSR where delta=self with a -- multiplier which specifies the count of the repeat unit. multiplier INTEGER OPTIONAL, --assumed 1 if not specified. multiplier-fuzz Int-fuzz OPTIONAL, action INTEGER { -- replace len(seq) positions starting with location.start with seq morph (0), -- go downstream by distance specified by multiplier (upstream if < 0), -- in genomic context. 
offset (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at (2),

        -- insert seq before the location.start
        ins-before (3)
    } DEFAULT morph
}


-- Variation instance
-- Each 'type' value below is annotated with the shape of the 'delta'
-- list that is expected to accompany it.
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown (0),          -- delta=[]
        identity (1),         -- delta=[]
        inv (2),              -- delta=[del, ins.seq=
                              -- RevComp(variation-location)]
        snv (3),              -- delta=[morph of length 1]
                              -- NOTE: this is snV not snP; the latter
                              -- requires frequency-based validation to be
                              -- established in VariantProperties
                              -- the strict definition of SNP is an SNV with
                              -- an established population frequency of at
                              -- least 1% in at least 1 population
        mnp (4),              -- delta=[morph of length >1]
        delins (5),           -- delta=[del, ins]
        del (6),              -- delta=[del]
        ins (7),              -- delta=[ins]
        microsatellite (8),   -- delta=[del, ins.seq= repeat-unit with fuzzy
                              -- multiplier]
                              -- variation-location is the microsat expansion
                              -- on the sequence
        transposon (9),       -- delta=[del, ins.seq= known donor or 'this']
                              -- variation-location is equiv of transposon
                              -- locs.
        cnv (10),             -- delta=[del, ins= 'this' with fuzzy
                              -- multiplier]
        direct-copy (11),     -- delta=[ins.seq= upstream location on the
                              -- same strand]
        rev-direct-copy (12), -- delta=[ins.seq= downstream location on the
                              -- same strand]
        inverted-copy (13),   -- delta=[ins.seq= upstream location on the
                              -- opposite strand]
        everted-copy (14),    -- delta=[ins.seq= downstream location on the
                              -- opposite strand]
        translocation (15),   -- delta=like delins
        prot-missense (16),   -- delta=[morph of length 1]
        prot-nonsense (17),   -- delta=[del]; variation-location is the tail
                              -- of the protein being truncated
        prot-neutral (18),    -- delta=[morph of length 1]
        prot-silent (19),     -- delta=[morph of length 1, same AA as at
                              -- variation-location]
        prot-other (20),      -- delta=any
        other (255)           -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted (1),       -- inst represents the asserted base at a
                            -- position
        reference (2),      -- inst represents the reference base at the
                            -- position
        variant (4)         -- inst represents the observed variant at a
                            -- given position
    } OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Rsite-ref: names a restriction site either by a plain string or by a
-- Dbtag pointing into a restriction site database
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db Dbtag }              -- pointer to a restriction site database

END


--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
iupacaa INTEGER , ncbieaa INTEGER , ncbi8aa INTEGER , ncbistdaa INTEGER } OPTIONAL , codon SET OF INTEGER OPTIONAL , -- codon(s) as in Genetic-code anticodon Seq-loc OPTIONAL } -- location of anticodon RNA-gen ::= SEQUENCE { class VisibleString OPTIONAL , -- for ncRNAs, the class of non-coding RNA: -- examples: antisense_RNA, guide_RNA, snRNA product VisibleString OPTIONAL , quals RNA-qual-set OPTIONAL -- e.g., tag_peptide qualifier for tmRNAs } RNA-qual ::= SEQUENCE { -- Additional data values for RNA-gen, qual VisibleString , -- in a tag (qual), value (val) format val VisibleString } RNA-qual-set ::= SEQUENCE OF RNA-qual END --********************************************************************** -- -- NCBI Genes -- by James Ostell, 1990 -- version 0.8 -- --********************************************************************** NCBI-Gene DEFINITIONS ::= BEGIN EXPORTS Gene-ref, Gene-nomenclature; IMPORTS Dbtag FROM NCBI-General; --*** Gene *********************************************** --* --* reference to a gene --* Gene-ref ::= SEQUENCE { locus VisibleString OPTIONAL , -- Official gene symbol allele VisibleString OPTIONAL , -- Official allele designation desc VisibleString OPTIONAL , -- descriptive name maploc VisibleString OPTIONAL , -- descriptive map location pseudo BOOLEAN DEFAULT FALSE , -- pseudogene db SET OF Dbtag OPTIONAL , -- ids in other dbases syn SET OF VisibleString OPTIONAL , -- synonyms for locus locus-tag VisibleString OPTIONAL , -- systematic gene name (e.g., MI0001, ORF0069) formal-name Gene-nomenclature OPTIONAL } Gene-nomenclature ::= SEQUENCE { status ENUMERATED { unknown (0) , official (1) , interim (2) } , symbol VisibleString OPTIONAL , name VisibleString OPTIONAL , source Dbtag OPTIONAL } END --********************************************************************** -- -- NCBI Organism -- by James Ostell, 1994 -- version 3.0 -- --********************************************************************** NCBI-Organism DEFINITIONS ::= BEGIN 
EXPORTS Org-ref; IMPORTS Dbtag FROM NCBI-General; --*** Org-ref *********************************************** --* --* Reference to an organism --* defines only the organism.. lower levels of detail for biological --* molecules are provided by the Source object --* Org-ref ::= SEQUENCE { taxname VisibleString OPTIONAL , -- preferred formal name common VisibleString OPTIONAL , -- common name mod SET OF VisibleString OPTIONAL , -- unstructured modifiers db SET OF Dbtag OPTIONAL , -- ids in taxonomic or culture dbases syn SET OF VisibleString OPTIONAL , -- synonyms for taxname or common orgname OrgName OPTIONAL } OrgName ::= SEQUENCE { name CHOICE { binomial BinomialOrgName , -- genus/species type name virus VisibleString , -- virus names are different hybrid MultiOrgName , -- hybrid between organisms namedhybrid BinomialOrgName , -- some hybrids have genus x species name partial PartialOrgName } OPTIONAL , -- when genus not known attrib VisibleString OPTIONAL , -- attribution of name mod SEQUENCE OF OrgMod OPTIONAL , lineage VisibleString OPTIONAL , -- lineage with semicolon separators gcode INTEGER OPTIONAL , -- genetic code (see CdRegion) mgcode INTEGER OPTIONAL , -- mitochondrial genetic code div VisibleString OPTIONAL , -- GenBank division code pgcode INTEGER OPTIONAL } -- plastid genetic code OrgMod ::= SEQUENCE { subtype INTEGER { strain (2) , substrain (3) , type (4) , subtype (5) , variety (6) , serotype (7) , serogroup (8) , serovar (9) , cultivar (10) , pathovar (11) , chemovar (12) , biovar (13) , biotype (14) , group (15) , subgroup (16) , isolate (17) , common (18) , acronym (19) , dosage (20) , -- chromosome dosage of hybrid nat-host (21) , -- natural host of this specimen sub-species (22) , specimen-voucher (23) , authority (24) , forma (25) , forma-specialis (26) , ecotype (27) , synonym (28) , anamorph (29) , teleomorph (30) , breed (31) , gb-acronym (32) , -- used by taxonomy database gb-anamorph (33) , -- used by taxonomy database gb-synonym (34) 
, -- used by taxonomy database culture-collection (35) , bio-material (36) , metagenome-source (37) , old-lineage (253) , old-name (254) , other (255) } , -- ASN5: old-name (254) will be added to next spec subname VisibleString , attrib VisibleString OPTIONAL } -- attribution/source of name BinomialOrgName ::= SEQUENCE { genus VisibleString , -- required species VisibleString OPTIONAL , -- species required if subspecies used subspecies VisibleString OPTIONAL } MultiOrgName ::= SEQUENCE OF OrgName -- the first will be used to assign division PartialOrgName ::= SEQUENCE OF TaxElement -- when we don't know the genus TaxElement ::= SEQUENCE { fixed-level INTEGER { other (0) , -- level must be set in string family (1) , order (2) , class (3) } , level VisibleString OPTIONAL , name VisibleString } END --********************************************************************** -- -- NCBI BioSource -- by James Ostell, 1994 -- version 3.0 -- --********************************************************************** NCBI-BioSource DEFINITIONS ::= BEGIN EXPORTS BioSource, SubSource; IMPORTS Org-ref FROM NCBI-Organism; --******************************************************************** -- -- BioSource gives the source of the biological material -- for sequences -- --******************************************************************** BioSource ::= SEQUENCE { genome INTEGER { -- biological context unknown (0) , genomic (1) , chloroplast (2) , chromoplast (3) , kinetoplast (4) , mitochondrion (5) , plastid (6) , macronuclear (7) , extrachrom (8) , plasmid (9) , transposon (10) , insertion-seq (11) , cyanelle (12) , proviral (13) , virion (14) , nucleomorph (15) , apicoplast (16) , leucoplast (17) , proplastid (18) , endogenous-virus (19) , hydrogenosome (20) , chromosome (21) , chromatophore (22) } DEFAULT unknown , origin INTEGER { unknown (0) , natural (1) , -- normal biological entity natmut (2) , -- naturally occurring mutant mut (3) , -- artificially mutagenized artificial 
(4) , -- artificially engineered synthetic (5) , -- purely synthetic other (255) } DEFAULT unknown , org Org-ref , subtype SEQUENCE OF SubSource OPTIONAL , is-focus NULL OPTIONAL , -- to distinguish biological focus pcr-primers PCRReactionSet OPTIONAL } PCRReactionSet ::= SET OF PCRReaction PCRReaction ::= SEQUENCE { forward PCRPrimerSet OPTIONAL , reverse PCRPrimerSet OPTIONAL } PCRPrimerSet ::= SET OF PCRPrimer PCRPrimer ::= SEQUENCE { seq PCRPrimerSeq OPTIONAL , name PCRPrimerName OPTIONAL } PCRPrimerSeq ::= VisibleString PCRPrimerName ::= VisibleString SubSource ::= SEQUENCE { subtype INTEGER { chromosome (1) , map (2) , clone (3) , subclone (4) , haplotype (5) , genotype (6) , sex (7) , cell-line (8) , cell-type (9) , tissue-type (10) , clone-lib (11) , dev-stage (12) , frequency (13) , germline (14) , rearranged (15) , lab-host (16) , pop-variant (17) , tissue-lib (18) , plasmid-name (19) , transposon-name (20) , insertion-seq-name (21) , plastid-name (22) , country (23) , segment (24) , endogenous-virus-name (25) , transgenic (26) , environmental-sample (27) , isolation-source (28) , lat-lon (29) , -- +/- decimal degrees collection-date (30) , -- DD-MMM-YYYY format collected-by (31) , -- name of person who collected the sample identified-by (32) , -- name of person who identified the sample fwd-primer-seq (33) , -- sequence (possibly more than one; semicolon-separated) rev-primer-seq (34) , -- sequence (possibly more than one; semicolon-separated) fwd-primer-name (35) , rev-primer-name (36) , metagenomic (37) , mating-type (38) , linkage-group (39) , haplogroup (40) , whole-replicon (41) , phenotype (42) , other (255) } , name VisibleString , attrib VisibleString OPTIONAL } -- attribution/source of this name END --********************************************************************** -- -- NCBI Protein -- by James Ostell, 1990 -- version 0.8 -- --********************************************************************** NCBI-Protein DEFINITIONS ::= BEGIN EXPORTS 
Prot-ref; IMPORTS Dbtag FROM NCBI-General; --*** Prot-ref *********************************************** --* --* Reference to a protein name --* Prot-ref ::= SEQUENCE { name SET OF VisibleString OPTIONAL , -- protein name desc VisibleString OPTIONAL , -- description (instead of name) ec SET OF VisibleString OPTIONAL , -- E.C. number(s) activity SET OF VisibleString OPTIONAL , -- activities db SET OF Dbtag OPTIONAL , -- ids in other dbases processed ENUMERATED { -- processing status not-set (0) , preprotein (1) , mature (2) , signal-peptide (3) , transit-peptide (4) } DEFAULT not-set } END --******************************************************************** -- -- Transcription Initiation Site Feature Data Block -- James Ostell, 1991 -- Philip Bucher, David Ghosh -- version 1.1 -- -- -- --******************************************************************** NCBI-TxInit DEFINITIONS ::= BEGIN EXPORTS Txinit; IMPORTS Gene-ref FROM NCBI-Gene Prot-ref FROM NCBI-Protein Org-ref FROM NCBI-Organism; Txinit ::= SEQUENCE { name VisibleString , -- descriptive name of initiation site syn SEQUENCE OF VisibleString OPTIONAL , -- synonyms gene SEQUENCE OF Gene-ref OPTIONAL , -- gene(s) transcribed protein SEQUENCE OF Prot-ref OPTIONAL , -- protein(s) produced rna SEQUENCE OF VisibleString OPTIONAL , -- rna(s) produced expression VisibleString OPTIONAL , -- tissue/time of expression txsystem ENUMERATED { -- transcription apparatus used at this site unknown (0) , pol1 (1) , -- eukaryotic Pol I pol2 (2) , -- eukaryotic Pol II pol3 (3) , -- eukaryotic Pol III bacterial (4) , viral (5) , rna (6) , -- RNA replicase organelle (7) , other (255) } , txdescr VisibleString OPTIONAL , -- modifiers on txsystem txorg Org-ref OPTIONAL , -- organism supplying transcription apparatus mapping-precise BOOLEAN DEFAULT FALSE , -- mapping precise or approx location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping inittype ENUMERATED { unknown (0) , single (1) , multiple (2) , region 
(3) } OPTIONAL , evidence SET OF Tx-evidence OPTIONAL } Tx-evidence ::= SEQUENCE { exp-code ENUMERATED { unknown (0) , rna-seq (1) , -- direct RNA sequencing rna-size (2) , -- RNA length measurement np-map (3) , -- nuclease protection mapping with homologous sequence ladder np-size (4) , -- nuclease protected fragment length measurement pe-seq (5) , -- dideoxy RNA sequencing cDNA-seq (6) , -- full-length cDNA sequencing pe-map (7) , -- primer extension mapping with homologous sequence ladder pe-size (8) , -- primer extension product length measurement pseudo-seq (9) , -- full-length processed pseudogene sequencing rev-pe-map (10) , -- see NOTE (1) below other (255) } , expression-system ENUMERATED { unknown (0) , physiological (1) , in-vitro (2) , oocyte (3) , transfection (4) , transgenic (5) , other (255) } DEFAULT physiological , low-prec-data BOOLEAN DEFAULT FALSE , from-homolog BOOLEAN DEFAULT FALSE } -- experiment actually done on -- close homolog -- NOTE (1) length measurement of a reverse direction primer-extension -- product (blocked by RNA 5'end) by comparison with -- homologous sequence ladder (J. Mol. Biol. 199, 587) END --$Revision: 1.5 $ -- ---------------------------------------------------------------------------- -- -- PUBLIC DOMAIN NOTICE -- National Center for Biotechnology Information -- -- This software/database is a "United States Government Work" under the terms -- of the United States Copyright Act. It was written as part of the author's -- official duties as a United States Government employee and thus cannot be -- copyrighted. This software/database is freely available to the public for -- use. The National Library of Medicine and the U.S. Government have not -- placed any restriction on its use or reproduction. -- -- Although all reasonable efforts have been taken to ensure the accuracy and -- reliability of the software and data, the NLM and the U.S. 
Government do not -- and cannot warrant the performance or results that may be obtained by using -- this software or data. The NLM and the U.S. Government disclaim all -- warranties, express or implied, including warranties of performance, -- merchantability or fitness for any particular purpose. -- -- Please cite the authors in any work or product based on this material. -- -- ---------------------------------------------------------------------------- -- -- Authors: Mike DiCuccio, Eugene Vasilchenko -- -- ASN.1 interface to table readers -- -- ---------------------------------------------------------------------------- NCBI-SeqTable DEFINITIONS ::= BEGIN EXPORTS SeqTable-column-info, SeqTable-column, Seq-table; IMPORTS Seq-id, Seq-loc, Seq-interval FROM NCBI-Seqloc; SeqTable-column-info ::= SEQUENCE { -- user friendly column name, can be skipped title VisibleString OPTIONAL, -- identification of the column data in the objects described by the table field-id INTEGER { -- known column data types -- position types location (0), -- location as Seq-loc location-id (1), -- location Seq-id location-gi (2), -- gi location-from (3), -- interval from location-to (4), -- interval to location-strand (5), -- location strand location-fuzz-from-lim (6), location-fuzz-to-lim (7), product (10), -- product as Seq-loc product-id (11), -- product Seq-id product-gi (12), -- product gi product-from (13), -- product interval from product-to (14), -- product interval to product-strand (15), -- product strand product-fuzz-from-lim (16), product-fuzz-to-lim (17), -- main feature fields id-local (20), -- id.local.id xref-id-local (21), -- xref.id.local.id partial (22), comment (23), title (24), ext (25), -- field-name must be "E.xxx", see below qual (26), -- field-name must be "Q.xxx", see below dbxref (27), -- field-name must be "D.xxx", see below -- various data fields data-imp-key (30), data-region (31), data-cdregion-frame (32), -- extra fields, see also special values for str below 
ext-type (40),
        qual-qual (41),
        qual-val (42),
        dbxref-db (43),
        dbxref-tag (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    -- NOTE(review): Dbtag (NCBI-General) has fields 'db' and 'tag' and no
    -- 'id', and field-id above names dbxref-db / dbxref-tag, so
    -- "dbxref.id" in the "D.xxx" line presumably means "dbxref.db" - confirm
    field-name VisibleString OPTIONAL
}


-- CommonString-table: a list of possible string values plus a list of
-- integer indexes of values
CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings SEQUENCE OF VisibleString,

    -- indexes of values
    indexes SEQUENCE OF INTEGER
}


-- CommonBytes-table: same layout as CommonString-table, with OCTET STRING
-- values instead of strings
CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes SEQUENCE OF OCTET STRING,

    -- indexes of values
    indexes SEQUENCE OF INTEGER
}


-- SeqTable-multi-data: the per-row values of one column, in one of
-- several alternative representations
SeqTable-multi-data ::= CHOICE {
    -- a set of integers, one per row
    int SEQUENCE OF INTEGER,

    -- a set of reals, one per row
    real SEQUENCE OF REAL,

    -- a set of strings, one per row
    string SEQUENCE OF VisibleString,

    -- a set of byte arrays, one per row
    bytes SEQUENCE OF OCTET STRING,

    -- a set of strings with a small set of possible values
    common-string CommonString-table,

    -- a set of byte arrays with a small set of possible values
    common-bytes CommonBytes-table,

    -- a set of bits, one per row
    -- this uses bm::bvector<> as its storage mechanism
    bit OCTET STRING,

    -- a set of locations, one per row
    loc SEQUENCE OF Seq-loc,
    id SEQUENCE OF Seq-id,
    interval SEQUENCE OF Seq-interval
}


SeqTable-single-data ::= CHOICE {
    -- integer
    int
INTEGER,

    -- real
    real REAL,

    -- string
    string VisibleString,

    -- byte array
    bytes OCTET STRING,

    -- bit
    bit BOOLEAN,

    -- location
    loc Seq-loc,
    id Seq-id,
    interval Seq-interval
}


SeqTable-sparse-index ::= CHOICE {
    -- indexes of rows with values
    indexes SEQUENCE OF INTEGER,

    -- bitset of rows with values
    bit-set OCTET STRING
}


SeqTable-column ::= SEQUENCE {
    -- column description or reference to previously defined info
    header SeqTable-column-info,   -- information about data

    -- row data
    data SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other SeqTable-single-data OPTIONAL
}


Seq-table ::= SEQUENCE {
    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows INTEGER,

    -- data in columns
    columns SEQUENCE OF SeqTable-column
}

END


--$Revision: 6.4 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

Seq-align-set ::= SET OF Seq-align

Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0) ,
        global (1) ,
        diags (2) ,     -- unbroken, but not ordered, diagonals
        partial (3) ,   -- mapping pieces together
        disc (4) ,      -- discontinuous alignment
        other (255) } ,
    dim INTEGER OPTIONAL ,     -- dimensionality
    score SET OF Score OPTIONAL ,   -- for whole alignment
    segs CHOICE {                   -- alignment data
        dendiag SEQUENCE OF Dense-diag ,
        denseg Dense-seg ,
        std SEQUENCE OF Std-seg ,
        packed Packed-seg ,
        disc Seq-align-set,
        spliced Spliced-seg,
        sparse Sparse-seg
    } ,

    -- regions of sequence over which align
    --  was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}

Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
    dim INTEGER DEFAULT 2 ,    -- dimensionality
    ids SEQUENCE OF Seq-id ,   -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
    len INTEGER ,                 -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SET OF Score OPTIONAL }

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --

Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs
    lens SEQUENCE OF INTEGER ,    -- lengths in ids order within segs
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL }  -- score for each seg

Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
    present OCTET STRING ,        -- Boolean if each sequence present or absent in
                                  --   each segment
    lens SEQUENCE OF INTEGER ,    -- length of each segment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores
SEQUENCE OF Score OPTIONAL }  -- score for each segment

Std-seg ::= SEQUENCE {
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    ids SEQUENCE OF Seq-id OPTIONAL ,
    loc SEQUENCE OF Seq-loc ,
    scores SET OF Score OPTIONAL }


Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,
    genomic-strand Na-strand OPTIONAL ,

    product-type ENUMERATED {
        transcript(0),
        protein(1)
    },

    -- set of segments involved
    -- each segment corresponds to one exon
    -- exons are always in biological order
    exons SEQUENCE OF Spliced-exon ,

    -- optional poly(A) tail
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues
    -- from this, a 3' unaligned length can be extracted; this also captures
    -- the case in which a protein aligns leaving a partial codon alignment
    -- at the 3' end
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}


Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon
    -- on both product and genomic is start codon
    start-codon-found BOOLEAN,

    -- protein aligns to its end and there is stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}


-- complete or partial exon
-- two consecutive Spliced-exons may belong to one exon
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos ,
    product-end Product-pos ,

    -- genomic-end >= genomic-start
    genomic-start INTEGER ,
    genomic-end INTEGER ,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL ,
    genomic-id Seq-id OPTIONAL ,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,

    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL ,

    -- basic segments always are in biological order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,

    -- scores for this
exon scores Score-set OPTIONAL , -- splice sites acceptor-before-exon Splice-site OPTIONAL, donor-after-exon Splice-site OPTIONAL, -- flag: is this exon complete or partial? partial BOOLEAN OPTIONAL, --extra info ext SEQUENCE OF User-object OPTIONAL } Product-pos ::= CHOICE { nucpos INTEGER, protpos Prot-pos } -- codon based position on protein (1/3 of aminoacid) Prot-pos ::= SEQUENCE { -- standard protein position amin INTEGER , -- 0, 1, 2, or 3 as for Cdregion -- 0 = not set -- 1, 2, 3 = actual frame frame INTEGER DEFAULT 0 } -- Spliced-exon-chunk: piece of an exon -- lengths are given in nucleotide bases (1/3 of aminoacid when product is a -- protein) Spliced-exon-chunk ::= CHOICE { -- both sequences represented, product and genomic sequences match match INTEGER , -- both sequences represented, product and genomic sequences do not match mismatch INTEGER , -- both sequences are represented, there is sufficient similarity -- between product and genomic sequences. Can be used to replace stretches -- of matches and mismatches, mostly for protein to genomic where -- definition of match or mismatch depends on translation table diag INTEGER , -- insertion in product sequence (i.e. gap in the genomic sequence) product-ins INTEGER , -- insertion in genomic sequence (i.e. 
gap in the product sequence) genomic-ins INTEGER } -- site involved in splice Splice-site ::= SEQUENCE { -- typically two bases in the intronic region, always -- in IUPAC format bases VisibleString } -- ========================================================================== -- -- Sparse-seg follows the semantics of dense-seg and is more optimal for -- representing sparse multiple alignments -- -- ========================================================================== Sparse-seg ::= SEQUENCE { master-id Seq-id OPTIONAL, -- pairwise alignments constituting this multiple alignment rows SET OF Sparse-align, -- per-row scores row-scores SET OF Score OPTIONAL, -- index of extra items ext SET OF Sparse-seg-ext OPTIONAL } Sparse-align ::= SEQUENCE { first-id Seq-id, second-id Seq-id, numseg INTEGER, --number of segments first-starts SEQUENCE OF INTEGER , --starts on the first sequence [numseg] second-starts SEQUENCE OF INTEGER , --starts on the second sequence [numseg] lens SEQUENCE OF INTEGER , --lengths of segments [numseg] second-strands SEQUENCE OF Na-strand OPTIONAL , -- per-segment scores seg-scores SET OF Score OPTIONAL } Sparse-seg-ext ::= SEQUENCE { --seg-ext SET OF { -- index INTEGER, -- data User-field -- } index INTEGER } -- use of Score is discouraged for external ASN.1 specifications Score ::= SEQUENCE { id Object-id OPTIONAL , value CHOICE { real REAL , int INTEGER } } -- use of Score-set is encouraged for external ASN.1 specifications Score-set ::= SET OF Score END --$Revision: 6.0 $ --********************************************************************** -- -- NCBI Sequence Analysis Results (other than alignments) -- by James Ostell, 1990 -- --********************************************************************** NCBI-Seqres DEFINITIONS ::= BEGIN EXPORTS Seq-graph; IMPORTS Seq-loc FROM NCBI-Seqloc; --*** Sequence Graph ******************************** --* --* for values mapped by residue or range to sequence --* Seq-graph ::= SEQUENCE { title 
VisibleString OPTIONAL , comment VisibleString OPTIONAL , loc Seq-loc , -- region this applies to title-x VisibleString OPTIONAL , -- title for x-axis title-y VisibleString OPTIONAL , comp INTEGER OPTIONAL , -- compression (residues/value) a REAL OPTIONAL , -- for scaling values b REAL OPTIONAL , -- display = (a x value) + b numval INTEGER , -- number of values in graph graph CHOICE { real Real-graph , int Int-graph , byte Byte-graph } } Real-graph ::= SEQUENCE { max REAL , -- top of graph min REAL , -- bottom of graph axis REAL , -- value to draw axis on values SEQUENCE OF REAL } Int-graph ::= SEQUENCE { max INTEGER , min INTEGER , axis INTEGER , values SEQUENCE OF INTEGER } Byte-graph ::= SEQUENCE { -- integer from 0-255 max INTEGER , min INTEGER , axis INTEGER , values OCTET STRING } END --$Revision: 6.1 $ --******************************************************************** -- -- Direct Submission of Sequence Data -- James Ostell, 1991 -- -- This is a trial specification for direct submission of sequence -- data worked out between NCBI and EMBL -- Later revised to reflect work with GenBank and Integrated database -- -- Version 3.0, 1994 -- This is the official NCBI sequence submission format now. 
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS Seq-submit, Contact-info;

IMPORTS Cit-sub, Author FROM NCBI-Biblio
        Date, Object-id FROM NCBI-General
        Seq-annot FROM NCBI-Sequence
        Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset;

-- a submission: one Submit-block plus exactly one kind of payload
Seq-submit ::= SEQUENCE {
    sub Submit-block ,
    data CHOICE {
        entrys SET OF Seq-entry ,         -- sequence(s)
        annots SET OF Seq-annot ,         -- annotation(s)
        delete SET OF Seq-id } }          -- deletions of entries

Submit-block ::= SEQUENCE {
    contact Contact-info ,                -- who to contact
    cit Cit-sub ,                         -- citation for this submission
    hup BOOLEAN DEFAULT FALSE ,           -- hold until publish
    reldate Date OPTIONAL ,               -- release by date
    subtype INTEGER {                     -- type of submission
        new (1) ,                         -- new data
        update (2) ,                      -- update by author
        revision (3) ,                    -- 3rd party (non-author) update
        other (255) } OPTIONAL ,
    tool VisibleString OPTIONAL,          -- tool used to make submission
    user-tag VisibleString OPTIONAL,      -- user supplied id for this submission
    comment VisibleString OPTIONAL }      -- user comments/advice to database

Contact-info ::= SEQUENCE {               -- who to contact to discuss the submission
    name VisibleString OPTIONAL ,         -- OBSOLETE: will be removed
    address SEQUENCE OF VisibleString OPTIONAL ,
    phone VisibleString OPTIONAL ,
    fax VisibleString OPTIONAL ,
    email VisibleString OPTIONAL ,
    telex VisibleString OPTIONAL ,
    owner-id Object-id OPTIONAL ,         -- for owner accounts
    password OCTET STRING OPTIONAL ,
    last-name VisibleString OPTIONAL ,    -- structured to replace name above
    first-name VisibleString OPTIONAL ,
    middle-initial VisibleString OPTIONAL ,
    contact Author OPTIONAL }             -- WARNING: this will replace the above

END


--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information
BEGIN

EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id FROM MMDB-Chemical-graph;


-- values of enumerations must match those in cn3d/style_manager.hpp!

Cn3d-backbone-type ::= ENUMERATED {   -- for different types of backbones
    off (1),
    trace (2),
    partial (3),
    complete (4)
}

Cn3d-drawing-style ::= ENUMERATED {   -- atom/bond/object rendering styles
    -- for atoms and bonds
    wire (1),
    tubes (2),
    ball-and-stick (3),
    space-fill (4),
    wire-worm (5),
    tube-worm (6),
    -- for 3d-objects
    with-arrows (7),
    without-arrows (8)
}

Cn3d-color-scheme ::= ENUMERATED {    -- available color schemes (not all
                                      -- necessarily applicable to all objects)
    element (1),
    object (2),
    molecule (3),
    domain (4),
    residue (20),
    secondary-structure (5),
    user-select (6),
    -- different alignment conservation coloring (currently only for proteins)
    aligned (7),
    identity (8),
    variety (9),
    weighted-variety (10),
    information-content (11),
    fit (12),
    block-fit (17),
    block-z-fit (18),
    block-row-fit (19),
    -- other schemes
    temperature (13),
    hydrophobicity (14),
    charge (15),
    rainbow (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
-- An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,
    red INTEGER,
    green INTEGER,
    blue INTEGER,
    alpha INTEGER DEFAULT 255
}

Cn3d-backbone-style ::= SEQUENCE {    -- style blob for backbones only
    type Cn3d-backbone-type,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

Cn3d-general-style ::= SEQUENCE {     -- style blob for other objects
    is-on BOOLEAN,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color
}

Cn3d-backbone-label-style ::= SEQUENCE {  -- style blob for backbone labels
    spacing INTEGER,                  -- zero means none
    type ENUMERATED {
        one-letter (1),
        three-letter (2)
    },
    number ENUMERATED {
        none (0),
        sequential (1),               -- from 1, by residues present, to match sequence
        pdb (2)                       -- use number assigned by PDB
    },
    termini BOOLEAN,
    white BOOLEAN                     -- all white, or (if false) color of alpha carbon
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    name VisibleString OPTIONAL,                  -- a name (for favorites)
    protein-backbone Cn3d-backbone-style,         -- backbone styles
    nucleotide-backbone Cn3d-backbone-style,
    protein-sidechains Cn3d-general-style,        -- styles for other stuff
    nucleotide-sidechains Cn3d-general-style,
    heterogens Cn3d-general-style,
    solvents Cn3d-general-style,
    connections Cn3d-general-style,
    helix-objects Cn3d-general-style,
    strand-objects Cn3d-general-style,
    virtual-disulfides-on BOOLEAN,                -- virtual disulfides
    virtual-disulfide-color Cn3d-color,
    hydrogens-on BOOLEAN,                         -- hydrogens
    background-color Cn3d-color,                  -- background
    -- floating point parameters - scale-factor applies to all the following:
    scale-factor INTEGER,
    space-fill-proportion INTEGER,
    ball-radius INTEGER,
    stick-radius INTEGER,
    tube-radius INTEGER,
    tube-worm-radius INTEGER,
    helix-radius INTEGER,
    strand-width INTEGER,
    strand-thickness INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}

Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

Cn3d-style-table-id ::= INTEGER

-- one style-table entry: an id paired with the style settings it denotes
Cn3d-style-table-item ::= SEQUENCE {
    id Cn3d-style-table-id,
    style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    molecule-id Molecule-id,          -- MMDB molecule id
    -- which residues; whole molecule implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,
    residues SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    name VisibleString,                         -- a (short) name for this annotation
    description VisibleString OPTIONAL,         -- an optional longer description
    style-id Cn3d-style-table-id,               -- how to draw this annotation
    residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
    is-on BOOLEAN           -- whether this annotation is to be turned on in Cn3D
}

-- a GL-ordered transformation matrix
Cn3d-GL-matrix ::= SEQUENCE {
    m0 REAL, m1 REAL, m2 REAL, m3 REAL,
    m4 REAL, m5 REAL, m6 REAL, m7 REAL,
    m8 REAL, m9 REAL, m10 REAL, m11 REAL,
    m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector
Cn3d-vector ::= SEQUENCE {
    x REAL,
    y REAL,
    z REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    camera-distance REAL,         -- camera on +Z axis this distance from origin
    camera-angle-rad REAL,        -- camera angle
    camera-look-at-X REAL,        -- X,Y of point in Z=0 plane camera points at
    camera-look-at-Y REAL,
    camera-clip-near REAL,        -- distance of clipping planes from camera
    camera-clip-far REAL,
    matrix Cn3d-GL-matrix,        -- transformation of objects in the scene
    rotation-center Cn3d-vector   -- center of rotation of whole scene
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
-- Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
    view Cn3d-view-settings OPTIONAL
}

END


--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Project Definition Module
--  by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************

NCBI-Project DEFINITIONS ::=
BEGIN

EXPORTS Project, Project-item;

IMPORTS Date FROM NCBI-General
        PubMedId FROM NCBI-Biblio
        Seq-id, Seq-loc FROM NCBI-Seqloc
        Seq-annot, Pubdesc FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Pubmed-entry FROM NCBI-PubMed;

Project ::= SEQUENCE {
    descr Project-descr OPTIONAL ,
    data Project-item }

Project-item ::= CHOICE {
    pmuid SET OF INTEGER ,
    protuid SET OF INTEGER ,
    nucuid SET OF INTEGER ,
    sequid SET OF INTEGER ,
    genomeuid SET OF INTEGER ,
    structuid SET OF INTEGER ,
    pmid SET OF PubMedId ,
    protid SET OF Seq-id ,
    nucid SET OF Seq-id ,
    seqid SET OF Seq-id ,
    genomeid SET OF Seq-id ,
    structid NULL ,
    pment SET OF Pubmed-entry ,
    protent SET OF Seq-entry ,
    nucent SET OF Seq-entry ,
    seqent SET OF Seq-entry ,
    genomeent SET OF Seq-entry ,
    structent NULL ,
    seqannot SET OF Seq-annot ,
    loc SET OF Seq-loc ,
    proj SET OF Project }

Project-descr ::= SEQUENCE {
    id SET OF Project-id ,
    name VisibleString OPTIONAL ,
    descr SET OF Projdesc OPTIONAL }

Projdesc ::= CHOICE {
    pub Pubdesc ,
    date Date ,
    comment VisibleString ,
    title VisibleString }

Project-id ::= VisibleString

END


--$Revision: 6.0 $
--*********************************************************************
--
-- access.asn
--
--   messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

    -- links between same class = neighbors
    -- links between other classes = links

Link-set ::= SEQUENCE {
    num INTEGER ,                             -- number of links to this doc type
    uids SEQUENCE OF INTEGER OPTIONAL ,       -- the links
    weights SEQUENCE OF INTEGER OPTIONAL }    -- the weights

END


--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;

FeatDef ::= SEQUENCE {
    typelabel VisibleString ,         -- short label for type eg "CDS"
    menulabel VisibleString ,         -- label for a menu eg "Coding Region"
    featdef-key INTEGER ,             -- unique for this feature definition
    seqfeat-key INTEGER ,             -- SeqFeat.data.choice from objfeat.h
    entrygroup INTEGER ,              -- Group for data entry
    displaygroup INTEGER ,            -- Group for data display
    molgroup FeatMolType              -- Type of Molecule used for
}

FeatMolType ::= ENUMERATED {
    aa (1),                           -- proteins
    na (2),                           -- nucleic acids
    both (3) }                        -- both

FeatDefSet ::= SEQUENCE OF FeatDef    -- collections of definitions

FeatDispGroup ::= SEQUENCE {
    groupkey INTEGER ,
    groupname VisibleString }

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet ,
    defs FeatDefSet }

END


--$Revision: 6.12 $
--****************************************************************
--
--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
--  by Jonathan Epstein, February 1996
--
--****************************************************************

NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;

IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
        Cdd FROM NCBI-Cdd
        Seq-entry FROM NCBI-Seqset
        Seq-annot FROM NCBI-Sequence
        Medline-entry FROM NCBI-Medline
        Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;

Ncbi-mime-asn1 ::= CHOICE {
    entrez Entrez-general,                -- just a structure
    alignstruc Biostruc-align,            -- structures & sequences & alignments
    alignseq Biostruc-align-seq,          -- sequence alignment
    strucseq Biostruc-seq,                -- structure & sequences
    strucseqs Biostruc-seqs,              -- structure & sequences & alignments
    general Biostruc-seqs-aligns-cdd      -- all-purpose "grab bag"
    -- others may be added here in the future
}

-- generic bundle of sequence and alignment info
Bundle-seqs-aligns ::= SEQUENCE {
    sequences SET OF Seq-entry OPTIONAL,              -- sequences
    seqaligns SET OF Seq-annot OPTIONAL,              -- sequence alignments
    strucaligns Biostruc-annot-set OPTIONAL,          -- structure alignments
    imports SET OF Seq-annot OPTIONAL,                -- imports (updates in Cn3D)
    style-dictionary Cn3d-style-dictionary OPTIONAL,  -- Cn3D stuff
    user-annotations Cn3d-user-annotations OPTIONAL
}

Biostruc-seqs-aligns-cdd ::= SEQUENCE {
    seq-align-data CHOICE {
        bundle Bundle-seqs-aligns,        -- either seqs + alignments
        cdd Cdd                           -- or CDD (which contains these)
    },
    structures SET OF Biostruc OPTIONAL,  -- structures
    structure-type ENUMERATED {           -- type of structures to load if
        ncbi-backbone(2),                 -- not present; meanings and
        ncbi-all-atom(3),                 -- values are same as MMDB's
        pdb-model(4)                      -- Model-type
    } OPTIONAL
}

Biostruc-align ::= SEQUENCE {
    master Biostruc,
    slaves SET OF Biostruc,
    alignments Biostruc-annot-set,        -- structure alignments
    sequences SET OF Seq-entry,           -- sequences
    seqalign SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

Biostruc-align-seq ::= SEQUENCE {         -- display seq structure align only
    sequences SET OF Seq-entry,           -- sequences
    seqalign SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

Biostruc-seq ::= SEQUENCE {               -- display structure seq added by yanli
    structure Biostruc,
    sequences SET OF Seq-entry,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

Biostruc-seqs ::= SEQUENCE {  -- display blast alignment along with neighbor's structure added by yanli
    structure Biostruc,
    sequences SET OF Seq-entry,           -- sequences
    seqalign SET OF Seq-annot,
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    user-annotations Cn3d-user-annotations OPTIONAL
}

Entrez-style ::= ENUMERATED {
    docsum (1),
    genbank (2) ,
    genpept (3) ,
    fasta (4) ,
    asn1 (5) ,
    graphic (6) ,
    alignment (7) ,
    globalview (8) ,
    report (9) ,
    medlars (10) ,
    embl (11) ,
    pdb (12) ,
    kinemage (13) }

Entrez-general ::= SEQUENCE {
    title VisibleString OPTIONAL,
    data CHOICE {
        ml Medline-entry ,
        prot Seq-entry ,
        nuc Seq-entry ,
        genome Seq-entry ,
        structure Biostruc ,
        strucAnnot Biostruc-annot-set } ,
    style Entrez-style ,
    location VisibleString OPTIONAL }

END


--$Revision: 6.0 $
--********************************************************************
--
--  Print Templates
--  James Ostell, 1993
--
--
--********************************************************************

NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

PrintTemplate ::= SEQUENCE {
    name TemplateName ,                   -- name for this template
    labelfrom VisibleString OPTIONAL,     -- ASN.1 path to get label from
    format PrintFormat }

TemplateName ::= VisibleString

PrintTemplateSet ::= SEQUENCE OF PrintTemplate

PrintFormat ::= SEQUENCE {
    asn1 VisibleString ,                  -- ASN.1 partial path for this
    label VisibleString OPTIONAL ,        -- printable label
    prefix VisibleString OPTIONAL,
    suffix VisibleString OPTIONAL,
    form PrintForm }

PrintForm ::= CHOICE {                    -- Forms for various ASN.1 components
    block PrintFormBlock,
    boolean PrintFormBoolean,
    enum PrintFormEnum,
    text PrintFormText,
    use-template TemplateName,
    user UserFormat ,
    null NULL }                           -- rarely used

UserFormat ::= SEQUENCE {
    printfunc VisibleString ,
    defaultfunc VisibleString OPTIONAL }

PrintFormBlock ::= SEQUENCE {             -- for SEQUENCE, SET
    separator VisibleString OPTIONAL ,
    components SEQUENCE OF PrintFormat }

PrintFormBoolean ::= SEQUENCE {
    true VisibleString OPTIONAL ,
    false VisibleString OPTIONAL }

PrintFormEnum ::= SEQUENCE {
    values SEQUENCE OF VisibleString OPTIONAL }

PrintFormText ::= SEQUENCE {
    textfunc VisibleString OPTIONAL }

END


--$Revision: 6.10 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 25 May 2010
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez. While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--   Date  DD-Mon-YYYY
--   Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
--   FeatureLocations as per GenBank feature table, but FeatureIntervals
--     may also be provided as a convenience
--   FeatureQualifiers  as per GenBank feature table
--   Primary has a string that represents a table to construct
--     a third party (TPA) sequence.
--   other-seqids can have strings with the "vertical bar format" sequence
--     identifiers used in BLAST for example, when they are non-genbank types.
--     Currently in GenBank format you only see GI, but there are others, like
--     patents, submitter clone names, etc which will appear here, as they
--     always have in the ASN.1 format, and full XML format.
--   source-db is a formatted text block for peptides in GenPept format that
--     carries information from the source protein database.
--
--  There are also a number of elements that could have been
--    more exactly specified, but in the interest of simplicity
--    have been simply left as options. For example..
--
--  accession and accession.version will always appear in a GenBank record
--    they are optional because this format can also be used for non-GenBank
--    sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--    format but without a specific linetype (in GenBank format this comes
--    under ORGANISM). Another example is the separation of primary accession
--    from the list of secondary accessions. In GenBank format primary
--    accession is just the first one on the list that includes all secondaries
--    after it.
--
--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date
--    the record was modified (or the update-date). The date the record was
--    first submitted to GenBank appears in the first submission citation in
--    the reference section. Internally in the databases and ASN.1 NCBI keeps
--    the first date the record was released into the sequence database at
--    NCBI as create-date. For records from EMBL, which supports create-date,
--    it is the date provided by EMBL. For DDBJ records, which do not supply
--    a create-date (same as GenBank format) the create-date is the first date
--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--    took responsibility for GenBank, it is just the first date NCBI saw the
--    record. Create-date can be very useful, so we expose it here, but users
--    must understand it is only an approximation and comes from many sources,
--    and with many exceptions and caveats. It does NOT tell you the first
--    date the public might have seen this record and thus is NOT an accurate
--    measure for legal issues of precedence.
--
--********

GBSet ::= SEQUENCE OF GBSeq

GBSeq ::= SEQUENCE {
    locus VisibleString ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString ,
    update-date VisibleString ,
    create-date VisibleString OPTIONAL ,
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString ,
    primary-accession VisibleString OPTIONAL ,
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,
    other-seqids SEQUENCE OF GBSeqid OPTIONAL ,
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF GBKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF GBReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    comment-set SEQUENCE OF GBComment OPTIONAL ,
    struc-comments SEQUENCE OF GBStrucComment OPTIONAL ,
    primary VisibleString OPTIONAL ,
    source-db VisibleString OPTIONAL ,
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF GBFeature OPTIONAL ,
    feature-set SEQUENCE OF GBFeatureSet OPTIONAL ,
    sequence VisibleString OPTIONAL ,     -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL ,
    alt-seq SEQUENCE OF GBAltSeqData OPTIONAL
}

GBSeqid ::= VisibleString

GBSecondary-accn ::= VisibleString

GBKeyword ::= VisibleString

GBReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF GBAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,
    xref SEQUENCE OF GBXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL
}

GBAuthor ::= VisibleString

GBXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString
}

GBComment ::= SEQUENCE {
    type VisibleString OPTIONAL ,
    paragraphs SEQUENCE OF GBCommentParagraph
}

GBCommentParagraph ::= SEQUENCE {
    items SEQUENCE OF GBCommentItem
}

GBCommentItem ::= SEQUENCE {
    value VisibleString OPTIONAL ,
    url VisibleString OPTIONAL
}

GBStrucComment ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    items SEQUENCE OF GBStrucCommentItem
}

GBStrucCommentItem ::= SEQUENCE {
    tag VisibleString OPTIONAL ,
    value VisibleString OPTIONAL ,
    url VisibleString OPTIONAL
}

GBFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL ,
    features SEQUENCE OF GBFeature
}

GBFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,
    intervals SEQUENCE OF GBInterval OPTIONAL ,
    operator VisibleString OPTIONAL ,
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF GBQualifier OPTIONAL ,
    xrefs SEQUENCE OF GBXref OPTIONAL
}

GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,
    interbp BOOLEAN OPTIONAL ,
    accession VisibleString
}

GBQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL
}

GBAltSeqData ::= SEQUENCE {
    name VisibleString ,      -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF GBAltSeqItem OPTIONAL
}

GBAltSeqItem ::= SEQUENCE {
    interval GBInterval OPTIONAL ,
    isgap BOOLEAN OPTIONAL ,
    gap-length INTEGER OPTIONAL ,
    gap-type VisibleString OPTIONAL ,
    gap-linkage VisibleString OPTIONAL ,
    gap-comment VisibleString OPTIONAL ,
    first-accn VisibleString OPTIONAL ,
    last-accn VisibleString OPTIONAL ,
    value VisibleString OPTIONAL
}

END


--$Revision: 1.8 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
-- Version 1.6, 25 May 2010
--
--************************************************************************

INSD-INSDSeq DEFINITIONS ::=
BEGIN

--  INSDSeq provides the elements of a sequence as presented in the
--    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--    additional structure.
--    Although this single perspective of the three flatfile formats
--    provides a useful simplification, it hides to some extent the
--    details of the actual data underlying those formats. Nevertheless,
--    the XML version of INSD-Seq is being provided with
--    the hopes that it will prove useful to those who bulk-process
--    sequence data at the flatfile-format level of detail. Further
--    documentation regarding the content and conventions of those formats
--    can be found at:
--
--    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
--    http://www.ddbj.nig.ac.jp/FT/full_index.html
--    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
--    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
--
--    URLs for DDBJ, EMBL, and GenBank Release Notes :
--    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
--    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
--    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
--
--    Because INSDSeq is a compromise, a number of pragmatic decisions have
--    been made:
--
--  In pursuit of simplicity and familiarity a number of fields do not
--    have full substructure defined here where there is already a
--    standard flatfile format string. For example:
--
--   Dates:  DD-MON-YYYY (eg 10-JUN-2003)
--
--   Author: LastName, Initials  (eg Smith, J.N.)
--            or Lastname Initials  (eg Smith J.N.)
--
--   Journal: JournalName Volume (issue), page-range (year)
--            or JournalName Volume(issue):page-range(year)
--            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
--               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
--
--  FeatureLocations are represented as in the flatfile feature table,
--    but FeatureIntervals may also be provided as a convenience
--
--  FeatureQualifiers are represented as in the flatfile feature table.
--
--  Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--
--  other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-INSD types.
--
--  Currently in flatfile format you only see Accession numbers, but there
--    are others, like patents, submitter clone names, etc which will
--    appear here
--
--  There are also a number of elements that could have been more exactly
--    specified, but in the interest of simplicity have been simply left as
--    optional. For example:
--
--  All publicly accessible sequence records in INSDSeq format will
--    include accession and accession.version. However, these elements are
--    optional in INSDSeq so that this format can also be used
--    for non-public sequence data, prior to the assignment of accessions and
--    version numbers. In such cases, records will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  Four (optional) elements are specific to records represented via the EMBL
--  sequence database: INSDSeq_update-release, INSDSeq_create-release,
--  INSDSeq_entry-version, and INSDSeq_database-reference.
--
--  One (optional) element is specific to records originating at the GenBank
--  and DDBJ sequence databases: INSDSeq_segment.
--
--********

-- INSDSet is a list of INSDSeq records.
INSDSet ::= SEQUENCE OF INSDSeq

-- INSDSeq is a single sequence record.  Most elements are plain strings in
-- their flatfile form; see the conventions described in the module header.
INSDSeq ::= SEQUENCE {
    locus                VisibleString,
    length               INTEGER,
    strandedness         VisibleString OPTIONAL,
    moltype              VisibleString,
    topology             VisibleString OPTIONAL,
    division             VisibleString,
    update-date          VisibleString,
    create-date          VisibleString OPTIONAL,
    update-release       VisibleString OPTIONAL,
    create-release       VisibleString OPTIONAL,
    definition           VisibleString,
    primary-accession    VisibleString OPTIONAL,
    entry-version        VisibleString OPTIONAL,
    accession-version    VisibleString OPTIONAL,
    other-seqids         SEQUENCE OF INSDSeqid OPTIONAL,
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,
    project              VisibleString OPTIONAL,
    keywords             SEQUENCE OF INSDKeyword OPTIONAL,
    segment              VisibleString OPTIONAL,
    source               VisibleString OPTIONAL,
    organism             VisibleString OPTIONAL,
    taxonomy             VisibleString OPTIONAL,
    references           SEQUENCE OF INSDReference OPTIONAL,
    comment              VisibleString OPTIONAL,
    comment-set          SEQUENCE OF INSDComment OPTIONAL,
    struc-comments       SEQUENCE OF INSDStrucComment OPTIONAL,
    primary              VisibleString OPTIONAL,
    source-db            VisibleString OPTIONAL,
    database-reference   VisibleString OPTIONAL,
    feature-table        SEQUENCE OF INSDFeature OPTIONAL,
    feature-set          SEQUENCE OF INSDFeatureSet OPTIONAL,
    sequence             VisibleString OPTIONAL,  -- Optional for contig, wgs, etc.
    contig               VisibleString OPTIONAL,
    alt-seq              SEQUENCE OF INSDAltSeqData OPTIONAL
}

INSDSeqid ::= VisibleString

INSDSecondary-accn ::= VisibleString

INSDKeyword ::= VisibleString

-- INSDReference_position contains a string value indicating the
-- basepair span(s) to which a reference applies. The allowable
-- formats are:
--
--   X..Y  : Where X and Y are integers separated by two periods,
--           X >= 1 , Y <= sequence length, and X <= Y
--
--           Multiple basepair spans can exist, separated by a
--           semi-colon and a space. For example : 10..20; 100..500
--
--   sites : The string literal 'sites', indicating that a reference
--           provides sequence annotation information, but the specific
--           basepair spans are either not captured, or were too numerous
--           to record.
--
--           The 'sites' literal string is singly occurring, and
--           cannot be used in conjunction with any X..Y basepair spans.
--
-- References that lack an INSDReference_position element apply
-- to the entire sequence.

INSDReference ::= SEQUENCE {
    reference  VisibleString,
    position   VisibleString OPTIONAL,
    authors    SEQUENCE OF INSDAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title      VisibleString OPTIONAL,
    journal    VisibleString,
    xref       SEQUENCE OF INSDXref OPTIONAL,
    pubmed     INTEGER OPTIONAL,
    remark     VisibleString OPTIONAL
}

INSDAuthor ::= VisibleString

-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database.

INSDXref ::= SEQUENCE {
    dbname VisibleString,
    id     VisibleString
}

INSDComment ::= SEQUENCE {
    type       VisibleString OPTIONAL,
    paragraphs SEQUENCE OF INSDCommentParagraph
}

INSDCommentParagraph ::= SEQUENCE {
    items SEQUENCE OF INSDCommentItem
}

INSDCommentItem ::= SEQUENCE {
    value VisibleString OPTIONAL,
    url   VisibleString OPTIONAL
}

INSDStrucComment ::= SEQUENCE {
    name  VisibleString OPTIONAL,
    items SEQUENCE OF INSDStrucCommentItem
}

INSDStrucCommentItem ::= SEQUENCE {
    tag   VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    url   VisibleString OPTIONAL
}

-- INSDFeature_operator contains a string value describing
-- the relationship among a set of INSDInterval within
-- INSDFeature_intervals. The allowable formats are:
--
--   join :  The string literal 'join' indicates that the
--           INSDInterval intervals are biologically joined
--           together into a contiguous molecule.
--
--   order : The string literal 'order' indicates that the
--           INSDInterval intervals are in the presented
--           order, but they are not necessarily contiguous.
--
-- Either 'join' or 'order' is required if INSDFeature_intervals
-- is comprised of more than one INSDInterval .

INSDFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL,
    features     SEQUENCE OF INSDFeature
}

INSDFeature ::= SEQUENCE {
    key       VisibleString,
    location  VisibleString,
    intervals SEQUENCE OF INSDInterval OPTIONAL,
    operator  VisibleString OPTIONAL,
    partial5  BOOLEAN OPTIONAL,
    partial3  BOOLEAN OPTIONAL,
    quals     SEQUENCE OF INSDQualifier OPTIONAL,
    xrefs     SEQUENCE OF INSDXref OPTIONAL
}

-- INSDInterval_iscomp is a boolean indicating whether
-- an INSDInterval_from / INSDInterval_to location
-- represents a location on the complement strand.
-- When INSDInterval_iscomp is TRUE, it essentially
-- confirms that a 'from' value which is greater than
-- a 'to' value is intentional, because the location
-- is on the opposite strand of the presented sequence.

-- INSDInterval_interbp is a boolean indicating whether
-- a feature (such as a restriction site) is located
-- between two adjacent basepairs. When INSDInterval_interbp
-- is TRUE, the 'from' and 'to' values must differ by
-- exactly one base.
-- INSDInterval: one location span.  Only 'accession' is mandatory; the
-- coordinate and flag elements are all optional.
INSDInterval ::= SEQUENCE {
    from      INTEGER OPTIONAL,
    to        INTEGER OPTIONAL,
    point     INTEGER OPTIONAL,
    iscomp    BOOLEAN OPTIONAL,
    interbp   BOOLEAN OPTIONAL,
    accession VisibleString
}

INSDQualifier ::= SEQUENCE {
    name  VisibleString,
    value VisibleString OPTIONAL
}

INSDAltSeqData ::= SEQUENCE {
    name  VisibleString,  -- e.g., CON-division-join, WGS-contig-range,
                          -- WGS-scaffold-range, MGA/CAGE-range, genome
    items SEQUENCE OF INSDAltSeqItem OPTIONAL
}

INSDAltSeqItem ::= SEQUENCE {
    interval    INSDInterval OPTIONAL,
    isgap       BOOLEAN OPTIONAL,
    gap-length  INTEGER OPTIONAL,
    gap-type    VisibleString OPTIONAL,
    gap-linkage VisibleString OPTIONAL,
    gap-comment VisibleString OPTIONAL,
    first-accn  VisibleString OPTIONAL,
    last-accn   VisibleString OPTIONAL,
    value       VisibleString OPTIONAL
}

END


--$Revision: 6.1 $
--**********************************************************************
--
--  ASN.1 for a tiny Bioseq in XML
--    basically a structured FASTA file with a few extras
--    in this case we drop all modularity of components
--      All ids are Optional - simpler structure, less checking
--      Components of organism are hard coded - can't easily add or change
--      sequence is just string whether DNA or protein
--  by James Ostell, 2000
--
--**********************************************************************

NCBI-TSeq DEFINITIONS ::=
BEGIN

TSeq ::= SEQUENCE {
    seqtype  ENUMERATED {
        nucleotide (1),
        protein (2)
    },
    gi       INTEGER OPTIONAL,
    accver   VisibleString OPTIONAL,
    sid      VisibleString OPTIONAL,
    local    VisibleString OPTIONAL,
    taxid    INTEGER OPTIONAL,
    orgname  VisibleString OPTIONAL,
    defline  VisibleString,
    length   INTEGER,
    sequence VisibleString
}

TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them

END


--$Id: scoremat.asn,v 1.12 2008/04/15 15:55:45 kazimird Exp $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Author:  Christiam Camacho
--
-- File Description:
--      ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::=
BEGIN

EXPORTS
    Pssm,
    PssmIntermediateData,
    PssmFinalData,
    PssmParameters,
    PssmWithParameters;

IMPORTS
    Object-id FROM NCBI-General
    Seq-entry FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment
-- routines and threading

BlockProperty ::= SEQUENCE {
    type      INTEGER {
        unassigned (0),
        threshold (1),      -- score threshold for heuristics
        minscore (2),       -- observed minimum score in CD
        maxscore (3),       -- observed maximum score in CD
        meanscore (4),      -- observed mean score in CD
        variance (5),       -- observed score variance
        name (10),          -- just name the block
        is-optional(20),    -- block may not have to be used
        other (255)
    },
    intvalue  INTEGER OPTIONAL,
    textvalue VisibleString OPTIONAL
}

CoreBlock ::= SEQUENCE {
    start    INTEGER,                           -- begin of block on query
    stop     INTEGER,                           -- end of block on query
    minstart INTEGER OPTIONAL,                  -- optional N-terminal extension
    maxstop  INTEGER OPTIONAL,                  -- optional C-terminal extension
    property SEQUENCE OF BlockProperty OPTIONAL
}

LoopConstraint ::= SEQUENCE {
    minlength INTEGER DEFAULT 0,        -- minimum length of unaligned region
    maxlength INTEGER DEFAULT 100000    -- maximum length of unaligned region
}

CoreDef ::= SEQUENCE {
    nblocks INTEGER,                    -- number of core elements/blocks
    blocks  SEQUENCE OF CoreBlock,      -- nblocks locations
    loops   SEQUENCE OF LoopConstraint  -- (nblocks+1) constraints
}

-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb:
-- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix
--    of frequency ratios)
-- 2) PssmWithParams where pssm field contains final PSSM data (matrix of
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked and the matrix of scores
-- and statistical parameters are used to perform the search in PSI-BLAST and
-- the same data and the data in PssmWithParams::params::rpsdbparams is used to
-- build the PSSM and ultimately the RPS-BLAST database
--
--
--                 reads   ++++++++++++++ writes
-- PssmWithParams  ====>   + PSI-BLAST  + =====> PssmWithParams
--                         ++++++++++++++              |  ^
--         ^                                           |  |
--         |                                           |  |
--         +===========================================+  |
--                                                     |  |
--         +===========================================+  |
--         |                                              |
--   reads |                                              |
--         v                                              |
--  +++++++++++++++ writes +++++++++++++++++++++++        |
--  | formatrpsdb | =====> | RPS-BLAST databases |        |
--  +++++++++++++++        +++++++++++++++++++++++        |
--                                    ^                   |
--                                    |                   |
--                                    | reads             |
--                              +++++++++++++             |
--                              | RPS-BLAST |             |
--                              +++++++++++++             |
--                                                        |
--       reads  ++++++++++++ writes                       |
--  Cdd ======> | cddumper | =============================+
--              ++++++++++++
--
-- ===========================================================================

-- Contains the PSSM's scores and its associated statistical parameters.
-- Dimensions and order in which scores are stored must be the same as that
-- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores SEQUENCE OF INTEGER,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambda REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappa REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    h REAL,

    -- scaling factor used to obtain more precision when building the PSSM.
    -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
    -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
    -- given a PSSM which contains a scaled-up PSSM (indicated by having a
    -- scalingFactor greater than 1), then it will scale down the PSSM to
    -- perform the initial stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST
    -- will silently produce incorrect results).
    scalingFactor INTEGER DEFAULT 1,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambdaUngapped REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    kappaUngapped REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    hUngapped REAL OPTIONAL
}

-- Contains the PSSM's intermediate data used to create the PSSM's scores
-- and statistical parameters. Dimensions and order in which scores are
-- stored must be the same as that specified in Pssm::numRows,
-- Pssm::numColumns, and Pssm::byRow
PssmIntermediateData ::= SEQUENCE {

    -- observed residue frequencies (or counts) per position of the PSSM
    -- (prior to application of pseudocounts)
    resFreqsPerPos SEQUENCE OF INTEGER OPTIONAL,

    -- Weighted observed residue frequencies per position of the PSSM.
    -- (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    weightedResFreqsPerPos SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios
    freqRatios SEQUENCE OF REAL,

    -- Information content per position of the PSSM
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    informationContent SEQUENCE OF REAL OPTIONAL,

    -- Weights for columns of the PSSM without gaps
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    gaplessColumnWeights SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    sigma SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    intervalSizes SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including the
    -- query)
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    numMatchingSeqs SEQUENCE OF INTEGER OPTIONAL
}

-- Position-specific scoring matrix
--
-- Column indices on the PSSM refer to the positions corresponding to the
-- query/master sequence, i.e. the number of columns (N) is the same
-- as the length of the query/master sequence.
-- Row indices refer to individual amino acid types, i.e. the number of
-- rows (M) is the same as the number of different residues in the
-- alphabet we use. Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers. By default, we store
-- them column-by-column, M values for the first column followed by M
-- values for the second column, and so on. In order to provide
-- flexibility for external applications, the boolean field "byRow" is
-- provided to specify the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein BOOLEAN DEFAULT TRUE,

    -- PSSM identifier
    identifier Object-id OPTIONAL,

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.

    numRows    INTEGER,    -- number of rows
    numColumns INTEGER,    -- number of columns

    -- rowLabels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in
    -- order of the alphabet ncbistdaa is used for protein, ncbi4na for nucl.
    -- for proteins the values returned correspond to
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row?
    byRow BOOLEAN DEFAULT FALSE,

    -- PSSM representative sequence (master)
    query Seq-entry OPTIONAL,

    -- both intermediateData and finalData can be provided, but at least one of
    -- them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData
    -- representation.

    -- Intermediate or final data for the PSSM
    intermediateData PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData PssmFinalData OPTIONAL
}

-- This structure is used to create the RPS-BLAST database auxiliary file
-- (*.aux) and it contains parameters set at creation time of the PSSM.
-- Also, the matrixName field is used by formatrpsdb to build a PSSM from
-- a Pssm structure which only contains PssmIntermediateData.
FormatRpsDbParameters ::= SEQUENCE {

    -- name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62)
    matrixName VisibleString,

    -- gap opening penalty corresponding to the matrix above
    gapOpen INTEGER OPTIONAL,

    -- gap extension penalty corresponding to the matrix above
    gapExtend INTEGER OPTIONAL
}

-- Populated by PSSM engine of PSI-BLAST, original source for these values
-- are the PSI-BLAST options specified using the BLAST options API
PssmParameters ::= SEQUENCE {

    -- pseudocount constant used for PSSM. This field corresponds to beta in
    -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount INTEGER OPTIONAL,

    -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
    -- populated by PSI-BLAST
    rpsdbparams FormatRpsDbParameters OPTIONAL,

    -- alignment constraints needed by sequence-structure threader
    -- and other global or local block-alignment algorithms
    constraints CoreDef OPTIONAL
}

-- Envelope containing PSSM and the parameters used to create it.
-- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
PssmWithParameters ::= SEQUENCE {

    -- This field is applicable to PSI-BLAST and formatrpsdb.
    -- When both the intermediate and final PSSM data are provided in this
    -- field, the final data (matrix of scores and associated statistical
    -- parameters) takes precedence and that data is used for further
    -- processing. The rationale for this is that the PSSM's scores and
    -- statistical parameters might have been calculated by other applications
    -- and it might not be possible to recreate it by using PSI-BLAST's PSSM
    -- engine.
    pssm Pssm,

    -- This field's rpsdbparams is used to specify the values of options
    -- for processing by formatrpsdb. If these are not set, the command
    -- line defaults of formatrpsdb are applied. This field is used
    -- by PSI-BLAST to verify that the underlying score matrix used to build
    -- the PSSM is the same as the one being specified through the BLAST
    -- Options API. If this field is omitted, no verification will be
    -- performed, so be careful to keep track of what matrix was used to build
    -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
    params PssmParameters OPTIONAL
}

END


--$Revision: 1.124 $
--**********************************************************************
--
--  NCBI ASN.1 macro editing language specifications
--
--  by Colleen Bollin, 2007
--
--**********************************************************************

NCBI-Macro DEFINITIONS ::=
BEGIN

EXPORTS
    AECR-action,
    Parse-action,
    Macro-action-list,
    Suspect-rule-set;

-- simple constraints --

String-location ::= ENUMERATED {
    contains (1),
    equals (2),
    starts (3),
    ends (4),
    inlist (5)
}

Word-substitution ::= SEQUENCE {
    word           VisibleString OPTIONAL,
    synonyms       SET OF VisibleString OPTIONAL,
    case-sensitive BOOLEAN DEFAULT FALSE,
    whole-word     BOOLEAN DEFAULT FALSE
}

Word-substitution-set ::= SET OF Word-substitution

String-constraint ::= SEQUENCE {
    match-text     VisibleString OPTIONAL,
    match-location String-location DEFAULT contains,
    case-sensitive BOOLEAN DEFAULT FALSE,
    ignore-space   BOOLEAN DEFAULT FALSE,
    ignore-punct   BOOLEAN DEFAULT FALSE,
    ignore-words   Word-substitution-set OPTIONAL,
    whole-word     BOOLEAN DEFAULT FALSE,
    not-present    BOOLEAN DEFAULT FALSE,
    is-all-caps    BOOLEAN DEFAULT FALSE,
    is-all-lower   BOOLEAN DEFAULT FALSE,
    is-all-punct   BOOLEAN DEFAULT FALSE,
    ignore-weasel  BOOLEAN DEFAULT FALSE
}

String-constraint-set ::= SET OF String-constraint

Strand-constraint ::= ENUMERATED {
    any (0),
    plus (1),
    minus (2)
}

Seqtype-constraint ::= ENUMERATED {
    any (0),
    nuc (1),
    prot (2)
}

Partial-constraint ::= ENUMERATED {
    either (0),
    partial (1),
    complete (2)
}

Location-type-constraint ::= ENUMERATED {
    any (0),
    single-interval (1),
    joined (2),
    ordered (3)
}
Location-pos-constraint ::= CHOICE {
    dist-from-end     INTEGER,
    max-dist-from-end INTEGER,
    min-dist-from-end INTEGER
}

Location-constraint ::= SEQUENCE {
    strand        Strand-constraint DEFAULT any,
    seq-type      Seqtype-constraint DEFAULT any,
    partial5      Partial-constraint DEFAULT either,
    partial3      Partial-constraint DEFAULT either,
    location-type Location-type-constraint DEFAULT any,
    end5          Location-pos-constraint OPTIONAL,
    end3          Location-pos-constraint OPTIONAL
}

Object-type-constraint ::= ENUMERATED {
    any (0),
    feature (1),
    descriptor (2)
}

-- feature values --

Macro-feature-type ::= ENUMERATED {
    any (0),
    gene (1),
    org (2),
    cds (3),
    prot (4),
    preRNA (5),
    mRNA (6),
    tRNA (7),
    rRNA (8),
    snRNA (9),
    scRNA (10),
    otherRNA (11),
    pub (12),
    seq (13),
    imp (14),
    allele (15),
    attenuator (16),
    c-region (17),
    caat-signal (18),
    imp-CDS (19),
    conflict (20),
    d-loop (21),
    d-segment (22),
    enhancer (23),
    exon (24),
    gC-signal (25),
    iDNA (26),
    intron (27),
    j-segment (28),
    ltr (29),
    mat-peptide (30),
    misc-binding (31),
    misc-difference (32),
    misc-feature (33),
    misc-recomb (34),
    misc-RNA (35),
    misc-signal (36),
    misc-structure (37),
    modified-base (38),
    mutation (39),
    n-region (40),
    old-sequence (41),
    polyA-signal (42),
    polyA-site (43),
    precursor-RNA (44),
    prim-transcript (45),
    primer-bind (46),
    promoter (47),
    protein-bind (48),
    rbs (49),
    repeat-region (50),
    rep-origin (51),
    s-region (52),
    sig-peptide (53),
    source (54),
    stem-loop (55),
    sts (56),
    tata-signal (57),
    terminator (58),
    transit-peptide (59),
    unsure (60),
    v-region (61),
    v-segment (62),
    variation (63),
    virion (64),
    n3clip (65),
    n3UTR (66),
    n5clip (67),
    n5UTR (68),
    n10-signal (69),
    n35-signal (70),
    site-ref (71),
    region (72),
    comment (73),
    bond (74),
    site (75),
    rsite (76),
    user (77),
    txinit (78),
    num (79),
    psec-str (80),
    non-std-residue (81),
    het (82),
    biosrc (83),
    preprotein (84),
    mat-peptide-aa (85),
    sig-peptide-aa (86),
    transit-peptide-aa (87),
    snoRNA (88),
    gap (89),
    operon (90),
    oriT (91),
    ncRNA (92),
    tmRNA (93),
    mobile-element (94)
}

Feat-qual-legal ::= ENUMERATED {
    allele (1),
    activity (2),
    anticodon (3),
    bound-moiety (4),
    chromosome (5),
    citation (6),
    codon (7),
    codon-start (8),
    codons-recognized (9),
    compare (10),
    cons-splice (11),
    db-xref (12),
    description (13),
    direction (14),
    ec-number (15),
    environmental-sample (16),
    evidence (17),
    exception (18),
    experiment (19),
    focus (20),
    frequency (21),
    function (22),
    gene (23),
    gene-description (24),
    inference (25),
    label (26),
    locus-tag (27),
    map (28),
    mobile-element (29),
    mod-base (30),
    mol-type (31),
    ncRNA-class (32),
    note (33),
    number (34),
    old-locus-tag (35),
    operon (36),
    organism (37),
    organelle (38),
    partial (39),
    phenotype (40),
    plasmid (41),
    product (42),
    protein-id (43),
    pseudo (44),
    rearranged (45),
    replace (46),
    rpt-family (47),
    rpt-type (48),
    rpt-unit (49),
    rpt-unit-seq (50),
    rpt-unit-range (51),
    segment (52),
    sequenced-mol (53),
    standard-name (54),
    synonym (55),
    transcript-id (56),
    transgenic (57),
    translation (58),
    transl-except (59),
    transl-table (60),
    usedin (61),
    mobile-element-type (62),
    mobile-element-name (63),
    gene-comment (64),
    satellite (65),
    satellite-type (66),
    satellite-name (67),
    location (68),
    tag-peptide (69),
    mobile-element-type-type (70),
    name (71)
}

Feat-qual-legal-val ::= SEQUENCE {
    qual Feat-qual-legal,
    val  VisibleString
}

Feat-qual-legal-val-choice ::= CHOICE {
    qual Feat-qual-legal-val
}

Feat-qual-legal-set ::= SET OF Feat-qual-legal-val-choice

Feat-qual-choice ::= CHOICE {
    legal-qual   Feat-qual-legal,
    illegal-qual String-constraint
}

Feature-field ::= SEQUENCE {
    type  Macro-feature-type,
    field Feat-qual-choice
}

Feature-field-legal ::= SEQUENCE {
    type  Macro-feature-type,
    field Feat-qual-legal
}

Feature-field-pair ::= SEQUENCE {
    type       Macro-feature-type,
    field-from Feat-qual-choice,
    field-to   Feat-qual-choice
}

Rna-feat-type ::= CHOICE {
    any     NULL,
    preRNA  NULL,
    mRNA    NULL,
    tRNA    NULL,
    rRNA    NULL,
    ncRNA   VisibleString,
    tmRNA   NULL,
    miscRNA NULL
}

Rna-field ::= ENUMERATED {
    product (1),
    comment (2),
    codons-recognized (3),
    ncrna-class (4),
    anticodon (5),
    transcript-id (6),
    gene-locus (7),
    gene-description (8),
    gene-maploc (9),
    gene-locus-tag (10),
    gene-synonym (11),
    gene-comment (12),
    tag-peptide (13)
}

Rna-qual ::= SEQUENCE {
    type  Rna-feat-type,
    field Rna-field
}

Rna-qual-pair ::= SEQUENCE {
    type       Rna-feat-type,
    field-from Rna-field,
    field-to   Rna-field
}

Source-qual ::= ENUMERATED {
    acronym (1),
    anamorph (2),
    authority (3),
    bio-material (4),
    biotype (5),
    biovar (6),
    breed (7),
    cell-line (8),
    cell-type (9),
    chemovar (10),
    chromosome (11),
    clone (12),
    clone-lib (13),
    collected-by (14),
    collection-date (15),
    common (16),
    common-name (17),
    country (18),
    cultivar (19),
    culture-collection (20),
    dev-stage (21),
    division (22),
    dosage (23),
    ecotype (24),
    endogenous-virus-name (25),
    environmental-sample (26),
    forma (27),
    forma-specialis (28),
    frequency (29),
    fwd-primer-name (30),
    fwd-primer-seq (31),
    gb-acronym (32),
    gb-anamorph (33),
    gb-synonym (34),
    genotype (35),
    germline (36),
    group (37),
    haplotype (38),
    identified-by (39),
    insertion-seq-name (40),
    isolate (41),
    isolation-source (42),
    lab-host (43),
    lat-lon (44),
    lineage (45),
    map (46),
    metagenome-source (47),
    metagenomic (48),
    old-lineage (49),
    old-name (50),
    orgmod-note (51),
    nat-host (52),
    pathovar (53),
    plasmid-name (54),
    plastid-name (55),
    pop-variant (56),
    rearranged (57),
    rev-primer-name (58),
    rev-primer-seq (59),
    segment (60),
    serogroup (61),
    serotype (62),
    serovar (63),
    sex (64),
    specimen-voucher (65),
    strain (66),
    subclone (67),
    subgroup (68),
    subsource-note (69),
    sub-species (70),
    substrain (71),
    subtype (72),
    synonym (73),
    taxname (74),
    teleomorph (75),
    tissue-lib (76),
    tissue-type (77),
    transgenic (78),
    transposon-name (79),
    type (80),
    variety (81),
    specimen-voucher-INST (82),
    specimen-voucher-COLL (83),
    specimen-voucher-SpecID (84),
    culture-collection-INST (85),
    culture-collection-COLL (86),
    culture-collection-SpecID (87),
    bio-material-INST (88),
    bio-material-COLL (89),
    bio-material-SpecID (90),
    all-notes (91),
    mating-type (92),
    linkage-group (93),
    haplogroup (94),
    all-quals (95),
    dbxref (96),
    taxid (97)
}

Source-qual-pair ::= SEQUENCE {
    field-from Source-qual,
    field-to   Source-qual
}

Source-location ::= ENUMERATED {
    unknown (0),
    genomic (1),
    chloroplast (2),
    chromoplast (3),
    kinetoplast (4),
    mitochondrion (5),
    plastid (6),
    macronuclear (7),
    extrachrom (8),
    plasmid (9),
    transposon (10),
    insertion-seq (11),
    cyanelle (12),
    proviral (13),
    virion (14),
    nucleomorph (15),
    apicoplast (16),
    leucoplast (17),
    proplastid (18),
    endogenous-virus (19),
    hydrogenosome (20),
    chromosome (21),
    chromatophore (22)
}

Source-origin ::= ENUMERATED {
    unknown (0),
    natural (1),
    natmut (2),
    mut (3),
    artificial (4),
    synthetic (5),
    other (255)
}

Source-qual-choice ::= CHOICE {
    textqual Source-qual,
    location Source-location,
    origin   Source-origin,
    gcode    INTEGER,
    mgcode   INTEGER
}

Source-qual-text-val ::= SEQUENCE {
    srcqual Source-qual,
    val     VisibleString
}

Source-qual-val-choice ::= CHOICE {
    textqual Source-qual-text-val,
    location Source-location,
    origin   Source-origin,
    gcode    INTEGER,
    mgcode   INTEGER
}

Source-qual-val-set ::= SET OF Source-qual-val-choice

CDSGeneProt-field ::= ENUMERATED {
    cds-comment (1),
    gene-locus (2),
    gene-description (3),
    gene-comment (4),
    gene-allele (5),
    gene-maploc (6),
    gene-locus-tag (7),
    gene-synonym (8),
    gene-old-locus-tag (9),
    mrna-product (10),
    mrna-comment (11),
    prot-name (12),
    prot-description (13),
    prot-ec-number (14),
    prot-activity (15),
    prot-comment (16),
    mat-peptide-name (17),
    mat-peptide-description (18),
    mat-peptide-ec-number (19),
    mat-peptide-activity (20),
    mat-peptide-comment (21),
    cds-inference (22),
    gene-inference (23),
    codon-start (24)
}

CDSGeneProt-field-pair ::= SEQUENCE {
    field-from CDSGeneProt-field,
    field-to   CDSGeneProt-field
}

Molecule-type ::= ENUMERATED {
    unknown (0),
    genomic (1),
    precursor-RNA (2),
    mRNA (3),
    rRNA (4),
    tRNA (5),
    genomic-mRNA (6),
    cRNA (7),
    transcribed-RNA (8),
    ncRNA (9),
    transfer-messenger-RNA (10),
    macro-other (11)
}

Technique-type ::= ENUMERATED {
    unknown (0),
    standard (1),
    est (2),
    sts (3),
    survey (4),
    genetic-map (5),
    physical-map (6),
    derived (7),
    concept-trans (8),
    seq-pept (9),
    both (10),
    seq-pept-overlap (11),
    seq-pept-homol (12),
    concept-trans-a (13),
    htgs-1 (14),
    htgs-2 (15),
    htgs-3 (16),
    fli-cDNA (17),
    htgs-0 (18),
    htc (19),
    wgs (20),
    barcode (21),
    composite-wgs-htgs (22),
    tsa (23),
    other (24)
}

-- NOTE: 'other' used to be numbered 6, the same value as 'has-left'.
-- Values in an ENUMERATED type must be distinct, so 'other' now takes the
-- next unused value.  All other names and values are unchanged.
Completedness-type ::= ENUMERATED {
    unknown (0),
    complete (1),
    partial (2),
    no-left (3),
    no-right (4),
    no-ends (5),
    has-left (6),
    has-right (7),
    other (8)
}

Molecule-class-type ::= ENUMERATED {
    unknown (0),
    dna (1),
    rna (2),
    protein (3),
    nucleotide (4),
    other (5)
}

Topology-type ::= ENUMERATED {
    unknown (0),
    linear (1),
    circular (2),
    tandem (3),
    other (4)
}

Strand-type ::= ENUMERATED {
    unknown (0),
    single (1),
    double (2),
    mixed (3),
    mixed-rev (4),
    other (5)
}

Molinfo-field ::= CHOICE {
    molecule      Molecule-type,
    technique     Technique-type,
    completedness Completedness-type,
    mol-class     Molecule-class-type,
    topology      Topology-type,
    strand        Strand-type
}

Molinfo-molecule-pair ::= SEQUENCE {
    from Molecule-type,
    to   Molecule-type
}

Molinfo-technique-pair ::= SEQUENCE {
    from Technique-type,
    to   Technique-type
}

Molinfo-completedness-pair ::= SEQUENCE {
    from Completedness-type,
    to   Completedness-type
}

Molinfo-mol-class-pair ::= SEQUENCE {
    from Molecule-class-type,
    to   Molecule-class-type
}

Molinfo-topology-pair ::= SEQUENCE {
    from Topology-type,
    to   Topology-type
}

Molinfo-strand-pair ::= SEQUENCE {
    from Strand-type,
    to   Strand-type
}

Molinfo-field-pair ::= CHOICE {
    molecule      Molinfo-molecule-pair,
    technique     Molinfo-technique-pair,
    completedness Molinfo-completedness-pair,
    mol-class     Molinfo-mol-class-pair,
    topology      Molinfo-topology-pair,
    strand        Molinfo-strand-pair
}

Molinfo-field-list ::= SET OF Molinfo-field

Molinfo-field-constraint ::= SEQUENCE {
    field  Molinfo-field,
    is-not BOOLEAN DEFAULT FALSE
}

-- publication fields --

Publication-field ::= ENUMERATED {
    cit (1),
    authors (2),
    journal (3),
    volume (4),
    issue (5),
    pages (6),
    date (7),
    serial-number (8),
    title (9),
    affiliation (10),
    affil-div (11),
    affil-city (12),
    affil-sub (13),
    affil-country (14),
    affil-street (15),
    affil-email (16),
    affil-fax (17),
    affil-phone (18),
    affil-zipcode (19),
    authors-initials (20)
}

-- structured comment fields --

Structured-comment-field ::= CHOICE {
    database   NULL,
    named      VisibleString,
    field-name NULL
}

Structured-comment-field-pair ::= SEQUENCE {
    from Structured-comment-field,
    to   Structured-comment-field
}

-- misc fields --
-- these would not appear in pairs --

Misc-field ::= ENUMERATED {
    genome-project-id (1),
    comment-descriptor (2),
    defline (3),
    keyword (4)
}

-- dblink fields --

DBLink-field-type ::= ENUMERATED {
    trace-assembly (1),
    bio-sample (2),
    probe-db (3),
    sequence-read-archve (4),
    bio-project (5)
}

DBLink-field-pair ::= SEQUENCE {
    from DBLink-field-type,
    to   DBLink-field-type
}

-- complex constraints --

Pub-type ::= ENUMERATED {
    any (0),
    published (1),
    unpublished (2),
    in-press (3),
    submitter-block (4)
}

Pub-field-constraint ::= SEQUENCE {
    field      Publication-field,
    constraint String-constraint
}

Pub-field-special-constraint-type ::= CHOICE {
    is-present     NULL,
    is-not-present NULL,
    is-all-caps    NULL,
    is-all-lower   NULL,
    is-all-punct   NULL
}

Pub-field-special-constraint ::= SEQUENCE {
    field      Publication-field,
    constraint Pub-field-special-constraint-type
}

Publication-constraint ::= SEQUENCE {
    type          Pub-type,
    field         Pub-field-constraint OPTIONAL,
    special-field Pub-field-special-constraint OPTIONAL
}

Source-constraint ::= SEQUENCE {
    field1          Source-qual-choice OPTIONAL,
    field2          Source-qual-choice OPTIONAL,
    constraint      String-constraint OPTIONAL,
    type-constraint Object-type-constraint OPTIONAL
}

CDSGeneProt-feature-type-constraint ::= ENUMERATED {
    gene (1),
    mRNA (2),
    cds (3),
    prot (4),
    exon (5),
    mat-peptide (6)
}

CDSGeneProt-pseudo-constraint ::= SEQUENCE {
    feature   CDSGeneProt-feature-type-constraint,
    is-pseudo BOOLEAN DEFAULT TRUE
}

CDSGeneProt-constraint-field ::= CHOICE {
    field CDSGeneProt-field
}

CDSGeneProt-qual-constraint ::= SEQUENCE {
    field1     CDSGeneProt-constraint-field OPTIONAL,
    field2     CDSGeneProt-constraint-field OPTIONAL,
    constraint String-constraint OPTIONAL
}

Field-constraint ::= SEQUENCE {
    field             Field-type,
    string-constraint String-constraint
}

Sequence-constraint-rnamol ::= ENUMERATED {
    any (0),
    genomic (1),
    precursor-RNA (2),
    mRNA (3),
    rRNA (4),
    tRNA (5),
    genomic-mRNA (6),
    cRNA (7),
    transcribed-RNA (8),
    ncRNA (9),
    transfer-messenger-RNA (10)
}

Sequence-constraint-mol-type-constraint ::= CHOICE {
    any        NULL,
    nucleotide NULL,
    dna        NULL,
    rna        Sequence-constraint-rnamol,
    protein    NULL
}

Quantity-constraint ::= CHOICE {
    equals       INTEGER,
    greater-than INTEGER,
    less-than    INTEGER
}

Feature-strandedness-constraint ::= ENUMERATED {
    any (0),
    minus-only (1),
    plus-only (2),
    at-least-one-minus (3),
    at-least-one-plus (4),
    no-minus (5),
    no-plus (6)
}

Sequence-constraint ::= SEQUENCE {
    seqtype           Sequence-constraint-mol-type-constraint OPTIONAL,
    id                String-constraint OPTIONAL,
    feature           Macro-feature-type,
    num-type-features Quantity-constraint OPTIONAL,
    num-features      Quantity-constraint OPTIONAL,
    length            Quantity-constraint OPTIONAL,
    strandedness      Feature-strandedness-constraint DEFAULT any
}

Match-type-constraint ::= ENUMERATED {
    dont-care (0),
    yes (1),
    no (2)
}

Translation-constraint ::= SEQUENCE {
    actual-strings String-constraint-set,
    transl-strings String-constraint-set,
    internal-stops Match-type-constraint DEFAULT dont-care,
    num-mismatches Quantity-constraint OPTIONAL
}

Constraint-choice ::= CHOICE {
    string             String-constraint,
    location           Location-constraint,
    field              Field-constraint,
    source             Source-constraint,
    cdsgeneprot-qual   CDSGeneProt-qual-constraint,
    cdsgeneprot-pseudo CDSGeneProt-pseudo-constraint,
    sequence           Sequence-constraint,
    pub                Publication-constraint,
    molinfo            Molinfo-field-constraint,
    field-missing      Field-type,
    translation        Translation-constraint
}

Constraint-choice-set ::= SET OF Constraint-choice

Text-marker ::= CHOICE {
    free-text VisibleString,
    digits    NULL,
    letters   NULL
}

Text-portion ::= SEQUENCE {
    left-marker    Text-marker OPTIONAL,
    include-left   BOOLEAN,
    right-marker   Text-marker OPTIONAL,
    include-right  BOOLEAN,
    inside         BOOLEAN,
    case-sensitive BOOLEAN DEFAULT FALSE,
    whole-word     BOOLEAN DEFAULT FALSE
}

Field-edit-location ::= ENUMERATED {
    anywhere (0),
    beginning (1),
    end (2)
}

Field-edit ::= SEQUENCE {
    find-txt         VisibleString,
    repl-txt         VisibleString OPTIONAL,
    location         Field-edit-location DEFAULT anywhere,
    case-insensitive BOOLEAN DEFAULT FALSE
}

Field-type ::= CHOICE {
    source-qual         Source-qual-choice,
    feature-field       Feature-field,
    rna-field           Rna-qual,
    cds-gene-prot       CDSGeneProt-field,
    molinfo-field       Molinfo-field,
    pub                 Publication-field,
    struc-comment-field Structured-comment-field,
    misc                Misc-field,
    dblink              DBLink-field-type
}

Field-pair-type ::= CHOICE {
    source-qual         Source-qual-pair,
    feature-field       Feature-field-pair,
    rna-field           Rna-qual-pair,
    cds-gene-prot       CDSGeneProt-field-pair,
    molinfo-field       Molinfo-field-pair,
    struc-comment-field Structured-comment-field-pair,
    dblink              DBLink-field-pair
}

ExistingTextOption ::= ENUMERATED {
    replace-old (1),
    append-semi (2),
    append-space (3),
    append-colon (4),
    append-comma (5),
    append-none (6),
    prefix-semi (7),
    prefix-space (8),
    prefix-colon (9),
    prefix-comma (10),
    prefix-none (11),
    leave-old (12),
    add-qual (13)
}

Apply-action ::= SEQUENCE {
    field         Field-type,
    value         VisibleString,
    existing-text ExistingTextOption
}

Edit-action ::= SEQUENCE {
    edit  Field-edit,
    field Field-type
}

Cap-change ::=
ENUMERATED { none (0) , tolower (1) , toupper (2) , firstcap (3) , firstcaprestnochange (4) } Text-transform ::= CHOICE { edit Field-edit , caps Cap-change , remove Text-portion } Text-transform-set ::= SET OF Text-transform Convert-action ::= SEQUENCE { fields Field-pair-type , strip-name BOOLEAN DEFAULT FALSE , keep-original BOOLEAN DEFAULT FALSE , capitalization Cap-change DEFAULT none , existing-text ExistingTextOption } Copy-action ::= SEQUENCE { fields Field-pair-type , existing-text ExistingTextOption } Swap-action ::= SEQUENCE { fields Field-pair-type , field-to Field-type } AECRParse-action ::= SEQUENCE { portion Text-portion , fields Field-pair-type , remove-from-parsed BOOLEAN DEFAULT FALSE , remove-left BOOLEAN DEFAULT FALSE , remove-right BOOLEAN DEFAULT FALSE , transform Text-transform-set OPTIONAL , existing-text ExistingTextOption } Remove-action ::= SEQUENCE { field Field-type } Action-choice ::= CHOICE { apply Apply-action , edit Edit-action , convert Convert-action , copy Copy-action , swap Swap-action , remove Remove-action , parse AECRParse-action } AECR-action ::= SEQUENCE { action Action-choice , also-change-mrna BOOLEAN DEFAULT FALSE , constraint Constraint-choice-set OPTIONAL } Parse-src-org-choice ::= CHOICE { source-qual Source-qual , taxname-after-binomial NULL } Parse-src-org ::= SEQUENCE { field Parse-src-org-choice , type Object-type-constraint DEFAULT any } -- For Parse-src-general-id tag, specify the db of the id from which you -- want to retrieve the tag. If empty or null, any db will do. 
-- Type of the general-id alternative of Parse-src.
Parse-src-general-id ::= CHOICE {
    whole-text NULL ,
    db NULL ,
    tag VisibleString }

-- Type of the src component of Parse-action.
Parse-src ::= CHOICE {
    defline NULL ,
    flatfile NULL ,
    local-id NULL ,
    org Parse-src-org ,
    comment NULL ,
    bankit-comment NULL ,
    structured-comment VisibleString ,
    file-id NULL ,
    general-id Parse-src-general-id }

-- Type of the org alternative of Parse-dest; type defaults to any.
Parse-dst-org ::= SEQUENCE {
    field Source-qual-choice ,
    type Object-type-constraint DEFAULT any }

-- Type of the dest component of Parse-action.
Parse-dest ::= CHOICE {
    defline NULL ,
    org Parse-dst-org ,
    featqual Feature-field-legal ,
    comment-descriptor NULL ,
    dbxref VisibleString }

-- capitalization defaults to none and remove-from-parsed to FALSE;
-- transform is optional.
Parse-action ::= SEQUENCE {
    portion Text-portion ,
    src Parse-src ,
    dest Parse-dest ,
    capitalization Cap-change DEFAULT none ,
    remove-from-parsed BOOLEAN DEFAULT FALSE ,
    transform Text-transform-set OPTIONAL ,
    existing-text ExistingTextOption }

Location-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER }

-- Type of the location component of Apply-feature-action.
Location-choice ::= CHOICE {
    interval Location-interval ,
    whole-sequence NULL ,
    point INTEGER }

Sequence-list ::= SET OF VisibleString

-- Type of the seq-list component of Apply-feature-action.
Sequence-list-choice ::= CHOICE {
    list Sequence-list ,
    all NULL }

-- Defaults: partial5, partial3, add-mrna and apply-to-parts are FALSE;
-- plus-strand and add-redundant are TRUE; only-seg-num is -1.
-- fields and src-fields are optional.
Apply-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    partial5 BOOLEAN DEFAULT FALSE ,
    partial3 BOOLEAN DEFAULT FALSE ,
    plus-strand BOOLEAN DEFAULT TRUE ,
    location Location-choice ,
    seq-list Sequence-list-choice ,
    add-redundant BOOLEAN DEFAULT TRUE ,
    add-mrna BOOLEAN DEFAULT FALSE ,
    apply-to-parts BOOLEAN DEFAULT FALSE ,
    only-seg-num INTEGER DEFAULT -1 ,
    fields Feat-qual-legal-set OPTIONAL ,
    src-fields Source-qual-val-set OPTIONAL }

Remove-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    constraint Constraint-choice-set OPTIONAL }

-- for convert features --
Convert-from-CDS-options ::= SEQUENCE {
    remove-mRNA BOOLEAN ,
    remove-gene BOOLEAN ,
    remove-transcript-id BOOLEAN }

-- Type of the src-options component of Convert-feature-action.
Convert-feature-src-options ::= CHOICE {
    cds Convert-from-CDS-options }

Bond-type ::= ENUMERATED {
    disulfide (1) ,
    thioester (2) ,
    crosslink (3) ,
    thioether (4) ,
    other (5) }

Site-type ::= ENUMERATED {
    active (1) ,
    binding (2) ,
    cleavage (3) ,
    inhibit (4) ,
    modified (5) ,
    glycosylation (6) ,
    myristoylation (7) ,
    mutagenized (8) ,
    metal-binding (9) ,
    phosphorylation (10) ,
    acetylation (11) ,
    amidation (12) ,
    methylation (13) ,
    hydroxylation (14) ,
    sulfatation (15) ,
    oxidative-deamination (16) ,
    pyrrolidone-carboxylic-acid (17) ,
    gamma-carboxyglutamic-acid (18) ,
    blocked (19) ,
    lipid-binding (20) ,
    np-binding (21) ,
    dna-binding (22) ,
    signal-peptide (23) ,
    transit-peptide (24) ,
    transmembrane-region (25) ,
    nitrosylation (26) ,
    other (27) }

-- other choice is to create protein sequences, skipping bad --
Region-type ::= SEQUENCE {
    create-nucleotide BOOLEAN }

-- Type of the dst-options component of Convert-feature-action.
Convert-feature-dst-options ::= CHOICE {
    bond Bond-type ,
    site Site-type ,
    region Region-type ,
    ncrna-class VisibleString ,
    remove-original BOOLEAN }

-- src-options, dst-options and src-feat-constraint are optional;
-- leave-original must always be given.
Convert-feature-action ::= SEQUENCE {
    type-from Macro-feature-type ,
    type-to Macro-feature-type ,
    src-options Convert-feature-src-options OPTIONAL ,
    dst-options Convert-feature-dst-options OPTIONAL ,
    leave-original BOOLEAN ,
    src-feat-constraint Constraint-choice-set OPTIONAL }

-- The "from" list has any (0) and no reverse; the "to" list has
-- reverse (5) and no any.
Feature-location-strand-from ::= ENUMERATED {
    any (0) ,
    plus (1) ,
    minus (2) ,
    unknown (3) ,
    both (4) }

Feature-location-strand-to ::= ENUMERATED {
    plus (1) ,
    minus (2) ,
    unknown (3) ,
    both (4) ,
    reverse (5) }

Edit-location-strand ::= SEQUENCE {
    strand-from Feature-location-strand-from ,
    strand-to Feature-location-strand-to }

-- Constraint and action types used by the partial alternatives of
-- Location-edit-type.  Each "set" action pairs a constraint with an
-- extend flag; each "clear" alternative uses its constraint enumeration
-- directly.
Partial-5-set-constraint ::= ENUMERATED {
    all (0) ,
    at-end (1) ,
    bad-start (2) ,
    frame-not-one (3) }

Partial-5-set-action ::= SEQUENCE {
    constraint Partial-5-set-constraint ,
    extend BOOLEAN }

Partial-5-clear-constraint ::= ENUMERATED {
    all (0) ,
    not-at-end (1) ,
    good-start (2) }

Partial-3-set-constraint ::= ENUMERATED {
    all (0) ,
    at-end (1) ,
    bad-end (2) }

Partial-3-set-action ::= SEQUENCE {
    constraint Partial-3-set-constraint ,
    extend BOOLEAN }

Partial-3-clear-constraint ::= ENUMERATED {
    all (0) ,
    not-at-end (1) ,
    good-end (2) }

Partial-both-set-constraint ::= ENUMERATED {
    all (0) ,
    at-end (1) }

Partial-both-set-action ::= SEQUENCE {
    constraint Partial-both-set-constraint ,
    extend BOOLEAN }

Partial-both-clear-constraint ::= ENUMERATED {
    all (0) ,
    not-at-end (1) }

Convert-location-type ::= ENUMERATED {
    join (1) ,
    order (2) ,
    merge (3) }

-- Type of the action component of Edit-feature-location-action.
Location-edit-type ::= CHOICE {
    strand Edit-location-strand ,
    set-5-partial Partial-5-set-action ,
    clear-5-partial Partial-5-clear-constraint ,
    set-3-partial Partial-3-set-action ,
    clear-3-partial Partial-3-clear-constraint ,
    set-both-partial Partial-both-set-action ,
    clear-both-partial Partial-both-clear-constraint ,
    convert Convert-location-type ,
    extend-5 NULL ,
    extend-3 NULL }

-- retranslate-cds is OPTIONAL with no default, so it can be absent.
Edit-feature-location-action ::= SEQUENCE {
    type Macro-feature-type ,
    action Location-edit-type ,
    retranslate-cds BOOLEAN OPTIONAL ,
    constraint Constraint-choice-set OPTIONAL }

-- Only to-list is mandatory.
Molinfo-block ::= SEQUENCE {
    to-list Molinfo-field-list ,
    from-list Molinfo-field-list OPTIONAL ,
    constraint Constraint-choice-set OPTIONAL }

Descriptor-type ::= ENUMERATED {
    all (0) ,
    title (1) ,
    source (2) ,
    publication (3) ,
    comment (4) ,
    genbank (5) ,
    user (6) ,
    create-date (7) ,
    update-date (8) ,
    mol-info (9) ,
    structured-comment (10) ,
    genome-project-id (11) }

Remove-descriptor-action ::= SEQUENCE {
    type Descriptor-type ,
    constraint Constraint-choice-set OPTIONAL }

Autodef-list-type ::= ENUMERATED {
    feature-list (1) ,
    complete-sequence (2) ,
    complete-genome (3) }

-- modifiers is optional; clause-list-type is mandatory.
Autodef-action ::= SEQUENCE {
    modifiers SET OF Source-qual OPTIONAL ,
    clause-list-type Autodef-list-type }

-- Every component may be omitted; punct-only defaults to FALSE.
Fix-pub-caps-action ::= SEQUENCE {
    title BOOLEAN OPTIONAL ,
    authors BOOLEAN OPTIONAL ,
    affiliation BOOLEAN OPTIONAL ,
    affil-country BOOLEAN OPTIONAL ,
    punct-only BOOLEAN DEFAULT FALSE ,
    constraint Constraint-choice-set OPTIONAL }

Sort-order ::= ENUMERATED {
    short-to-long (1) ,
    long-to-short (2) ,
    alphabetical (3) }

Sort-fields-action ::= SEQUENCE {
    field Field-type ,
    order Sort-order ,
    constraint Constraint-choice-set OPTIONAL }

Fix-caps-action ::= CHOICE {
    pub Fix-pub-caps-action ,
    src-country NULL ,
    mouse-strain NULL ,
    src-qual Source-qual }
-- Type of the fix-format alternative of Macro-action-choice.
Fix-format-action ::= CHOICE {
    collection-date NULL ,
    lat-lon NULL ,
    primers NULL ,
    protein-name NULL }

-- The three BOOLEAN flags are mandatory; rd-constraint is optional.
Remove-duplicate-feature-action ::= SEQUENCE {
    type Macro-feature-type ,
    ignore-partials BOOLEAN ,
    case-sensitive BOOLEAN ,
    remove-proteins BOOLEAN ,
    rd-constraint Constraint-choice-set OPTIONAL }

Gene-xref-suppression-type ::= ENUMERATED {
    any (0) ,
    suppressing (1) ,
    non-suppressing (2) }

Gene-xref-necessary-type ::= ENUMERATED {
    any (0) ,
    necessary (1) ,
    unnecessary (2) }

Gene-xref-type ::= SEQUENCE {
    feature Macro-feature-type ,
    suppression Gene-xref-suppression-type ,
    necessary Gene-xref-necessary-type }

-- Currently a single alternative.
Xref-type ::= CHOICE {
    gene Gene-xref-type }

Remove-xrefs-action ::= SEQUENCE {
    xref-type Xref-type ,
    constraint Constraint-choice-set OPTIONAL }

Make-gene-xref-action ::= SEQUENCE {
    feature Macro-feature-type ,
    constraint Constraint-choice-set OPTIONAL }

Author-fix-type ::= ENUMERATED {
    truncate-middle-initials (1) ,
    strip-suffix (2) ,
    move-middle-to-first (3) }

Author-fix-action ::= SEQUENCE {
    fix-type Author-fix-type ,
    constraint Constraint-choice-set OPTIONAL }

-- add-cit-subs defaults to FALSE when omitted.
Update-sequences-action ::= SEQUENCE {
    filename VisibleString ,
    add-cit-subs BOOLEAN DEFAULT FALSE }

-- Element type of Macro-action-list.  Alternatives typed NULL carry no
-- parameters; the others carry the action type named next to them.
Macro-action-choice ::= CHOICE {
    aecr AECR-action ,
    parse Parse-action ,
    add-feature Apply-feature-action ,
    remove-feature Remove-feature-action ,
    convert-feature Convert-feature-action ,
    edit-location Edit-feature-location-action ,
    remove-descriptor Remove-descriptor-action ,
    autodef Autodef-action ,
    removesets NULL ,
    trim-junk-from-primer-seq NULL ,
    trim-stop-from-complete-cds NULL ,
    fix-usa-and-states NULL ,
    synchronize-cds-partials NULL ,
    adjust-for-consensus-splice NULL ,
    fix-pub-caps Fix-pub-caps-action ,
    remove-seg-gaps NULL ,
    sort-fields Sort-fields-action ,
    apply-molinfo-block Molinfo-block ,
    fix-caps Fix-caps-action ,
    fix-format Fix-format-action ,
    fix-spell NULL ,
    remove-duplicate-features Remove-duplicate-feature-action ,
    remove-lineage-notes NULL ,
    remove-xrefs Remove-xrefs-action ,
    make-gene-xrefs Make-gene-xref-action ,
    make-bold-xrefs NULL ,
    fix-author Author-fix-action ,
    update-sequences Update-sequences-action ,
    add-trans-splicing NULL ,
    remove-invalid-ecnumbers NULL }

Macro-action-list ::= SET OF Macro-action-choice

-- Type of the find and except components of Suspect-rule.
Search-func ::= CHOICE {
    string-constraint String-constraint ,
    contains-plural NULL ,
    n-or-more-brackets-or-parentheses INTEGER ,
    three-numbers NULL ,
    underscore NULL ,
    prefix-and-numbers VisibleString ,
    all-caps NULL ,
    unbalanced-paren NULL ,
    too-long INTEGER ,
    has-term VisibleString }

-- replace is optional; both flags default to FALSE.
Simple-replace ::= SEQUENCE {
    replace VisibleString OPTIONAL ,
    whole-string BOOLEAN DEFAULT FALSE ,
    weasel-to-putative BOOLEAN DEFAULT FALSE }

Replace-func ::= CHOICE {
    simple-replace Simple-replace ,
    haem-replace VisibleString }

-- move-to-note defaults to FALSE when omitted.
Replace-rule ::= SEQUENCE {
    replace-func Replace-func ,
    move-to-note BOOLEAN DEFAULT FALSE }

-- Type of the rule-type component of Suspect-rule.
Fix-type ::= ENUMERATED {
    none (0) ,
    typo (1) ,
    putative-typo (2) ,
    quickfix (3) ,
    no-organelle-for-prokaryote (4) ,
    might-be-nonfunctional (5) ,
    database (6) ,
    remove-organism-name (7) ,
    inappropriate-symbol (8) ,
    evolutionary-relationship (9) ,
    use-protein (10) ,
    hypothetical (11) ,
    british (12) ,
    description (13) ,
    gene (14) }

-- find is the only mandatory component; rule-type defaults to none.
Suspect-rule ::= SEQUENCE {
    find Search-func ,
    except Search-func OPTIONAL ,
    feat-constraint Constraint-choice-set OPTIONAL ,
    rule-type Fix-type DEFAULT none ,
    replace Replace-rule OPTIONAL ,
    description VisibleString OPTIONAL }

Suspect-rule-set ::= SET OF Suspect-rule

END