/* $Id: txalign.h,v 6.21 2006/07/13 17:06:39 bollin Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. 
* * =========================================================================== * * File Name: $RCSfile: txalign.h,v $ * * Author: Jinghui Zhang * * Initial Version Creation Date: 03/13/94 * * $Revision: 6.21 $ * * File Description: * External include file for various alignments * Revision 5.13 1997/06/05 20:55:34 madden * Added PrintDefLinesFromSeqAlign prototype * * * $Log: txalign.h,v $ * Revision 6.21 2006/07/13 17:06:39 bollin * use Uint4 instead of Uint2 for itemID values * removed unused variables * resolved compiler warnings * * Revision 6.20 2006/05/30 13:50:48 kans * set define to include BlastDefLineSet* functions in fdlobj.h * * Revision 6.19 2004/08/16 19:36:52 dondosha * Made CreateMaskByteStore function public: needed by web BLAST 2 sequences * * Revision 6.18 2004/06/24 21:15:44 dondosha * Changed last Boolean argument in ScoreAndEvalueToBuffers to Uint1, to allow different options for formatting * * Revision 6.17 2004/05/14 15:38:09 dondosha * Made function ScoreAndEvalueToBuffers public * * Revision 6.16 2003/11/20 22:09:26 dondosha * Added a PrindDefLinesFromSeqAlignWithPath function with an argument to provide root path for image links * * Revision 6.15 2002/12/11 16:24:51 jianye * added structure linkout * * Revision 6.14 2002/10/17 16:57:50 jianye * added option for get sequence feature * * Revision 6.13 2002/03/26 23:26:38 dondosha * Added a possibility of a link to Blast 2 sequences from megablast output * * Revision 6.12 2002/02/15 14:18:24 camacho * Added RDBTaxNamesClone function * * Revision 6.11 2002/02/01 20:04:57 jianye * Fixed getting wrong blast defline struct for non-redundant bioseq and adding utility function getBlastDefLineForSeqId(bdlp, sip) * * Revision 6.10 2002/01/24 18:47:49 camacho * Moved RDBTaxNamesFree from readdb.[ch] to txalign.[ch] * * Revision 6.9 2002/01/23 19:32:25 jianye * Added checkLinkoutType() * * Revision 6.8 2002/01/23 17:54:50 jianye * Added SHOW_LINKOUT def * * Revision 6.7 2001/07/23 20:20:12 dondosha * 
Made replace_bytestore_data function public for use in web blast2seq
 *
 * Revision 6.6 2001/06/21 19:42:18 shavirin
 * Moved here definitions related to Taxonomy names.
 *
 * Revision 6.5 2001/06/21 18:26:27 shavirin
 * Moved here functions to get Taxonomy names information encoded in
 * the Bioseq returned from the Blast database.
 *
 * Revision 6.4 2001/05/15 17:18:26 egorov
 * Added txalign_options to AlignStatOption structure
 *
 * Revision 6.3 2001/03/23 17:24:44 madden
 * Add FDGetDeflineAsnFromBioseq from readdb.[ch]
 *
 * Revision 6.2 2000/12/14 17:08:53 shavirin
 * Added additinal label " #include /* This define should be added to include BlastDefLineSet* functions in fdlobj.h */

/* NOTE(review): in this copy of the file the end of the revision log, the
   opening of the include guard (the file ends with an #endif that has no
   visible matching #if) and the name(s) of the first #include appear to be
   missing just before this point - TODO restore them from the upstream
   source. */

#ifndef NLM_GENERATED_CODE_PROTO
#define NLM_GENERATED_CODE_PROTO
#endif

/* NOTE(review): the header name of the following #include is missing in this
   copy; going by the comment above it is presumably fdlobj.h - TODO confirm
   and restore. */
#include

/****************************************************************************/
/* DEFINES */
/****************************************************************************/
#define WEBB_asize 23                 /* webb's matrix */
#define TX_MATRIX_SIZE 128            /* size of the matrix for showing the text alignment */

/*
 * TXALIGN_* display options.  Every value below is a distinct single bit of
 * a Uint4 (0x00000001 through 0x04000000), so the options can be OR-ed
 * together.  The list is kept in its historical order, not in bit order.
 */
#define TXALIGN_LOCUS_NAME         ((Uint4)0x00000100) /* display the locus name */
#define TXALIGN_MASTER             ((Uint4)0x00000002) /* display the alignment as multiple pairwise alignment */
#define TXALIGN_MISMATCH           ((Uint4)0x00000004) /* display the mismatched residue of the sequence */
#define TXALIGN_MATRIX_VAL         ((Uint4)0x00000008) /* display the matrix of the alignment */
#define TXALIGN_HTML               ((Uint4)0x00000010) /* display the format in a HTML page */
#define TXALIGN_HTML_RELATIVE      ((Uint4)0x00002000) /* the HTML (if enabled by TXALIGN_HTML) should be relative */
#define TXALIGN_SHOW_RULER         ((Uint4)0x00000020) /* display the ruler for the text alignment */
#define TXALIGN_COMPRESS           ((Uint4)0x00000040) /* make the space for label smaller */
#define TXALIGN_END_NUM            ((Uint4)0x00000080) /* show the number at the end */
#define TXALIGN_FLAT_INS           ((Uint4)0x00000001) /* flat the insertions in multiple pairwise alignment */
#define TXALIGN_SHOW_GI            ((Uint4)0x00000200) /* show the gi in the defline. */
#define TXALIGN_SHOW_NO_OF_SEGS    ((Uint4)0x00000400) /* show the number of (sum statistics) segments in the one-line descriptions? */
#define TXALIGN_BLASTX_SPECIAL     ((Uint4)0x00000800) /* display the BLASTX results as protein alignment */
#define TXALIGN_SHOW_QS            ((Uint4)0x00001000) /* show the results as query-subject */
#define TXALIGN_SPLIT_ANNOT        ((Uint4)0x00004000) /* for Seq-annot from the same alignment, split the display into individual panels */
#define TXALIGN_SHOW_STRAND        ((Uint4)0x00008000) /* for displaying the strand even in the compact form */
#define TXALIGN_BLUNT_END          ((Uint4)0x00010000) /* showing the blunt-end for the end gaps */
#define TXALIGN_DO_NOT_PRINT_TITLE ((Uint4)0x00020000) /* do not print title before list of deflines */
#define TXALIGN_CHECK_BOX          ((Uint4)0x00040000) /* place checkbox before the line (HTML only) */
#define TXALIGN_CHECK_BOX_CHECKED  ((Uint4)0x00080000) /* make default value for checkboxes ON (HTML only) */
#define TXALIGN_NEW_GIF            ((Uint4)0x00100000) /* print new.gif near new alignments (HTML only) */
#define TXALIGN_NO_ENTREZ          ((Uint4)0x00200000) /* Use dumpgnl syntax instead of ENTREZ. */
#define TXALIGN_NO_DUMPGNL         ((Uint4)0x00400000) /* No dumpgnl output, even if GNL. */
#define TXALIGN_TARGET_IN_LINKS    ((Uint4)0x00800000) /* Put TARGET in Entrez links */
#define TXALIGN_SHOW_LINKOUT       ((Uint4)0x01000000) /* print linkout info */
#define TXALIGN_BL2SEQ_LINK        ((Uint4) 0x02000000) /* Add link to Blast 2 Sequences */
#define TXALIGN_GET_SEQUENCE       ((Uint4)0x04000000) /* get sequence ability */

/* Used by psi-blast to distinguish first from subsequent passes.
*/ #define FIRST_PASS 1 #define NOT_FIRST_PASS_REPEATS 2 #define NOT_FIRST_PASS_NEW 3 #define ASN_DEFLINE_OBJ_LABEL "ASN1_BlastDefLine" #define TAX_DATA_OBJ_LABEL "TaxNamesData" /* Bit meanings in membership element of ASN.1 structured definition lines */ #define EST_HUMAN_BIT 0x1 #define EST_MOUSE_BIT 0x2 #define SWISSPROT_BIT 0x4 #define PDB_BIT 0x8 #define REFSEQ_BIT 0x10 #define CONTIG_BIT 0x20 #define NUM_TAX_NAMES 4 #define SCI_NAME_POS 0 #define COMMON_NAME_POS 1 #define BLAST_NAME_POS 2 #define S_KING_POS 3 /* ---------------------------------------------------------------------*/ /* -- Here is set of definitions used with taxonomy info database ----- */ /* ---------------------------------------------------------------------*/ typedef struct _RDBTaxNames { Int4 tax_id; CharPtr sci_name; CharPtr common_name; CharPtr blast_name; Char s_king[3]; } RDBTaxNames, *RDBTaxNamesPtr; void RDBTaxNamesFree(RDBTaxNamesPtr tnames); RDBTaxNamesPtr RDBTaxNamesClone(RDBTaxNamesPtr orig); /****************************************************************************/ /* TYPEDEFS */ /****************************************************************************/ typedef struct text_buf{ /*for a generic feature comment*/ Int4 pos; /*position for label*/ Uint1 strand; /*the orientation*/ CharPtr label; /*label for the feature*/ CharPtr buf; /*the buffer for features other than cds for aa*/ Int2Ptr matrix_val; /*the value of each residue from the matrix */ CharPtr codon[3]; /*for features such as cds for aa*/ Int2 frame; /*for cds for feature*/ Int4 f_pos; /*position of the current buf*/ Uint2 exonCount; /*count the number of exons, useded in cds for aa*/ Uint4 itemID; /*feature's itemID. 
It is used to check identity*/ Uint2 feattype; Uint2 subtype; Uint2 entityID; Uint2 seqEntityID; /*the entityID for the sequence*/ Uint4 bsp_itemID; /*itemID for the Bioseqs*/ Boolean extra_space; }TextAlignBuf, PNTR TextAlignBufPtr; typedef struct align_summary { Int4 positive; /*number of positive residues*/ Int4 identical; /*number of identical residues*/ Int4 gaps; /*number of the gaps*/ Int4 totlen; /*total length of the alignemtns*/ Int4Ptr PNTR matrix; /*matrix for protein alignments*/ Int4Ptr PNTR posMatrix; /*matrix for PSSM protein alignments*/ SeqIdPtr master_sip; /*the Seq-id of the master sequence*/ SeqIdPtr target_sip; /*the Seq-id for the target sequence*/ Boolean is_aa; /*are the sequences nucleotide or protein?*/ Uint1 m_strand, /* strand of the query. */ t_strand; /* strand of the database sequence. */ Int4 m_frame, /* Frame of the query. */ t_frame; /* Frame of the database sequence. */ Boolean m_frame_set, /* query frame was set. */ t_frame_set; /* database sequence frame was set. */ Int4 master_from; /* from for master sequence */ Int4 master_to; /* to for master sequence */ Int4 target_from; /* from for target sequence */ Int4 target_to; /* to region for master sequence */ Boolean ooframe; /* Is this out-of-frame alignment ? */ }AlignSum, PNTR AlignSumPtr; typedef struct align_stat_option { /*options for printing the statistics*/ Int2 line_len; Int2 indent_len; Boolean html_hot_link; /* Prepare HTML output. */ Boolean html_hot_link_relative; /* Make the HTML link relative. */ Boolean show_gi; Boolean no_entrez; /* Do not use Entrez format for HTML links. */ Boolean no_dumpgnl; /* Do not use dumpgnl format even if GNL. */ FILE *fp; CharPtr buf; BioseqPtr bsp; ScorePtr sp; Int4 identical; /*number of identical residues*/ Int4 gaps; /*number of the gaps*/ Int4 positive; /*number of the positive residues*/ Int4 align_len; /*the length of the alignment. 
EXCLUDE the GAPS*/ Boolean follower; /* If TRUE, this is NOT the first alignment for this sequences. */ Uint1 m_strand, /* strand of the query. */ t_strand; /* strand of the database sequence. */ Int2 m_frame, /* Frame of the query. */ t_frame; /* Frame of the database sequence. */ /* This information was added first only for creation of very specific links to the single alignment. However - may be it will be used later for something else */ Int4 master_from; /* from for master sequence */ Int4 master_to; /* to for master sequence */ Int4 target_from; /* from for target sequence */ Int4 target_to; /* to region for master sequence */ CharPtr segs; /* "-" ("," "-" )* */ CharPtr db_name; /* searched databases list */ CharPtr blast_type; /* string used to choose proper config parms */ Uint4 txalign_options;/* the TXALIGN_* options */ }AlignStatOption, PNTR AlignStatOptionPtr; /****************************************************************************/ /* FINCTION DEFINITIONS */ /****************************************************************************/ #undef NLM_EXTERN #ifdef NLM_IMPORT #define NLM_EXTERN NLM_IMPORT #else #define NLM_EXTERN extern #endif #ifdef __cplusplus extern "C" { #endif /***************************************************************** * * find_score_in_align(align, chain, asp) * align: the Seq-align point * chain: for multiple segment Seq-aligns, such as DenseDiag and * StdSeg, the order within the Seq-align * asp: the structure that records and stores the positive, * identical residues * the function only works for DenseDiag and Stdseg for now * *****************************************************************/ NLM_EXTERN ScorePtr find_score_in_align PROTO((SeqAlignPtr align, Uint2 chain, AlignSumPtr asp)); /*the default formatting function for printing the scores*/ NLM_EXTERN int LIBCALLBACK FormatScoreFunc PROTO((AlignStatOptionPtr asop)); /********************************************************************************** * * Given a 
chain of annots (ValNodePtrs) they are all printed out, one pattern * at a time. * *************************************************************************************/ NLM_EXTERN Boolean LIBCALL ShowTextAlignFromAnnotExtra PROTO((BioseqPtr bsp, ValNodePtr vnp, SeqLocPtr seqloc, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))); /***************************************************************************** * * ShowTextAlignFromAnnot(annot, locus, line_len, fp, master, f_order) * display the alignment stored in a Seq-annot in a text file * annot: the Seq-annot pointer * locus: if TRUE, show the locus name as the sequence label, otherwise, * use the accession * line_len: the number of sequence char per line * fp: The file pointer to store the text output * master: if TRUE, show the result as a master-slave type multiple pair * wise alignment. if FALSE, display one alignment after the other * f_order: the user selected feature type and order to be shown together * with the alignment * return TRUE for success, FALSE for fail * *****************************************************************************/ NLM_EXTERN Boolean ShowTextAlignFromAnnot PROTO(( SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func) PROTO((AlignStatOptionPtr)) )); /** * same as ShowTextAlignFromAnnot * the db_name argument is used to make links to * incomplete genomes */ NLM_EXTERN Boolean ShowTextAlignFromAnnot2 PROTO(( SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func) PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type )); /** * same as ShowTextAlignFromAnnot * the posMatrix used to show 
alignments using PSSM
 */
NLM_EXTERN Boolean ShowTextAlignFromAnnot3 PROTO((
    SeqAnnotPtr hannot,
    Int4 line_len,
    FILE *fp,
    Uint1Ptr featureOrder,
    Uint1Ptr groupOrder,
    Uint4 option,
    Int4Ptr PNTR matrix,
    ValNodePtr mask_loc,
    int (LIBCALLBACK *fmt_score_func) PROTO((AlignStatOptionPtr)),
    CharPtr db_name,
    CharPtr blast_type,
    Int4Ptr PNTR posMatrix
));

/*
 * Simple printing function for debugging; options are kept to a minimum.
 * fp==NULL ==> stdout
 */
NLM_EXTERN void LIBCALL SeqAlignPrint(SeqAlignPtr salp, FILE *fp);

/***********************************************************************
*
*	ShowAlignNodeText(anp_list, num_node, line_len, locus,
*	fp)
*	convert the alignment data in the list of AlignNode into text written
*	to a file
*	anp_list: a list (ValNodePtr) of AlignNode processed from Seq-aligns
*	num_node: the number of AlignNode to be processed currently. It can
*	be used in the cases where only the top num_node in the anp_list is
*	going to be processed. This can be useful to make vertically cashed
*	buffer
*	line_len: the length of sequence char per line
*	locus: if TRUE, show the locus name
*	fp: the file Pointer
*	left: the leftmost position for display
*	right: the rightmost position for display
*	align_type: the type of alignment. DNA-protein alignment?
* * return TRUE for success, FALSE for fail * ************************************************************************/ NLM_EXTERN Boolean ShowAlignNodeText PROTO(( ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR u_matrix, int (LIBCALLBACK *fmt_score_func) PROTO((AlignStatOptionPtr)) )); NLM_EXTERN Boolean ShowAlignNodeText2 PROTO(( ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR u_matrix, int (LIBCALLBACK *fmt_score_func) PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix )); /* Print a summary of the Sequences producing significant alignments. */ NLM_EXTERN Boolean LIBCALL PrintDefLinesExtra PROTO(( ValNodePtr vnp, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, SeqLocPtr seqloc)); NLM_EXTERN Boolean LIBCALL PrintDefLinesFromAnnot PROTO(( SeqAnnotPtr seqannot, Int4 line_length, FILE *fp, Uint4 options, Int4 mode, Int2Ptr marks )); NLM_EXTERN Boolean LIBCALL PrintDefLinesFromSeqAlign PROTO(( SeqAlignPtr seqalign, Int4 line_length, FILE *fp, Uint4 options, Int4 mode, Int2Ptr marks )); NLM_EXTERN Boolean LIBCALL PrintDefLinesFromSeqAlignEx PROTO(( SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, Int4 number_of_descriptions )); NLM_EXTERN Boolean LIBCALL PrintDefLinesFromSeqAlignEx2 PROTO(( SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, Int4 number_of_descriptions, CharPtr db_name, CharPtr blast_type )); NLM_EXTERN Boolean LIBCALL PrintDefLinesFromSeqAlignWithPath PROTO(( SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, Int4 number_of_descriptions, CharPtr db_name, CharPtr blast_type, CharPtr www_root_path )); #define TX_KNOCK_OFF_ALLOWED 0x01 #define TX_INTEGER_BIT_SCORE 0x02 NLM_EXTERN void LIBCALL ScoreAndEvalueToBuffers PROTO((FloatHi 
bit_score, FloatHi evalue, CharPtr bit_score_buf, CharPtr PNTR evalue_buf, Uint1 format_options)); /* Fills in the slots with score, bit_score, etc. from the SeqAlign. */ /*options for display of the text alignment*/ #define TEXT_MP_MISMATCH 1 /*multiple pairwise alignment with mismatch*/ #define TEXT_MP 2 /*multiple pairwise without mismatch*/ #define TEXT_MPFLAT_MISMATCH 3 /*flat multile with mismatch*/ #define TEXT_MPFLAT 4 /*flat multiple without mismatch*/ #define TEXT_BLAST 5 /*traditional blast output*/ /*can the current alignnode be printed for text view*/ NLM_EXTERN Boolean PrintAlignForText PROTO((AnnotInfoPtr info, AlignNodePtr anp)); /* * * determine the option for alignment based on the named tx_option * */ NLM_EXTERN Uint4 GetTxAlignOptionValue PROTO((Uint1 tx_option, BoolPtr hide_feature, BoolPtr print_score, BoolPtr split_display)); /* Gets the SeqIdPtr for the subject sequence from the first SeqAlign. The SeqIdPtr is not saved and should not be deleted. */ /* Marks structure is used for PSI Blast to print .gif marsk near alignments and to check for convergence */ #define SEQ_ALIGN_MARK_PREVGOOD 1 #define SEQ_ALIGN_MARK_PREVCHECKED 2 /* the following serves only for old stuff which uses posRepeat... */ #define SEQ_ALIGN_MARK_REPEAT 4 typedef struct MarkSeqAlign { Int4 kind; /* bitmask for the mark */ struct MarkSeqAlign *next; } MarkSeqAlign, PNTR MarkSeqAlignPtr; NLM_EXTERN Boolean LIBCALL FilterTheDefline PROTO((BioseqPtr bsp, SeqIdPtr gi_list_head, CharPtr buffer_id, Int4 buffer_id_length, CharPtr PNTR titlepp)); NLM_EXTERN Boolean FormatScoreFromSeqAlign (SeqAlignPtr sap, Uint4 option, FILE *fp, Int4Ptr PNTR matrix, Boolean follower); NLM_EXTERN SeqFeatPtr make_fake_cds(BioseqPtr m_bsp, Int4 start, Int4 stop, Uint1 strand); /* Obtains the genetic code from a BioseqPtr, assuming that a fetch function has been enabled. 
*/ NLM_EXTERN CharPtr GetGeneticCodeFromSeqId (SeqIdPtr sip); /* Translate DNA sequence in all frames and create protein sequence for Out-Of-Frame gap algorithm */ NLM_EXTERN CharPtr OOFTranslateDNAInAllFrames(Uint1Ptr dna, Int4 length, SeqIdPtr query_id); /************************************************************************* Function : OOFShowBlastAlignment(); Purpose : function to display a BLAST output with Out-of-Frame information Parameters : sap; seqalign mask; list of masked regions in the query fp; output file; tx_option; some display options Return value : FALSE if failure ***************************************************************************/ NLM_EXTERN Boolean OOFShowBlastAlignment(SeqAlignPtr sap, ValNodePtr mask, FILE *fp, Uint4 tx_option, Int4Ptr PNTR matrix); /* Test functions to display Out-of-Frame traceback */ NLM_EXTERN void OOFDisplayTraceBack1(Int4Ptr a, CharPtr dna, CharPtr pro, Int4 ld, Int4 lp, Int4 q_start, Int4 p_start); NLM_EXTERN void OOFDisplayTraceBack2(Int4Ptr a, CharPtr dna, CharPtr pro, Int4 ld, Int4 lp, Int4 q_start, Int4 p_start); BlastDefLinePtr FDGetDeflineAsnFromBioseq(BioseqPtr bsp); RDBTaxNamesPtr FDGetTaxNamesFromBioseq(BioseqPtr bsp, Int4 taxid); NLM_EXTERN Boolean replace_bytestore_data PROTO((BioseqPtr bsp, ValNodePtr bs_list, Uint1 frame)); NLM_EXTERN Boolean checkLinkoutType(BlastDefLinePtr bdfl, Uint1 linkoutType); /* return bdlp containing the sip from a chain of bdlp*/ NLM_EXTERN BlastDefLinePtr getBlastDefLineForSeqId(BlastDefLinePtr bdlp, SeqIdPtr sip); Boolean PairwiseSeqAlignHasLinkout(SeqAlignPtr sap, Uint1 linkoutType); ValNodePtr CreateMaskByteStore (ValNodePtr mask_list); #ifdef __cplusplus } #endif #undef NLM_EXTERN #ifdef NLM_EXPORT #define NLM_EXTERN NLM_EXPORT #else #define NLM_EXTERN #endif #endif