/* Easel's foundation. * * Contents: * 1. Exception and fatal error handling. * 2. Memory allocation/deallocation conventions. * 3. Standard banner for Easel miniapplications. * 4. Improved replacements for some C library functions. * 5. Portable drop-in replacements for nonstandard C functions. * 6. Additional string functions, esl_str*() * 7. File path/name manipulation, including tmpfiles. * 8. Typed comparison functions. * 9. Unit tests. * 10. Test driver. * 11. Examples. * 12. Copyright and license. */ #include "esl_config.h" #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #ifdef _POSIX_VERSION #include #include #endif #ifdef HAVE_MPI #include /* MPI_Abort() may be used in esl_fatal() or other program killers */ #endif #include "easel.h" /***************************************************************** * 1. Exception and fatal error handling. *****************************************************************/ static esl_exception_handler_f esl_exception_handler = NULL; /* Function: esl_exception() * Synopsis: Throw an exception. * * Purpose: Throw an exception. An "exception" is defined by Easel * as an internal error that shouldn't happen and/or is * outside the user's control; as opposed to "failures", that * are to be expected, and within user control, and * therefore normal. By default, exceptions are fatal. * A program that wishes to be more robust can register * a non-fatal exception handler. * * Easel programs normally call one of the exception-handling * wrappers or , which * handle the overhead of passing in , , * and . is rarely called directly. * * If no custom exception handler has been registered, the * default behavior is to print a brief message to * then , resulting in a nonzero exit code from the * program. Depending on what , , * , and the -formatted * are, this output looks like: * * Fatal exception (source file foo.c, line 42): * Something wicked this way came. 
* * Additionally, in an MPI parallel program, the default fatal * handler aborts all processes (with ), not just * the one that called . * * Args: errcode - Easel error code, such as eslEINVAL. See easel.h. * use_errno - if TRUE, also use perror() to report POSIX errno message. * sourcefile - Name of offending source file; normally __FILE__. * sourceline - Name of offending source line; normally __LINE__. * format - formatted exception message, followed * by any additional necessary arguments for that * message. * * Returns: void. * * Throws: No abnormal error conditions. (Who watches the watchers?) */ void esl_exception(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, ...) { va_list argp; #ifdef HAVE_MPI int mpiflag; #endif if (esl_exception_handler != NULL) { va_start(argp, format); (*esl_exception_handler)(errcode, use_errno, sourcefile, sourceline, format, argp); va_end(argp); return; } else { fprintf(stderr, "Fatal exception (source file %s, line %d):\n", sourcefile, sourceline); va_start(argp, format); vfprintf(stderr, format, argp); va_end(argp); fprintf(stderr, "\n"); if (use_errno && errno) perror("system error"); fflush(stderr); #ifdef HAVE_MPI MPI_Initialized(&mpiflag); /* we're assuming we can do this, even in a corrupted, dying process...? */ if (mpiflag) MPI_Abort(MPI_COMM_WORLD, 1); #endif abort(); } } /* Function: esl_exception_SetHandler() * Synopsis: Register a different exception handling function. * * Purpose: Register a different exception handling function, * . When an exception occurs, the handler * receives at least four arguments: , , * , and . * * is an Easel error code, such as * . See for a list of all codes. * * is TRUE for POSIX system call failures. The * handler may then use POSIX to format/print an * additional message, using or . * * is the name of the Easel source code file * in which the exception occurred, and is * the line number. 
* * is a -formatted string, followed by * a containing any additional arguments that * formatted message needs. Your custom exception handler * will probably use or to format * its error message. * * Args: handler - ptr to your custom exception handler. * * Returns: void. * * Throws: (no abnormal error conditions) */ void esl_exception_SetHandler(void (*handler)(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp)) { esl_exception_handler = handler; } /* Function: esl_exception_ResetDefaultHandler() * Synopsis: Restore default exception handling. * * Purpose: Restore default exception handling, which is to print * a simple error message to then (see * . * * An example where this might be useful is in a program * that only temporarily wants to catch one or more types * of normally fatal exceptions. * * If the default handler is already in effect, this * call has no effect (is a no-op). * * Args: (void) * * Returns: (void) * * Throws: (no abnormal error conditions) */ void esl_exception_ResetDefaultHandler(void) { esl_exception_handler = NULL; } /* Function: esl_nonfatal_handler() * Synopsis: A trivial example of a nonfatal exception handler. * * Purpose: This serves two purposes. First, it is the simplest * example of a nondefault exception handler. Second, this * is used in test harnesses, when they have * turned on to test that thrown errors * are handled properly when a nonfatal error handler is * registered by the application. * * Args: errcode - Easel error code, such as eslEINVAL. See easel.h. * use_errno - TRUE on POSIX system call failures; use * sourcefile - Name of offending source file; normally __FILE__. * sourceline - Name of offending source line; normally __LINE__. * format - formatted exception message. * argp - containing any additional necessary arguments for * the message. * * Returns: void. 
* * Throws: (no abnormal error conditions) */ void esl_nonfatal_handler(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp) { return; } /* Function: esl_fatal() * Synopsis: Kill a program immediately, for a "violation". * * Purpose: Kill a program for a "violation". In general this should only be used * in development or testing code, not in production * code. The main use of is in unit tests. * Another use is in assertions used in dev code. * * The only other case (and the only case that should be allowed in * production code) is in a true "function" (a function that returns * its answer, rather than an Easel error code), where Easel error * conventions can't be used (because it can't return an error code), * AND the error is guaranteed to be a coding error. For an example, * see , which triggers a violation if the code * checks for an option that isn't in the code. * * In an MPI-parallel program, the entire job is * terminated; all processes are aborted (, * not just the one that called . * * Args: format - formatted exception message, followed * by any additional necessary arguments for that * message. * * Returns: (void) * * Throws: (no abnormal error conditions) */ void esl_fatal(const char *format, ...) { va_list argp; #ifdef HAVE_MPI int mpiflag; #endif va_start(argp, format); vfprintf(stderr, format, argp); va_end(argp); fprintf(stderr, "\n"); fflush(stderr); #ifdef HAVE_MPI MPI_Initialized(&mpiflag); if (mpiflag) MPI_Abort(MPI_COMM_WORLD, 1); #endif exit(1); } /*---------------- end, error handling conventions --------------*/ /***************************************************************** * 2. Memory allocation/deallocation conventions. *****************************************************************/ /* Function: esl_Free2D() * * Purpose: Free a 2D pointer array

, where first dimension is * . (That is, the array is .) * Tolerates any of the pointers being NULL, to allow * sparse arrays. * * Returns: void. */ void esl_Free2D(void **p, int dim1) { int i; if (p != NULL) { for (i = 0; i < dim1; i++) if (p[i] != NULL) free(p[i]); free(p); } return; } /* Function: esl_Free3D() * * Purpose: Free a 3D pointer array

, where first and second * dimensions are ,. (That is, the array is * .) Tolerates any of the * pointers being NULL, to allow sparse arrays. * * Returns: void. */ void esl_Free3D(void ***p, int dim1, int dim2) { int i, j; if (p != NULL) { for (i = 0; i < dim1; i++) if (p[i] != NULL) { for (j = 0; j < dim2; j++) if (p[i][j] != NULL) free(p[i][j]); free(p[i]); } free(p); } } /*------------- end, memory allocation conventions --------------*/ /***************************************************************** * 3. Standard banner for Easel miniapplications. *****************************************************************/ /* Function: esl_banner() * Synopsis: print standard Easel application output header * * Purpose: Print the standard Easel command line application banner * to , constructing it from (the name of the * program) and a short one-line description . * For example, * * might result in: * * \begin{cchunk} * # compstruct :: compare RNA structures * # Easel 0.1 (February 2005) * # Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus * # Freely licensed under the Janelia Software License. * # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * \end{cchunk} * * would typically be an application's * , rather than a fixed string. This allows the * program to be renamed, or called under different names * via symlinks. Any path in the is discarded; * for instance, if is "/usr/local/bin/esl-compstruct", * "esl-compstruct" is used as the program name. * * Note: * Needs to pick up preprocessor #define's from easel.h, * as set by ./configure: * * symbol example * ------ ---------------- * EASEL_VERSION "0.1" * EASEL_DATE "May 2007" * EASEL_COPYRIGHT "Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus" * EASEL_LICENSE "Freely licensed under the Janelia Software License." * * Returns: on success. * * Throws: on allocation error. * on write error. 
*/
int
esl_banner(FILE *fp, char *progname, char *banner)
{
  char *appname = NULL;   /* <progname> with any leading path stripped; allocated by esl_FileTail() */
  int   status;

  status = esl_FileTail(progname, FALSE, &appname);
  if (status != eslOK) return status;

  /* Five banner lines; a failure on any one of them jumps to ERROR via the exception macro. */
  if (fprintf(fp, "# %s :: %s\n", appname, banner) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(fp, "# Easel %s (%s)\n", EASEL_VERSION, EASEL_DATE) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(fp, "# %s\n", EASEL_COPYRIGHT) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(fp, "# %s\n", EASEL_LICENSE) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(fp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n") < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  free(appname);
  return eslOK;

 ERROR:
  free(appname);
  return status;
}


/* Function:  esl_usage()
 * Synopsis:  print standard Easel application usage help line
 *
 * Purpose:   Given a usage string <usage> and the name of the program
 *            <progname>, output a standardized usage/help
 *            message. <usage> is minimally a one line synopsis like
 *            "[options] <file>", but it may extend to multiple
 *            lines to explain the command line arguments in more
 *            detail. It should not describe the options; that's the
 *            job of the getopts module, and its help display
 *            function.
 *
 *            This is used by the Easel miniapps, and may be useful in
 *            other applications as well.
 *
 *            As in <esl_banner()>, the <progname> is typically passed
 *            as <argv[0]>, and any path prefix is ignored.
 *
 *            For example, if <argv[0]> is </usr/local/bin/esl-compstruct>,
 *            then
 *
 *            \begin{cchunk}
 *              esl_usage(stdout, argv[0], "[options] <file>");
 *            \end{cchunk}
 *
 *            produces
 *
 *            \begin{cchunk}
 *              Usage: esl-compstruct [options] <file>
 *            \end{cchunk}
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    the status from <esl_FileTail()> on allocation failure.
 *            <eslEWRITE> on write failure.
*/
int
esl_usage(FILE *fp, char *progname, char *usage)
{
  char *appname = NULL;   /* <progname> with any leading path stripped; allocated by esl_FileTail() */
  int   status;

  status = esl_FileTail(progname, FALSE, &appname);
  if (status != eslOK) return status;

  if (fprintf(fp, "Usage: %s %s\n", appname, usage) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  free(appname);
  return eslOK;

 ERROR:
  free(appname);
  return status;
}
/*-------------------- end, standard miniapp banner --------------------------*/




/******************************************************************************
 * 4. Replacements for C library functions
 *  fgets()   ->  esl_fgets()     fgets() with dynamic allocation
 *  strdup()  ->  esl_strdup()    strdup() is not ANSI
 *  strcat()  ->  esl_strcat()    strcat() with dynamic allocation
 *  strtok()  ->  esl_strtok()    threadsafe strtok()
 *  sprintf() ->  esl_sprintf()   sprintf() with dynamic allocation
 *  strcmp()  ->  esl_strcmp()    strcmp() tolerant of NULL strings
 *****************************************************************************/

/* Function: esl_fgets()
 *
 * Purpose:  Dynamic allocation version of fgets(),
 *           capable of reading almost unlimited line lengths.
 *
 * Args:     buf - ptr to a string (may be reallocated)
 *           n   - ptr to current allocated length of buf,
 *                 (may be changed)
 *           fp  - open file ptr for reading
 *
 *           Before the first call to esl_fgets(),
 *           initialize buf to NULL and n to 0.
 *           They're a linked pair, so don't muck with the
 *           allocation of buf or the value of n while
 *           you're still doing esl_fgets() calls with them.
 *
 * Returns:  <eslOK> on success.
 *           Returns <eslEOF> on normal end-of-file.
 *
 *           When <eslOK>:
 *           <*buf> points to a <NUL>-terminated line from the file.
 *           <*n> contains the current allocated length for <*buf>.
 *
 *           Caller must free <*buf> eventually.
 *
 * Throws:   an error status on an allocation failure.
* * Example: char *buf = NULL; * int n = 0; * FILE *fp = fopen("my_file", "r"); * * while (esl_fgets(&buf, &n, fp) == eslOK) * { * do stuff with buf; * } * if (buf != NULL) free(buf); */ int esl_fgets(char **buf, int *n, FILE *fp) { int status; char *s; int len; int pos; if (*n == 0) { ESL_ALLOC(*buf, sizeof(char) * 128); *n = 128; } /* Simple case 1. We're sitting at EOF, or there's an error. * fgets() returns NULL, so we return EOF. */ if (fgets(*buf, *n, fp) == NULL) return eslEOF; /* Simple case 2. fgets() got a string, and it reached EOF doing it. * return success status, so caller can use * the last line; on the next call we'll * return the 0 for the EOF. */ if (feof(fp)) return eslOK; /* Simple case 3. We got a complete string, with \n, * and don't need to extend the buffer. */ len = strlen(*buf); if ((*buf)[len-1] == '\n') return eslOK; /* The case we're waiting for. We have an incomplete string, * and we have to extend the buffer one or more times. Make * sure we overwrite the previous fgets's \0 (hence +(n-1) * in first step, rather than 128, and reads of 129, not 128). */ pos = (*n)-1; while (1) { ESL_REALLOC(*buf, sizeof(char) * (*n+128)); *n += 128; s = *buf + pos; if (fgets(s, 129, fp) == NULL) return eslOK; len = strlen(s); if (s[len-1] == '\n') return eslOK; pos += 128; } /*NOTREACHED*/ return eslOK; ERROR: if (*buf != NULL) free(*buf); *buf = NULL; *n = 0; return status; } /* Function: esl_strdup() * * Purpose: Makes a duplicate of string , puts it in . * Caller can pass string length , if it's known, * to save a strlen() call; else pass -1 to have the string length * determined. * * Tolerates being ; in which case, * returns with <*ret_dup> set to . * * Args: s - string to duplicate (NUL-terminated) * n - length of string, if known; -1 if unknown. * ret_dup - RETURN: duplicate of . * * Returns: on success, and is valid. * * Throws: on allocation failure. 
*/ int esl_strdup(const char *s, int64_t n, char **ret_dup) { int status; char *new = NULL; if (ret_dup != NULL) *ret_dup = NULL; if (s == NULL) return eslOK; if (n < 0) n = strlen(s); ESL_ALLOC(new, sizeof(char) * (n+1)); strcpy(new, s); if (ret_dup != NULL) *ret_dup = new; else free(new); return eslOK; ERROR: if (new != NULL) free(new); if (ret_dup != NULL) *ret_dup = NULL; return status; } /* Function: esl_strcat() * * Purpose: Dynamic memory version of strcat(). * Appends to the string that points to, * extending allocation for dest if necessary. Caller * can optionally provide the length of <*dest> in * , and the length of in ; if * either of these is -1, calls * to determine the length. Providing length information, * if known, accelerates the routine. * * <*dest> may be , in which case this is equivalent * to a of (that is, <*dest> is allocated * rather than reallocated). * * may be , in which case is unmodified. * * Note: One timing experiment (100 successive appends of * 1-255 char) shows esl_strcat() has about a 20% * overhead relative to strcat(). If optional * length info is passed, esl_strcat() is about 30% * faster than strcat(). * * Args: dest - ptr to string (char **), '\0' terminated * ldest - length of dest, if known; or -1 if length unknown. * src - string to append to dest, '\0' terminated * lsrc - length of src, if known; or -1 if length unknown. * * Returns: on success; <*dest> is (probably) reallocated, * modified, and nul-terminated. * * Throws: on allocation failure; initial state of * is unaffected. */ int esl_strcat(char **dest, int64_t ldest, const char *src, int64_t lsrc) { int status; int64_t len1, len2; if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest)); else len1 = ldest; if (lsrc < 0) len2 = (( src == NULL) ? 
0 : strlen(src)); else len2 = lsrc; if (len2 == 0) return eslOK; ESL_REALLOC(*dest, sizeof(char) * (len1+len2+1)); memcpy((*dest)+len1, src, len2); (*dest)[len1+len2] = '\0'; return eslOK; ERROR: return status; } /* Function: esl_strmapcat() * Synopsis: Version of esl_strcat that uses an inmap. * * Purpose: Append the contents of string or memory line * of length to a string. The destination * string and its length are passed as pointers <*dest> * and <*ldest>, so the string can be reallocated * and the length updated. When appending, map each * character to a new character * in the destination string. The destination string * <*dest> is NUL-terminated on return (even if it * wasn't to begin with). * * One reason to use the inmap is to enable parsers to * ignore some characters in an input string or buffer, * such as whitespace (mapped to ). Of * course this means, unlike the new length * isn't just , because we don't know how many * characters get appended until we've processed them * through the inmap -- that's why this function takes * <*ldest> by reference, whereas takes it * by value. * * If <*dest> is a NUL-terminated string and the caller * doesn't know its length, <*ldest> may be passed as -1. * Providing the length saves a call. If <*dest> * is a memory line, providing <*ldest> is mandatory. Same * goes for and . * * <*dest> may be , in which case it is allocated * and considered to be an empty string to append to. * When <*dest> is the input <*ldest> should be <0> * or <-1>. * * The caller must provide a that it already knows * should be entirely appended to <*dest>, except for * perhaps some ignored characters. No characters may be * mapped to or . The reason for * this is that we're going to allocate <*dest> for * <*ldest+lsrc> chars. If were a large memory buffer, * only a fraction of which needed to be appended (up to * an or ), this reallocation would * be inefficient. 
* * Args: inmap - an Easel input map, inmap[0..127]; * inmap[0] is special: set to the 'unknown' character to * replace invalid input chars. * *dest - destination string or memory to append to, passed by reference * *ldest - length of <*dest> (or -1), passed by reference * src - string or memory to inmap and append to <*dest> * lsrc - length of to map and append (or -1). * * Returns: on success. Upon successful return, <*dest> is * reallocated and contains the new string (with from 0 to * appended characters), NUL-terminated. * * if one or more characters in the input * are mapped to . Appending nonetheless * proceeds to completion, with any illegal characters * represented as '?' in <*dest> and counted in <*ldest>. * This is a normal error, because the string may be * user input. The caller may want to call some sort of * validation function on if an error is * returned, in order to report some helpful diagnostics to * the user. * * Throws: on allocation or reallocation failure. * on internal coding error; for example, * if the inmap tries to map an input character to , * , or . On exceptions, <*dest> * and <*ldest> should not be used by the caller except to * free <*dest>; their state may have been corrupted. * * Note: This deliberately mirrors , so * that sequence file parsers have comparable behavior whether * they're working with text-mode or digital-mode input. * * Might be useful to create a variant that also handles * eslDSQ_EOD (and eslDSQ_EOL?) and returns the number of * residues parsed. This'd allow a FASTA parser, for * instance, to use this method while reading buffer pages * rather than lines; it could define '>' as eslDSQ_EOD. */ int esl_strmapcat(const ESL_DSQ *inmap, char **dest, int64_t *ldest, const char *src, esl_pos_t lsrc) { int status = eslOK; if (*ldest < 0) *ldest = ( (*dest) ? strlen(*dest) : 0); if ( lsrc < 0) lsrc = ( (*src) ? 
strlen(src) : 0); if (lsrc == 0) goto ERROR; /* that'll return eslOK, leaving *dest untouched, and *ldest its length. */ ESL_REALLOC(*dest, sizeof(char) * (*ldest + lsrc + 1)); /* includes case of a new alloc of *dest */ return esl_strmapcat_noalloc(inmap, *dest, ldest, src, lsrc); ERROR: return status; } /* Function: esl_strmapcat_noalloc() * Synopsis: Version of esl_strmapcat() that does no reallocation. * * Purpose: Same as , but with no reallocation. The * pointer to the destination string is passed by * value, not by reference, because it will not be changed. * Caller has allocated at least <*ldest + lsrc + 1> bytes * in . In this version, <*ldest> and are not * optional; caller must know the lengths of both the old * string and the new source. * * Note: (see note on esl_abc_dsqcat_noalloc() for rationale) */ int esl_strmapcat_noalloc(const ESL_DSQ *inmap, char *dest, int64_t *ldest, const char *src, esl_pos_t lsrc) { int64_t xpos; esl_pos_t cpos; ESL_DSQ x; int status = eslOK; for (xpos = *ldest, cpos = 0; cpos < lsrc; cpos++) { if (! isascii(src[cpos])) { dest[xpos++] = inmap[0]; status = eslEINVAL; continue; } x = inmap[(int) src[cpos]]; if (x <= 127) dest[xpos++] = x; else switch (x) { case eslDSQ_SENTINEL: ESL_EXCEPTION(eslEINCONCEIVABLE, "input char mapped to eslDSQ_SENTINEL"); break; case eslDSQ_ILLEGAL: dest[xpos++] = inmap[0]; status = eslEINVAL; break; case eslDSQ_IGNORED: break; case eslDSQ_EOL: ESL_EXCEPTION(eslEINCONCEIVABLE, "input char mapped to eslDSQ_EOL"); break; case eslDSQ_EOD: ESL_EXCEPTION(eslEINCONCEIVABLE, "input char mapped to eslDSQ_EOD"); break; default: ESL_EXCEPTION(eslEINCONCEIVABLE, "bad inmap, no such ESL_DSQ code"); break; } } dest[xpos] = '\0'; *ldest = xpos; return status; } /* Function: esl_strtok() * Synopsis: Threadsafe version of C's * * Purpose: Thread-safe version of for parsing next token in * a string. 
*
 *           Increments <*s> while <**s> is a character in <delim>,
 *           then stops; the first non-<delim> character defines the
 *           beginning of a token. Increments <*s> until it reaches
 *           the next delim character (or \verb+\0+); this defines the end
 *           of the token, and this character is replaced with
 *           \verb+\0+. <*s> is then reset to point to the next character
 *           after the \verb+\0+ that was written, so successive calls can
 *           extract tokens in succession. Sets <*ret_tok> to point at
 *           the beginning of the token, and returns <eslOK>.
 *
 *           If a token is not found -- if <*s> already points to
 *           \verb+\0+, or to a string composed entirely of characters in
 *           <delim> -- then returns <eslEOL>, with <*ret_tok> set to
 *           <NULL>.
 *
 *           <*s> cannot be a constant string, since we write \verb+\0+'s
 *           to it; caller must be willing to have this string
 *           modified. And since we walk <*s> through the string as we
 *           parse, the caller wants to use a tmp pointer <*s>, not
 *           the original string itself.
 *
 * Example:
 *           char *tok;
 *           char *s;
 *           char  buf[50] = "This is  a sentence.";
 *
 *           s = buf;
 *           esl_strtok(&s, " ", &tok);
 *                tok is "This"; s is "is  a sentence."
 *           esl_strtok(&s, " ", &tok);
 *                tok is "is"; s is " a sentence.".
 *           esl_strtok(&s, " ", &tok);
 *                tok is "a"; s is "sentence.".
 *           esl_strtok(&s, " ", &tok);
 *                tok is "sentence."; s is "\0".
 *           esl_strtok(&s, " ", &tok);
 *                returned eslEOL; tok is NULL; s is "\0".
 *
 * Args:     s        - a tmp, modifiable ptr to a string
 *           delim    - characters that delimits tokens
 *           ret_tok  - RETURN: ptr to \0-terminated token
 *
 * Returns:  <eslOK> on success, <*ret_tok> points to next token, and
 *           <*s> points to next character following the token.
 *
 *           Returns <eslEOL> on end of line; in which case <*s>
 *           points to the terminal \verb+\0+ on the line, and <*ret_tok>
 *           is <NULL>.
*/ int esl_strtok(char **s, char *delim, char **ret_tok) { return esl_strtok_adv(s, delim, ret_tok, NULL, NULL); } /* Function: esl_strtok_adv() * Synopsis: More advanced interface to * * Purpose: Same as , except the caller may also * optionally retrieve the length of the token in <*opt_toklen>, * and the token-ending character that was replaced by \verb+\0+ * in <*opt_endchar>. * * Args: s - a tmp, modifiable ptr to string * delim - characters that delimits tokens * ret_tok - RETURN: ptr to \0-terminated token string * opt_toklen - optRETURN: length of token; pass NULL if not wanted * opt_endchar - optRETURN: character that was replaced by <\0>. * * Returns: on success, <*ret_tok> points to next token, <*s> * points to next character following the token, * <*opt_toklen> is the length of the token in * characters (excluding its terminal \verb+\0+), and <*opt_endchar> * is the character that got replaced by \verb+\0+ to form the token. * * Returns if no token is found (end of line); in * which case <*s> points to the terminal \verb+\0+ on the line, * <*ret_tok> is , <*opt_toklen> is 0 and * <*opt_endchar> is \verb+\0+. */ int esl_strtok_adv(char **s, char *delim, char **ret_tok, int *opt_toklen, char *opt_endchar) { char *end; char *tok = *s; char c = '\0'; int n = 0; int status = eslEOL; /* unless proven otherwise */ tok += strspn(tok, delim); if (! *tok) tok = NULL; /* if *tok = 0, EOL, no token left */ else { n = strcspn(tok, delim); end = tok + n; if (*end == '\0') *s = end; /* a final token that extends to end of string */ else { c = *end; /* internal token: terminate with \0 */ *end = '\0'; *s = end+1; } status = eslOK; } *ret_tok = tok; if (opt_toklen != NULL) *opt_toklen = n; if (opt_endchar != NULL) *opt_endchar = c; return status; } /* Function: esl_sprintf() * Synopsis: Dynamic allocation version of sprintf(). * * Purpose: Like ANSI C's , except the string * result is dynamically allocated, and returned * through <*ret_s>. 
* * Caller is responsible for free'ing <*ret_s>. * * As a special case to facilitate some optional string * initializations, if is , <*ret_s> is set * to . * * Returns: on success, and <*ret_s> is the resulting * string. * * Throws: on allocation failure. * if a <*printf()> library call fails. */ int esl_sprintf(char **ret_s, const char *format, ...) { va_list ap; int status; va_start(ap, format); status = esl_vsprintf(ret_s, format, &ap); va_end(ap); return status; } /* Function: esl_vsprintf() * Synopsis: Dynamic allocation version of vsprintf() * * Purpose: Like ANSI C's , except the string * result is dynamically allocated, and returned * through <*ret_s>. * * Caller is responsible for free'ing <*ret_s>. * * As a special case to facilitate some optional string * initializations, if is , <*ret_s> is set * to . * * Returns: on success, and <*ret_s> is the resulting * string. * * Throws: on allocation failure. * if a <*printf()> library call fails. */ int esl_vsprintf(char **ret_s, const char *format, va_list *ap) { char *s = NULL; va_list ap2; int n1, n2; int status; if (format == NULL) { *ret_s = NULL; return eslOK; } va_copy(ap2, *ap); n1 = strlen(format) * 2; /* initial guess at string size needed */ ESL_ALLOC(s, sizeof(char) * (n1+1)); if ((n2 = vsnprintf(s, n1+1, format, *ap)) >= n1) { ESL_REALLOC(s, sizeof(char) * (n2+1)); if (vsnprintf(s, n2+1, format, ap2) == -1) ESL_EXCEPTION(eslESYS, "vsnprintf() failed"); } else if (n2 == -1) ESL_EXCEPTION(eslESYS, "vsnprintf() failed"); va_end(ap2); *ret_s = s; return eslOK; ERROR: if (s != NULL) free(s); va_end(ap2); *ret_s = NULL; return status; } /* Function: esl_strcmp() * Synopsis: a strcmp() that treats NULL as empty string. * * Purpose: A version of that accepts * strings. If both and are non- * they are compared by . If both are * , return 0 (as if they are identical * strings). If only (or ) is non-, * return 1 (or -1), corresponding to ordering * any non- string as greater than a * string. 
* * (Easel routinely uses NULL to mean an unset optional * string, and often needs to compare two strings for * equality.) * * Returns: 0 if ; 1 if s2>; -1 if . */ int esl_strcmp(const char *s1, const char *s2) { if (s1 && s2) return strcmp(s1, s2); else if (s1) return 1; else if (s2) return -1; else return 0; } /*--------- end, improved replacement ANSI C functions ----------*/ /***************************************************************** * 5. Portable drop-in replacements for non-standard C functions *****************************************************************/ #ifndef HAVE_STRCASECMP /* Function: esl_strcasecmp() * * Purpose: Compare strings and . Return -1 if * is alphabetically less than , 0 if they * match, and 1 if is alphabetically greater * than . All matching is case-insensitive. * * Args: s1 - string 1, \0 terminated * s2 - string 2, \0 terminated * * Returns: -1, 0, or 1, if is less than, equal, or * greater than , case-insensitively. * * Throws: (no abnormal error conditions) */ int esl_strcasecmp(const char *s1, const char *s2) { int i, c1, c2; for (i = 0; s1[i] != '\0' && s2[i] != '\0'; i++) { c1 = s1[i]; /* total paranoia. don't trust toupper() to */ c2 = s2[i]; /* leave the original unmodified; make a copy. */ if (islower(c1)) c1 = toupper(c1); if (islower(c2)) c2 = toupper(c2); if (c1 < c2) return -1; else if (c1 > c2) return 1; } if (s1[i] != '\0') return 1; /* prefixes match, but s1 is longer */ else if (s2[i] != '\0') return -1; /* prefixes match, s2 is longer */ return 0; /* else, a case-insensitive match. */ } #endif /* ! HAVE_STRCASECMP */ /*------------- end, portable drop-in replacements --------------*/ /***************************************************************** * 6. Additional string functions, esl_str*() *****************************************************************/ /* Function: esl_strchop() * * Purpose: Chops trailing whitespace off of a string (or if * is NULL, do nothing). 
* is the length of the input string, if known; or pass * if length is unknown. * * Returns: on success. * * Throws: (no abnormal error conditions) * * Xref: from squid's StringChop(). */ int esl_strchop(char *s, int64_t n) { int i; if (s == NULL) return eslOK; if (n < 0) n = strlen(s); for (i = n-1; i>=0 && isspace((int) s[i]); i--); s[i+1] = '\0'; return eslOK; } /* Function: esl_strdealign() * Synopsis: Dealign a string according to gaps in a reference aseq. * * Purpose: Dealign string in place, by removing any characters * aligned to gaps in . Gap characters are defined in the * string ; for example, <-_.>. Optionally return the * unaligned length of in characters in <*opt_rlen>. * * By providing a reference to dealign against, this * function can dealign aligned annotation strings, such as * secondary structure or surface accessibility strings. * If is the same as , then the aligned sequence * itself is dealigned in place. * * To dealign both annotations and sequence, do the * sequence last, since you need it as the reference * when doing the annotations. * * It is safe to pass a (an unset optional * annotation), in which case the function no-ops and * returns . * * Args: s - string to dealign * aseq - reference aligned sequence seq * gapchars - definition of gap characters ("-_." for example) * opt_rlen - optRETURN: number of residues in after dealign * * Returns: on success. */ int esl_strdealign(char *s, const char *aseq, const char *gapchars, int64_t *opt_rlen) { int64_t n = 0; int64_t apos; if (s == NULL) return eslOK; for (apos = 0; aseq[apos] != '\0'; apos++) if (strchr(gapchars, aseq[apos]) == NULL) s[n++] = s[apos]; s[n] = '\0'; if (opt_rlen != NULL) *opt_rlen = n; return eslOK; } /* Function: esl_str_IsBlank() * Synopsis: Return TRUE if is all whitespace; else FALSE. * * Purpose: Given a NUL-terminated string ; return if * string is entirely whitespace (as defined by ), * and return FALSE if not. 
*/ int esl_str_IsBlank(char *s) { for (; *s; s++) if (!isspace(*s)) return FALSE; return TRUE; } /* Function: esl_str_IsInteger() * Synopsis: Return TRUE if represents an integer; else FALSE. * * Purpose: Given a NUL-terminated string , return TRUE * if the complete string is convertible to a base-10 integer * by the rules of or . * * Leading and trailing whitespace is allowed, but otherwise * the entire string must be convertable. (Unlike * itself, which will convert a prefix. ' 99 foo' converts * to 99, but will return FALSE. * * If is , FALSE is returned. */ int esl_str_IsInteger(char *s) { char *endp; long val; if (s == NULL) return FALSE; /* it's NULL */ val = strtol(s, &endp, 10); if (endp == s) return FALSE; /* strtol() can't convert it */ for (s = endp; *s != '\0'; s++) if (! isspace(*s)) return FALSE; /* it has trailing nonconverted nonwhitespace */ return TRUE; } /* Function: esl_str_IsReal() * Synopsis: Return TRUE if string represents a real number; else FALSE. * * Purpose: Given a NUL-terminated string , return * if the string is completely convertible to a floating-point * real number by the rules of and . * (Which allow for exponential forms, hexadecimal forms, * and case-insensitive INF, INFINITY, NAN, all w/ optional * leading +/- sign.) * * No trailing garbage is allowed, unlike . The * entire string must be convertible, allowing leading and * trailing whitespace is allowed. '99.0 foo' converts * to 99.0 with but is for * . ' 99.0 ' is . * * If is , return . */ int esl_str_IsReal(char *s) { char *endp; double val; if (! s) return FALSE; /* is NULL */ val = strtod(s, &endp); if (val == 0.0f && endp == s) return FALSE; /* strtod() can't convert it */ for (s = endp; *s != '\0'; s++) if (! isspace(*s)) return FALSE; /* it has trailing nonconverted nonwhitespace */ return TRUE; } /* Function: esl_str_GetMaxWidth() * Synopsis: Returns maximum strlen() in an array of strings. 
* * Purpose: Returns the length of the longest string in * an array of strings . If , * returns 0. Any that's is counted * as zero length. */ int64_t esl_str_GetMaxWidth(char **s, int n) { int64_t max = 0; int64_t len; int i; for (i = 0; i < n; i++) if (s[i]) { len = strlen(s[i]); if (len > max) max = len; } return max; } /*-------------- end, additional string functions ---------------*/ /***************************************************************** * 7. File path/name manipulation, including tmpfiles *****************************************************************/ /* Function: esl_FileExists() * Synopsis: Return TRUE if exists and is readable, else FALSE. * * Purpose: Returns TRUE if exists and is readable, else FALSE. * * Note: Testing a read-only fopen() is the only portable ANSI C * I'm aware of. We could also use a POSIX func here, since * we have a ESL_POSIX_AUGMENTATION flag in the code. * * Xref: squid's FileExists(). */ int esl_FileExists(const char *filename) { #if defined _POSIX_VERSION struct stat fileinfo; if (stat(filename, &fileinfo) != 0) return FALSE; if (! (fileinfo.st_mode & S_IRUSR)) return FALSE; return TRUE; #else FILE *fp; if ((fp = fopen(filename, "r"))) { fclose(fp); return TRUE; } return FALSE; #endif } /* Function: esl_FileTail() * Synopsis: Extract filename, removing path prefix. * * Purpose: Given a full pathname , extract the filename * without the directory path; return it via * . space is allocated * here, and must be free'd by the caller. * For example: * becomes ; * becomes ; * becomes ; and * becomes the empty string. * * If is , the rightmost trailing ".foo" extension * is removed too. The suffix is defined as everything following * the rightmost period in the filename in : * with , * becomes , * becomes , and * becomes . * * Args: path - full pathname to process, "/foo/bar/baz.1" * nosuffix - TRUE to remove rightmost suffix from the filename * ret_file - RETURN: filename portion of the path. 
* * Returns: on success, and points to a newly * allocated string containing the filename. * * Throws: on allocation failure. */ int esl_FileTail(const char *path, int nosuffix, char **ret_file) { int status; char *tail = NULL; char *lastslash; char *lastdot; /* remove directory prefix */ lastslash = strrchr(path, eslDIRSLASH); ESL_ALLOC(tail, sizeof(char) * (strlen(path)+1)); /* a little overkill */ if (lastslash == NULL) strcpy(tail, path); else strcpy(tail, lastslash+1); /* remove trailing suffix */ if (nosuffix) { if ((lastdot = strrchr(tail, '.')) != NULL) *lastdot = '\0'; } *ret_file = tail; return eslOK; ERROR: if (tail != NULL) free(tail); *ret_file = NULL; return status; } /* Function: esl_file_Extension() * Synopsis: Find suffix of a file name; set a memory line on it. * * Purpose: Given a path or file name , and ignoring the * last characters, find the rightmost suffix; * return a pointer to its start in <*ret_sfx> (inclusive * of the ``.''), and its length in <*ret_n>. If no * suffix is found, return with <*ret_sfx = NULL> * and . * * The argument allows iterating through more * than one suffix. * * For example, if is ``./foo/bar/baz.xx.yyy'' * and is 0, <*ret_sfx> points to ``.yyy'' and * <*ret_n> is 4. If is 4, then <*ret_sfx> * points to ``.xx'' and is 3. If is 7 * then status is . */ int esl_file_Extension(char *filename, esl_pos_t n_ignore, char **ret_sfx, esl_pos_t *ret_n) { esl_pos_t n1 = strlen(filename) - n_ignore; esl_pos_t n2; for (n2 = n1; n2 > 0 && filename[n2-1] != eslDIRSLASH && filename[n2-1] != '.'; n2--) ; if (n2 <= 0 || filename[n2-1] == eslDIRSLASH) { *ret_sfx = NULL; *ret_n = 0; return eslFAIL; } *ret_sfx = filename + n2 - 1; *ret_n = n1-n2+1; return eslOK; } /* Function: esl_FileConcat() * * Purpose: Concatenates directory path prefix

and a filename * , and returns the new full pathname through * . If does not already end in the * appropriate delimiter (e.g. / for UNIX), one is added. * * If is NULL, then is just the same as * . Similarly, if already appears to be a * full path (because its first character is a /), then * is ignored and is the same as * . It wouldn't normally make sense for a caller to * call this function with such arguments. * * may be a relative path. For example, * if is "/usr/local" and is "lib/myapp/data", * will be "/usr/local/lib/myapp/data". * * Returns: on success, and puts the path * in ; this string is allocated here, * and must be free'd by caller with . * * Throws: on allocation failure. * on bad argument. * In either case, is returned NULL. * * Xref: squid's FileConcat(). */ int esl_FileConcat(const char *dir, const char *file, char **ret_path) { char *path = NULL; int nd, nf; int status; if (ret_path != NULL) *ret_path = NULL; if (file == NULL) ESL_EXCEPTION(eslEINVAL, "null file"); nd = (dir != NULL)? strlen(dir) : 0; nf = strlen(file); ESL_ALLOC(path, sizeof(char) * (nd+nf+2)); if (dir == NULL) /* 1. silly caller didn't give a path */ strcpy(path, file); else if (*file == eslDIRSLASH) /* 2. is already a path? */ strcpy(path, file); else if (dir[nd-1] == eslDIRSLASH) /* 3. (dir is / terminated) */ sprintf(path, "%s%s", dir, file); else /* 4. / (usual case) */ sprintf(path, "%s%c%s", dir, eslDIRSLASH, file); *ret_path = path; return eslOK; ERROR: if (path != NULL) free(path); if (ret_path != NULL) *ret_path = NULL; return status; } /* Function: esl_FileNewSuffix() * * Purpose: Add a file suffix to ; or if * already has a suffix, replace it with . A suffix is * usually 2-4 letters following a '.' character. Returns * an allocated string containing the result in . * * For example, if is "foo" and is "ssi", * returns "foo.ssi". If is "foo.db" and * is "idx", returns "foo.idx". * * Returns: on success, and is set * string ".". Caller must * this string. 
* * Throws: on allocation failure. * * Xref: squid's FileAddSuffix(). */ int esl_FileNewSuffix(const char *filename, const char *sfx, char **ret_newpath) { char *new = NULL; char *lastdot; int nf; int status; if (ret_newpath != NULL) *ret_newpath = NULL; lastdot = strrchr(filename, '.'); /* check for suffix to replace */ if (lastdot != NULL && strchr(lastdot, eslDIRSLASH) != NULL) lastdot = NULL; /*foo.1/filename case - don't be fooled.*/ nf = (lastdot == NULL)? strlen(filename) : lastdot-filename; ESL_ALLOC(new, sizeof(char) * (nf+strlen(sfx)+2)); /* '.' too */ strncpy(new, filename, nf); *(new+nf) = '.'; strcpy(new+nf+1, sfx); if (ret_newpath != NULL) *ret_newpath = new; else free(new); return eslOK; ERROR: if (new != NULL) free(new); if (ret_newpath != NULL) *ret_newpath = NULL; return status; } /* Function: esl_FileEnvOpen() * * Purpose: Looks for a file in a colon-separated list of * directories that is configured in an environment variable * . The first occurrence of file in this directory * list is opened read-only. The open file ptr is returned * through , and the full path name to the file * that was opened is returned through . * Caller can pass NULL in place of or * if it is not interested in one or both of these. * * Does not look in the current directory unless "." is * explicitly in the directory list provided by . * * Note: One reason to pass back to the caller is that * sometimes we're opening the first in a group of files * (for example, a database and its SSI index), and we want * to make sure that after we find the main file, the * caller can look for the auxiliary file(s) in exactly the * same directory. * * Examples: % setenv BLASTDB /nfs/databases/blast-db:/nfs/databases/nr/ * * FILE *fp; * char *path; * int status; * status = esl_FileEnvOpen("swiss42", "BLASTDB", &fp, &path); * * Returns: on success, and provides and ; * is opened here, and must be 'd by caller; * is allocated here, and must be 'd by caller. 
*
 *            Returns <eslENOTFOUND> if the file is not found in any
 *            directory, or if <env> does not contain any directories
 *            to look in.
 *
 * Throws:    <eslEMEM> on allocation error.
 *
 * Xref:      squid's EnvFileOpen().
 */
int
esl_FileEnvOpen(const char *fname, const char *env, FILE **opt_fp, char **opt_path)
{
  FILE *fp      = NULL;
  char *dirlist = NULL;    /* private, modifiable copy of the :-separated list */
  char *path    = NULL;
  char *envval;            /* value of the environment variable               */
  char *dir, *next;        /* current and following elements of <dirlist>     */
  int   npath;
  int   status;

  if (opt_fp   != NULL) *opt_fp   = NULL;
  if (opt_path != NULL) *opt_path = NULL;

  if (env == NULL)                               return eslENOTFOUND;
  if ((envval = getenv(env)) == NULL)            return eslENOTFOUND;
  if (esl_strdup(envval, -1, &dirlist) != eslOK) return eslEMEM;

  /* Upper bound on full path length: the whole list, a separator, <fname>, NUL. */
  npath = strlen(fname) + strlen(envval) + 2;
  ESL_ALLOC(path, sizeof(char) * npath);

  /* Walk the list strtok()-style: overwrite each ':' with NUL, try that directory. */
  for (dir = dirlist; dir != NULL; dir = next)
    {
      next = strchr(dir, ':');
      if (next != NULL) { *next = '\0'; next++; }

      sprintf(path, "%s%c%s", dir, eslDIRSLASH, fname);   /* // won't hurt */
      if ((fp = fopen(path, "r")) != NULL) break;
    }
  if (fp == NULL) { free(path); free(dirlist); return eslENOTFOUND; }

  /* Hand back what the caller asked for; release what it didn't. */
  if (opt_path != NULL) *opt_path = path; else free(path);
  if (opt_fp   != NULL) *opt_fp   = fp;   else fclose(fp);
  free(dirlist);
  return eslOK;

 ERROR:
  if (path     != NULL) free(path);
  if (fp       != NULL) fclose(fp);
  if (dirlist  != NULL) free(dirlist);
  if (opt_path != NULL) *opt_path = NULL;
  if (opt_fp   != NULL) *opt_fp   = NULL;
  return status;
}

/* Function:  esl_tmpfile()
 *
 * Purpose:   Open a secure temporary <FILE *> handle and return it in
 *            <ret_fp>. The file is opened in read-write mode (<w+b>)
 *            with permissions 0600, as an atomic operation using the
 *            POSIX <mkstemp()> function.
 *
 *            The <basename6X> argument is a modifiable string that must
 *            end in "XXXXXX" (for example, "esltmpXXXXXX"). The
 *            <basename6X> is used to construct a unique tmpfile name.
 *
 *            Note that this string must be modifiable; do not declare
 *            it <char *tmpfile = "esltmpXXXXXX";> nor <char tmpfile[]
 *            = "esltmpXXXXXX";> because these will not work on some
 *            compilers. Something like <char tmpfile[16] =
 *            "esltmpXXXXXX";> that explicitly allocates storage will
 *            suffice.
 *
 *            The file is opened in a standard temporary file
 *            directory.
The path is obtained from the environment * variable ; failing that, from the environment * variable ; and failing that, is used. If the * process is running or , then the * environment variables are ignored, and the temp file is * always created in . * * The created tmpfile is not persistent and is not visible * to a directory listing. The caller may the * and do cycles of reading and/or writing, but * once the is closed, the file disappears. The * caller does not need to or it (and * in fact, cannot do so, because it does not know the * tmpfile's name). * * This function is a secure replacement for ANSI C * , which is said to be insecurely implemented on * some platforms. * * Returns: on success, and now points to a new * stream for the opened tempfile. * * Throws: if a system call (including the call) * fails, and and is returned NULL. One possible * problem is if the temporary directory doesn't exist or * is not writable. This is considered to be a system * error, not a user error, so Easel handles it as an exception. * * Xref: STL11/85. Substantially copied from David Wheeler, * "Secure Programming for Linux and Unix HOWTO", * http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/introduction.html. * Copyright (C) 1999-2001 David A. Wheeler. * Licensed under the MIT license; see Appendix C of the HOWTO. * Thanks, David, for the clearest explanation of the issues * that I've seen. * * I also referred to H. Chen, D. Dean, and D. Wagner, * "Model checking one million lines of C code", * In: Network and Distributed System Security Symposium, pp 171-185, * San Diego, CA, February 2004; * http://www.cs.ucdavis.edu/~hchen/paper/ndss04.pdf. * Wheeler's implementation obeys Chen et al's "Property 5", * governing secure use of tempfiles. */ int esl_tmpfile(char *basename6X, FILE **ret_fp) { char *tmpdir = NULL; char *path = NULL; FILE *fp = NULL; int fd; int status; mode_t old_mode; /* Determine what tmp directory to use, and construct the * file name. 
*/ if (getuid() == geteuid() && getgid() == getegid()) { tmpdir = getenv("TMPDIR"); if (tmpdir == NULL) tmpdir = getenv("TMP"); } if (tmpdir == NULL) tmpdir = "/tmp"; if ((status = esl_FileConcat(tmpdir, basename6X, &path)) != eslOK) goto ERROR; old_mode = umask(077); if ((fd = mkstemp(path)) < 0) ESL_XEXCEPTION(eslESYS, "mkstemp() failed."); umask(old_mode); if ((fp = fdopen(fd, "w+b")) == NULL) ESL_XEXCEPTION(eslESYS, "fdopen() failed."); if (unlink(path) < 0) ESL_XEXCEPTION(eslESYS, "unlink() failed."); *ret_fp = fp; free(path); return eslOK; ERROR: if (path != NULL) free(path); if (fp != NULL) fclose(fp); *ret_fp = NULL; return status; } /* Function: esl_tmpfile_named() * * Purpose: Open a persistent temporary file relative to the current * working directory. The file name is constructed from the * argument, which must be a modifiable string * ending in the six characters "XXXXXX". These are * replaced by a unique character string by a call to POSIX * . For example, might be * on input, and on return; or, to * put the tmp file in a subdirectory under the current * working directory, something like * on input resulting in something like * on return. The tmpfile is opened * for reading and writing (in mode with permissions * 0600) and the opened handle is returned through * . * * The created tmpfile is persistent: it will be visible in * a directory listing, and will remain after program * termination unless the caller explicitly removes it by a * or call. * * To use this function securely, if you reopen the * tmpfile, you must only reopen it for reading, not * writing, and you must not trust the contents. * * Because the will be modified, it cannot be * a string constant (especially on a picky compiler like * gcc). You have to declare it with something like * * not * * because a compiler is allowed to make the <*tmpfile> version * a constant. * * Returns: on success, contains the name of the * tmpfile, and contains a new stream for the * opened file. 
* * on failure, and is returned NULL and * the contents of are undefined. The most * common reason for a failure will be that the caller does * not have write permission for the directory that * is in. Easel handles this as a normal (user) * failure, not an exception, because these permissions are * most likely in the user's control (in contrast to * , which always uses a system * that should always be user-writable on a properly * configured POSIX system). * * Xref: STL11/85. */ int esl_tmpfile_named(char *basename6X, FILE **ret_fp) { FILE *fp; mode_t old_mode; int fd; *ret_fp = NULL; old_mode = umask(077); if ((fd = mkstemp(basename6X)) < 0) return eslFAIL; umask(old_mode); if ((fp = fdopen(fd, "w+b")) == NULL) return eslFAIL; *ret_fp = fp; return eslOK; } /* Function: esl_getcwd() * Synopsis: Gets the path for the current working directory. * * Purpose: Returns the path for the current working directory * in <*ret_cwd>, as reported by POSIX . * <*ret_cmd> is allocated here and must be freed by * the caller. * * Returns: on success, and <*ret_cwd> points to * the pathname of the current working directory. * * If is unavailable on this system, * returns and <*ret_cwd> is . * * If the pathname length exceeds a set limit (16384 char), * returns and <*ret_cwd> is . * * Throws: on allocation failure; <*ret_cwd> is . * on getcwd() failure; <*ret_cwd> is . * * Xref: J7/54. 
*/ int esl_getcwd(char **ret_cwd) { char *cwd = NULL; int status = eslOK; #ifdef _POSIX_VERSION int nalloc = 256; int maxalloc = 16384; do { ESL_ALLOC(cwd, sizeof(char) * nalloc); if (getcwd(cwd, nalloc) == NULL) { if (errno != ERANGE) ESL_XEXCEPTION(eslESYS, "unexpected getcwd() error"); if (nalloc * 2 > maxalloc) { status = eslERANGE; goto ERROR; } free(cwd); cwd = NULL; nalloc *= 2; } } while (cwd == NULL); *ret_cwd = cwd; return status; ERROR: if (cwd) free(cwd); *ret_cwd = NULL; return status; #else *ret_cwd = NULL; return eslEUNIMPLEMENTED; #endif } /*----------------- end of file path/name functions ------------------------*/ /***************************************************************** * 8. Typed comparison routines. *****************************************************************/ /* Function: esl_DCompare() * * Purpose: Compare two floating point scalars and for approximate equality. * Return if equal, if not. * * Equality is defined by being within a relative * epsilon , as <2*fabs(a-b)/(a+b)> $\leq$ . * Additionally, we catch the special cases where * and/or are 0 or -0. If both are, return ; if * one is, check that the absolute value of the other is * $\leq$ . * * and work on and * scalars, respectively. */ int esl_DCompare(double a, double b, double tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (a == b) return eslOK; if (fabs(a) == 0. && fabs(b) <= tol) return eslOK; if (fabs(b) == 0. && fabs(a) <= tol) return eslOK; if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK; return eslFAIL; } int esl_FCompare(float a, float b, float tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (a == b) return eslOK; if (fabs(a) == 0. && fabs(b) <= tol) return eslOK; if (fabs(b) == 0. 
&& fabs(a) <= tol) return eslOK; if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK; return eslFAIL; } /* Function: esl_DCompareAbs() * * Purpose: Compare two floating point scalars and for * approximate equality, by absolute difference. Return * if equal, if not. * * Equality is defined as for finite * ; or , when either value is not * finite. * * Generally it is preferable to compare floating point * numbers for equality using relative difference: see * , and also Knuth's Seminumerical * Algorithms. However, cases arise where absolute * difference comparison is preferred. One such case is in * comparing the log probability values of DP matrices, * where numerical error tends to accumulate on an absolute * scale, dependent more on the number of terms than on * their magnitudes. DP cells with values that happen to be * very close to zero can have high relative differences. */ int esl_DCompareAbs(double a, double b, double tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (fabs(a-b) <= tol) return eslOK; return eslFAIL; } int esl_FCompareAbs(float a, float b, float tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (fabs(a-b) <= tol) return eslOK; return eslFAIL; } /* Function: esl_CCompare() * Synopsis: Compare two optional strings for equality. * * Purpose: Compare two optional strings and * for equality. * * If they're non- and identical up to their * -terminator, return . * * If they're both (unset), return . * * Otherwise, they're not identical; return . */ int esl_CCompare(char *s1, char *s2) { if (s1 == NULL && s2 == NULL) return eslOK; if (s1 == NULL || s2 == NULL) return eslFAIL; if (strcmp(s1, s2) != 0) return eslFAIL; return eslOK; } /*-------------- end, typed comparison routines --------------------*/ /***************************************************************** * 9. 
Unit tests. *****************************************************************/ #ifdef eslEASEL_TESTDRIVE static void utest_IsInteger(void) { char *goodones[] = { " 99 " }; char *badones[] = { "", " 99 foo " }; int ngood = sizeof(goodones) / sizeof(char *); int nbad = sizeof(badones) / sizeof(char *); int i; for (i = 0; i < ngood; i++) if (! esl_str_IsInteger(goodones[i])) esl_fatal("esl_str_IsInteger() should have recognized %s", goodones[i]); for (i = 0; i < nbad; i++) if ( esl_str_IsInteger(badones[i])) esl_fatal("esl_str_IsInteger() should not have recognized %s", badones[i]); } static void utest_IsReal(void) { char *goodones[] = { "99", " \t 99", "-99.00", "+99.00e-12", "+0xabc.defp-12", " +INFINITY", "-nan" }; char *badones[] = { "", "FIBB_BOVIN/67-212", /* testing for a fixed bug, 17 Dec 2012, reported by ER */ }; int ngood = sizeof(goodones) / sizeof(char *); int nbad = sizeof(badones) / sizeof(char *); int i; for (i = 0; i < ngood; i++) if (! esl_str_IsReal(goodones[i])) esl_fatal("esl_str_IsReal() should have recognized %s", goodones[i]); for (i = 0; i < nbad; i++) if ( esl_str_IsReal(badones[i])) esl_fatal("esl_str_IsReal() should not have recognized %s", badones[i]); } static void utest_strmapcat(void) { char *msg = "esl_strmapcat() unit test failed"; ESL_DSQ inmap[128]; char *pfx = "testing testing"; char *append = "one two three"; char *bad = "1 2 three"; char *dest; int64_t L1; esl_pos_t L2; int x; /* a simple input map, for testing */ for (x = 0; x < 128; x++) inmap[x] = eslDSQ_ILLEGAL; for (x = 'a'; x < 'z'; x++) inmap[x] = x; for (x = 'A'; x < 'Z'; x++) inmap[x] = x; inmap[' '] = eslDSQ_IGNORED; inmap[0] = '?'; L1 = strlen(pfx); L2 = strlen(append); if ( ( esl_strdup (pfx, L1, &dest)) != eslOK) esl_fatal(msg); if ( ( esl_strmapcat(inmap, &dest, &L1, append, L2)) != eslOK) esl_fatal(msg); if ( strcmp(dest, "testing testingonetwothree") != 0) esl_fatal(msg); free(dest); L1 = -1; L2 = -1; if ( ( esl_strdup (pfx, L1, &dest)) != eslOK) esl_fatal(msg); 
if ( ( esl_strmapcat(inmap, &dest, &L1, append, L2)) != eslOK) esl_fatal(msg); if ( strcmp(dest, "testing testingonetwothree") != 0) esl_fatal(msg); free(dest); L1 = 0; dest = NULL; if ( ( esl_strmapcat(inmap, &dest, &L1, pfx, -1)) != eslOK) esl_fatal(msg); if ( ( esl_strmapcat(inmap, &dest, &L1, append, -1)) != eslOK) esl_fatal(msg); if ( strcmp(dest, "testingtestingonetwothree") != 0) esl_fatal(msg); free(dest); if ( ( esl_strdup(pfx, -1, &dest)) != eslOK) esl_fatal(msg); L1 = 8; if ( ( esl_strmapcat(inmap, &dest, &L1, bad, -1)) != eslEINVAL) esl_fatal(msg); if ( strcmp(dest, "testing ??three") != 0) esl_fatal(msg); free(dest); } static void utest_strtok(void) { char msg[] = "esl_strtok() unit test failed"; char *teststring; char *s; char *tok; int toklen; char endc; if (esl_strdup("This is\t a sentence.", -1, &teststring) != eslOK) esl_fatal(msg); s = teststring; if (esl_strtok(&s, " ", &tok) != eslOK) esl_fatal(msg); if (strcmp(tok, "This") != 0) esl_fatal(msg); if (*s != 'i') esl_fatal(msg); if (esl_strtok_adv(&s, " \t", &tok, &toklen, &endc) != eslOK) esl_fatal(msg); if (strcmp(tok, "is") != 0) esl_fatal(msg); if (*s != ' ') esl_fatal(msg); if (toklen != 2) esl_fatal(msg); if (endc != '\t') esl_fatal(msg); if (esl_strtok_adv(&s, "\n", &tok, NULL, NULL) != eslOK) esl_fatal(msg); if (strcmp(tok, " a sentence.") != 0) esl_fatal(msg); if (*s != '\0') esl_fatal(msg); free(teststring); return; } static void utest_sprintf(void) { char msg[] = "unit tests for esl_[v]sprintf() failed"; int num = 99; char *what = "beer"; char *s = NULL; if (esl_sprintf(&s, "%d bottles of %s", num, what) != eslOK) esl_fatal(msg); if (strcmp(s, "99 bottles of beer") != 0) esl_fatal(msg); free(s); if (esl_sprintf(&s, NULL) != eslOK) esl_fatal(msg); if (s != NULL) esl_fatal(msg); } static void utest_FileExists(void) { char msg[] = "FileExists unit test failed"; char tmpfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; #ifdef _POSIX_VERSION struct stat st; mode_t mode; #endif /* create a tmpfile 
*/ if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg); fprintf(fp, "Unit test.\n"); fclose(fp); if (! esl_FileExists(tmpfile)) esl_fatal(msg); #ifdef _POSIX_VERSION /* The FileExists doesn't just test existence; it also checks read permission */ if (stat(tmpfile, &st) != 0) esl_fatal(msg); mode = st.st_mode & ~S_IRUSR; if (chmod(tmpfile, mode) != 0) esl_fatal(msg); if (esl_FileExists(tmpfile)) esl_fatal(msg); #endif remove(tmpfile); if (esl_FileExists(tmpfile)) esl_fatal(msg); return; } static void utest_tmpfile_named(void) { char msg[] = "tmpfile_named unit test failed"; char tmpfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; char buf[256]; if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg); fprintf(fp, "Unit test.\n"); fclose(fp); if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg); if (fgets(buf, 256, fp) == NULL) esl_fatal(msg); if (strcmp(buf, "Unit test.\n") != 0) esl_fatal(msg); fclose(fp); remove(tmpfile); return; } #endif /*eslEASEL_TESTDRIVE*/ /***************************************************************** * 10. Test driver. *****************************************************************/ #ifdef eslEASEL_TESTDRIVE /* gcc -g -Wall -o easel_utest -I. -L. -DeslEASEL_TESTDRIVE easel.c -leasel -lm * ./easel_utest */ #include "easel.h" int main(void) { #ifdef eslTEST_THROWING esl_exception_SetHandler(&esl_nonfatal_handler); #endif utest_IsInteger(); utest_IsReal(); utest_strmapcat(); utest_strtok(); utest_sprintf(); utest_FileExists(); utest_tmpfile_named(); return eslOK; } #endif /*eslEASEL_TESTDRIVE*/ /***************************************************************** * 11. Examples. *****************************************************************/ #ifdef eslEASEL_EXAMPLE /*::cexcerpt::easel_example_tmpfiles::begin::*/ /* gcc -g -Wall -o example -I. -L. 
-DeslEASEL_EXAMPLE_TMPFILES easel.c -leasel -lm * ./example */ #include "easel.h" int main(void) { char tmpfile1[32] = "esltmpXXXXXX"; /* a transient, secure tmpfile: 6 X's are important */ char tmpfile2[32] = "esltmpXXXXXX"; /* a named tmpfile */ FILE *fp = NULL; char buf[256]; /* Example of using a secure, unnamed tmpfile. * Note, the new tmpfile is automatically deleted, so to cleanup, just fclose() the FILE */ esl_tmpfile(tmpfile1, &fp); fprintf(fp, "Hello world!\n"); rewind(fp); fgets(buf, 256, fp); printf("first temp file says: %s\n", buf); fclose(fp); /* Example of reasonably securely using a named tmpfile. * To cleanup, must both fclose() the FILE and remove() the file by name */ esl_tmpfile_named(tmpfile2, &fp); fprintf(fp, "Hello insecure world!\n"); fclose(fp); /* tmpfile2 now exists on disk and can be closed/reopened */ fp = fopen(tmpfile2, "r"); fgets(buf, 256, fp); printf("second temp file says: %s\n", buf); fclose(fp); remove(tmpfile2); /* disk file cleanup necessary with this version. */ return eslOK; } /*::cexcerpt::easel_example_tmpfiles::end::*/ #endif /*eslEASEL_EXAMPLE*/ /***************************************************************** * Easel - a library of C functions for biological sequence analysis * Version h3.1b2; February 2015 * Copyright (C) 2015 Howard Hughes Medical Institute. * Other copyrights also apply. See the COPYRIGHT file for a full list. * * Easel is distributed under the Janelia Farm Software License, a BSD * license. See the LICENSE file for more details. * * SVN $Id: easel.c 940 2015-01-22 19:34:21Z eddys $ * SVN $URL: https://svn.janelia.org/eddylab/eddys/easel/branches/hmmer/3.1/easel.c $ *****************************************************************/