diff options
| author | Justin Bedo <cu@cua0.org> | 2016-09-30 13:23:10 +1000 | 
|---|---|---|
| committer | Justin Bedo <cu@cua0.org> | 2016-09-30 13:23:10 +1000 | 
| commit | 5fd056c15a0ea0eb28d0bc7cac84d6fa7fe0a3f3 (patch) | |
| tree | e29087738344ff4e2b7a5371fba6bf2f18d8a794 | |
| parent | e0ddb4b25a399d1769165806d5919c6af8c173cf (diff) | |
| -rw-r--r-- | dict.c | 67 | 
1 files changed, 58 insertions, 9 deletions
| @@ -50,8 +50,9 @@ new_tokeniser(char *path)    return tok;  } +// Fastq tokeniser  int -getTok(tokeniser *tok) +getTokQ(tokeniser *tok)  {    int c;    switch(tok->s){ @@ -86,10 +87,48 @@ getTok(tokeniser *tok)    }  } +// Fasta tokeniser +int +getTokA(tokeniser *tok) +{ +  int c; +  switch(tok->s){ +    case TOUTSEQ: +      c = gzgetc(tok->f); +      while(c != -1 && c != '>') c = gzgetc(tok->f); +      while(c != -1 && c != '\n') c = gzgetc(tok->f); +      tok->s = TINSEQ; +    case TINSEQ: +      c = gzgetc(tok->f); +      while(c != -1 && c == '\n') c = gzgetc(tok->f); +      switch(c){ +        case -1: +          return -1; +        case 'A': +        case 'a': +          return 0; +        case 'C': +        case 'c': +          return 1; +        case 'G': +        case 'g': +          return 2; +        case 'T': +        case 't': +          return 3; +        case '>': +          while(c != -1 && c != '\n') c = gzgetc(tok->f); +        default: +          return -2; +      } +  } +} +  void -usage(char *name) +usage(void)  { -  fprintf(stderr, "usage: %s <input fastq> <bloom output>\n", name); +  fprintf(stderr, "usage: %s [-a] <input fastq/fasta> <bloom output>\n", argv0); +  fprintf(stderr, "\t-a: assume input is fasta, default is fastq\n");    exit(-1);  } @@ -107,15 +146,25 @@ uint64_t bitmix(uint64_t x)  int  main(int argc, char **argv)  { -  if(argc != 3) -    usage(argv[0]); +  int (*getTok)(tokeniser *) = getTokQ; +  ARGBEGIN{ +    case 'a': +      getTok = getTokA; +      break; +    case 'h': +    default: +      usage(); +  }ARGEND; + +  if(argc != 2) +    usage(); -  tokeniser *t = new_tokeniser(argv[1]); +  tokeniser *t = new_tokeniser(argv[0]);    // map bloom filter    uint64_t *bloom; -  if(access(argv[2], F_OK) == -1){ -    int bfd = open(argv[2], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); +  if(access(argv[1], F_OK) == -1){ +    int bfd = open(argv[1], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);      if(bfd == -1)        error("open");      if(ftruncate(bfd, BLOOMSIZE * sizeof(uint64_t)) == -1) @@ -126,7 +175,7 @@ main(int argc, char **argv)      bzero(bloom, BLOOMSIZE * sizeof(uint64_t));    }else{      struct stat sb; -    int bfd = open(argv[2], O_RDWR); +    int bfd = open(argv[1], O_RDWR);      if(bfd == -1)        error("open");      if(fstat(bfd, &sb) == -1) | 
