summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2016-09-30 13:23:10 +1000
committer Justin Bedo <cu@cua0.org> 2016-09-30 13:23:10 +1000
commit 5fd056c15a0ea0eb28d0bc7cac84d6fa7fe0a3f3 (patch)
tree e29087738344ff4e2b7a5371fba6bf2f18d8a794
parent e0ddb4b25a399d1769165806d5919c6af8c173cf (diff)
add fasta parser (HEAD, master)
-rw-r--r-- dict.c 67
1 files changed, 58 insertions, 9 deletions
diff --git a/dict.c b/dict.c
index 7bb0e63..53e18bd 100644
--- a/dict.c
+++ b/dict.c
@@ -50,8 +50,9 @@ new_tokeniser(char *path)
return tok;
}
+// Fastq tokeniser
int
-getTok(tokeniser *tok)
+getTokQ(tokeniser *tok)
{
int c;
switch(tok->s){
@@ -86,10 +87,48 @@ getTok(tokeniser *tok)
}
}
+// Fasta tokeniser: returns 0-3 for A/C/G/T (either case), -1 when gzgetc yields -1, and -2 for any other character or after skipping a '>' header line
+int
+getTokA(tokeniser *tok)
+{
+ int c;
+ switch(tok->s){
+ case TOUTSEQ: // not yet inside a record: locate and skip the next header line
+ c = gzgetc(tok->f);
+ while(c != -1 && c != '>') c = gzgetc(tok->f); // discard input up to the next '>'
+ while(c != -1 && c != '\n') c = gzgetc(tok->f); // discard the remainder of the header line
+ tok->s = TINSEQ; // no break here: execution continues into TINSEQ to read the first base
+ case TINSEQ:
+ c = gzgetc(tok->f);
+ while(c != -1 && c == '\n') c = gzgetc(tok->f); // newlines inside a sequence are skipped
+ switch(c){
+ case -1: // no more input available from gzgetc
+ return -1;
+ case 'A':
+ case 'a':
+ return 0;
+ case 'C':
+ case 'c':
+ return 1;
+ case 'G':
+ case 'g':
+ return 2;
+ case 'T':
+ case 't':
+ return 3;
+ case '>': // header of the next record: discard the line, then continue into default
+ while(c != -1 && c != '\n') c = gzgetc(tok->f);
+ default: // any character other than A/C/G/T, and the just-consumed header, yield -2
+ return -2;
+ }
+ } // NOTE(review): no return statement follows if tok->s matches neither TOUTSEQ nor TINSEQ
+}
+
void
-usage(char *name)
+usage(void)
{
- fprintf(stderr, "usage: %s <input fastq> <bloom output>\n", name);
+ fprintf(stderr, "usage: %s [-a] <input fastq/fasta> <bloom output>\n", argv0);
+ fprintf(stderr, "\t-a: assume input is fasta, default is fastq\n");
exit(-1);
}
@@ -107,15 +146,25 @@ uint64_t bitmix(uint64_t x)
int
main(int argc, char **argv)
{
- if(argc != 3)
- usage(argv[0]);
+ int (*getTok)(tokeniser *) = getTokQ;
+ ARGBEGIN{
+ case 'a':
+ getTok = getTokA;
+ break;
+ case 'h':
+ default:
+ usage();
+ }ARGEND;
+
+ if(argc != 2)
+ usage();
- tokeniser *t = new_tokeniser(argv[1]);
+ tokeniser *t = new_tokeniser(argv[0]);
// map bloom filter
uint64_t *bloom;
- if(access(argv[2], F_OK) == -1){
- int bfd = open(argv[2], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ if(access(argv[1], F_OK) == -1){
+ int bfd = open(argv[1], O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if(bfd == -1)
error("open");
if(ftruncate(bfd, BLOOMSIZE * sizeof(uint64_t)) == -1)
@@ -126,7 +175,7 @@ main(int argc, char **argv)
bzero(bloom, BLOOMSIZE * sizeof(uint64_t));
}else{
struct stat sb;
- int bfd = open(argv[2], O_RDWR);
+ int bfd = open(argv[1], O_RDWR);
if(bfd == -1)
error("open");
if(fstat(bfd, &sb) == -1)