aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2012-06-12 12:08:35 +0200
committerJustin Bedo <cu@cua0.org>2012-06-12 12:08:35 +0200
commit06a376f7f913bd57a1256898a04882818deaefa4 (patch)
tree33423384477ee90ccf022f29f4685bccc2a3874e
Initial implementation
-rw-r--r--README198
-rw-r--r--json.c248
-rw-r--r--json.h38
-rw-r--r--json.man190
-rw-r--r--mkfile11
5 files changed, 685 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..a35851c
--- /dev/null
+++ b/README
@@ -0,0 +1,198 @@
+
+
+
+ JSON(2) JSON(2)
+
+
+
+
+
+ NAME
+ Jinit, Jtokenise, Jfind, Jnext, Jtokstr - JSON parser
+
+ SYNOPSIS
+ #include <u.h>
+ #include <libc.h>
+ #include <json.h>
+
+ typedef enum{
+ JBool,
+ JNum,
+ JNil,
+ JObj,
+ JArr,
+ JStr,
+ } Jtype;
+
+ typedef struct{
+ Jtype type;
+ char *start;
+ char *end;
+ uint nsub;
+ } Jtok;
+
+ typedef struct{
+ uint ntok, mtok;
+ Jtok *tokens;
+ uint stktop;
+ uint tokstk[JStksz];
+ } Jparser;
+
+ void Jinit(Jparser *p)
+
+ void Jterm(Jparser *p);
+
+ int Jtokenise(Jparser *p, char *s)
+
+ char *Jtokstr(Jtok *t)
+
+ int Jfind(Jparser *p, uint i, char *s)
+
+ uint Jnext(Jparser *p, uint i)
+
+ DESCRIPTION
+ These routines implement a fast, in-memory JSON parser. The
+ parser operates on a string which is expected to contain the
+ full JSON text. The string is not altered in any way.
+
+ A parser object p is initialised using Jinit. The parser
+ object contains the number of tokens found ntok and an array
+
+
+
+
+
+
+
+
+
+
+ JSON(2) JSON(2)
+
+
+
+ of tokens tokens. The other fields are for internal use. As
+ memory is dynamically allocated, an initialised parser p
+ must be terminated with Jterm after use.
+
+ An initialised parser object is populated with tokens using
+ Jtokenise. Jtokenise accepts parser p and a string s and
+ returns a filled Jparser structure containing tokens found
+ in s. S is unaltered.
+
+ Each token Jtoken has a type Jtype, pointers to the start
+ and end of the token's location in the JSON string, and a
+ count nsub of the number of subtokens. The types of a token
+ are
+
+ JBool
+ Boolean value. Token must be either 't' or 'f'.
+
+ JNum A real number.
+
+ JNil Nill token matching 'n'.
+
+ JObj An object (dictionary) containing key:value pairs.
+
+ JArr An array.
+
+ JStr A string.
+
+ The pointers start and end point to the beginning character
+ of the token and to one character after the end of the token
+ respectively. The exception are JStr tokens, where start
+ points to the first character after the double quote and end
+ points to the terminating double quote. As arrays JArr and
+ objects JObj are the only types allowing subelements, nsub
+ is necessarily 0 for JBool, JNum, JNil, and JStr.
+
+ The functions Jtokstr, Jfind and Jnext are functions for
+ working with Jtokstr takes a token and extracts out the
+ string pointed to by the token. The string is null termi-
+ nated and escaped characters are handled appropriately. As
+ Jtokstr uses a static buffer, the string returned is
+ destroyed on the next call.
+
+ Jfind is used to find specifically named attributes within a
+ JObj. Given an index into the tokens array of pā€“ which must
+ be a JObj ā€“ and the name of an attribute s, Jfind returns
+ the index of the token corresponding to the value matching
+ the attribute name. If no attribute is found then Jfind
+ returns -1.
+
+ Finally, Jnext takes and index and calculates the index of
+ the next element at that depth. It is used for skipping
+ over JObjs and JArrs which can contain a number of
+
+
+
+
+
+
+
+
+
+
+ JSON(2) JSON(2)
+
+
+
+ subelements.
+
+ BUGS
+ The parser does not implement a full grammar so some JSON
+ errors are not detected when parsing.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/json.c b/json.c
new file mode 100644
index 0000000..9f52bc7
--- /dev/null
+++ b/json.c
@@ -0,0 +1,248 @@
+#include<u.h>
+#include<libc.h>
+#include<json.h>
+
+static void*
+erealloc(void *ptr, ulong sz)
+{
+ ptr = realloc(ptr, sz);
+ if(ptr == nil)
+ sysfatal("realloc: %r");
+ return ptr;
+}
+
+void
+Jinit(Jparser *p)
+{
+ memset(p, 0, sizeof *p);
+}
+
+void
+Jterm(Jparser *p)
+{
+ if(p->mtok > 0 || p->tokens != nil)
+ free(p->tokens);
+}
+
+static Jtok*
+gettok(Jparser *p)
+{
+ uint i;
+
+ if(p->ntok == p->mtok){
+ p->mtok = (p->ntok << 1) + 1;
+ p->tokens = erealloc(p->tokens, p->mtok * sizeof *p->tokens);
+ for(i = p->ntok; i < p->mtok; i++)
+ memset(&p->tokens[i], 0, sizeof *p->tokens);
+ }
+
+ return &p->tokens[p->ntok++];
+}
+
+static char *
+parsestr(Jparser *p, char *s)
+{
+ Jtok *tok;
+
+ tok = gettok(p);
+ tok->type = JStr;
+ for(tok->start = ++s; *s != '\0' && utfrune("\"", *s) == nil; s++)
+ if(utfrune("\\", *s) != nil)
+ s++;
+ tok->end = s;
+ return s;
+}
+
+static char *
+saccept(char *src, char *str)
+{
+ while(*src != '\0' && utfrune(str, *src) != nil)
+ src++;
+ return src;
+}
+
+static void
+incnsub(Jparser *p)
+{
+ if(p->stktop > 0)
+ p->tokens[p->tokstk[p->stktop - 1]].nsub++;
+}
+
+int
+Jtokenise(Jparser *p, char *s)
+{
+ Jtok *tok;
+ Jtype type;
+
+ memset(p->tokens, 0, p->mtok * sizeof *p->tokens);
+
+ for(; *s != '\0'; s++){
+ if(utfrune("\t\r\n ", *s) != nil)
+ continue;
+
+ if(utfrune(":", *s) != nil)
+ if(p->ntok > 0 && p->tokens[p->ntok - 1].type == JStr)
+ continue;
+ else{
+ werrstr("unexpected :");
+ return -1;
+ }
+
+ if(utfrune(",", *s) != nil)
+ if(p->stktop != 0)
+ continue;
+ else{
+ werrstr(", outside of object or array");
+ return -1;
+ }
+
+ if(utfrune("{[", *s) != nil){
+ tok = gettok(p);
+ tok->start = s;
+ if(p->stktop == JStksz){
+ werrstr("stack overflow");
+ return -1;
+ }
+ incnsub(p);
+ p->tokstk[p->stktop++] = p->ntok - 1;
+ tok->type = utfrune("{", *s) != nil ? JObj:JArr;
+ continue;
+ }
+
+ if(utfrune("}]", *s) != nil){
+ type = utfrune("}", *s) != nil ? JObj:JArr;
+ if(p->ntok == 0)
+ goto Jmatcherr;
+ for(tok = &p->tokens[p->ntok - 1]; tok >= p->tokens; tok--){
+ if(tok->start != nil && tok->end == nil){
+ if(tok->type != type){
+ werrstr("expected %c", type == JObj ? '}':']');
+ return -1;
+ }
+ if(p->stktop == 0){
+ werrstr("stack underflow");
+ return -1;
+ }
+ p->stktop--;
+ tok->end = s + 1;
+ goto Jnext;
+ }
+ }
+Jmatcherr:
+ werrstr("no matching %c", type == JObj ? '{':'[');
+ return -1;
+ }
+
+ if(utfrune("\"", *s) != nil){
+ s = parsestr(p, s);
+ if(*s == '\0' || utfrune("\"", *s) == nil){
+ werrstr("expected \"");
+ return -1;
+ }
+ incnsub(p);
+ continue;
+ }
+
+ if(utfrune("tf", *s) != nil){
+ tok = gettok(p);
+ tok->type = JBool;
+ tok->start = s;
+ tok->end = s + 1;
+ incnsub(p);
+ continue;
+ }
+
+ if(utfrune("n", *s) != nil){
+ tok = gettok(p);
+ tok->type = JNil;
+ tok->start = s;
+ tok->end = s + 1;
+ incnsub(p);
+ continue;
+ }
+
+ /* Try and parse a number */
+ tok = gettok(p);
+ tok->type = JNum;
+ tok->start = s;
+ s = saccept(s, "+-");
+ s = saccept(s, "0123456789");
+ s = saccept(s, ".");
+ s = saccept(s, "0123456789");
+ s = saccept(s, "eE");
+ s = saccept(s, "+-");
+ s = saccept(s, "0123456789");
+ tok->end = s;
+
+ if(*s != '\0' && utfrune("\t\r\n }],", *s) == nil){
+ werrstr("number format error");
+ return -1;
+ }
+
+ incnsub(p);
+ s--;
+Jnext:;
+ }
+
+ if(p->stktop != 0){
+ werrstr("unexpected end of input");
+ return -1;
+ }
+
+ return 0;
+}
+
+char *
+Jtokstr(Jtok *t)
+{
+ static char *buf;
+ static ulong sz = 0;
+ ulong len;
+ char *src, *dst;
+
+ len = t->end - t->start;
+ if(sz <= len){
+ sz = len + 1;
+ buf = erealloc(buf, sz);
+ }
+
+ for(src = t->start, dst = buf; len > 0; len--){
+ if(*src == '\\')
+ src++, len--;
+ *dst++ = *src++;
+ }
+ *dst = '\0';
+ return buf;
+}
+
+uint
+Jnext(Jparser *p, uint i)
+{
+ int j;
+ if(i >= p->ntok)
+ return i;
+ if(p->tokens[i].type != JObj && p->tokens[i].type != JArr)
+ return i + 1;
+ for(j = i; j < p->ntok && p->tokens[i].end > p->tokens[j].start; j++);
+ return j;
+}
+
+int
+Jfind(Jparser *p, uint r, char *name)
+{
+ uint i, j;
+ assert(r < p->ntok);
+ assert(p->tokens[r].type == JObj);
+ assert(p->tokens[r].nsub % 2 == 0);
+
+ for(i = 0, j = r + 1; i < p->tokens[r].nsub; i += 2){
+ assert(p->tokens[j].type == JStr);
+ if(strcmp(Jtokstr(&p->tokens[j]), name) == 0)
+ return j + 1;
+ j = Jnext(p, j + 1);
+ }
+
+ if(j >= p->ntok)
+ return -1;
+ return j;
+}
diff --git a/json.h b/json.h
new file mode 100644
index 0000000..215f716
--- /dev/null
+++ b/json.h
@@ -0,0 +1,38 @@
+#pragma src "/usr/doc/src/json"
+#pragma lib "json.a"
+
+enum{
+ JStksz= 32,
+};
+
+typedef enum{
+ JBool,
+ JNum,
+ JNil,
+ JObj,
+ JArr,
+ JStr,
+} Jtype;
+
+typedef struct{
+ Jtype type;
+ char *start;
+ char *end;
+ uint nsub;
+} Jtok;
+
+typedef struct{
+ uint ntok, mtok;
+ Jtok *tokens;
+ uint stktop;
+ uint tokstk[JStksz];
+} Jparser;
+
+void Jinit(Jparser *);
+void Jterm(Jparser *);
+
+int Jtokenise(Jparser *, char *);
+char *Jtokstr(Jtok *);
+
+int Jfind(Jparser *, uint, char *);
+uint Jnext(Jparser *, uint);
diff --git a/json.man b/json.man
new file mode 100644
index 0000000..d6867e9
--- /dev/null
+++ b/json.man
@@ -0,0 +1,190 @@
+.TH JSON 2
+.SH NAME
+Jinit, Jtokenise, Jfind, Jnext, Jtokstr \- JSON parser
+.SH SYNOPSIS
+.B #include <u.h>
+.br
+.B #include <libc.h>
+.br
+.B #include <json.h>
+.PP
+.nf
+.ft L
+typedef enum{
+ JBool,
+ JNum,
+ JNil,
+ JObj,
+ JArr,
+ JStr,
+} Jtype;
+.fi
+.PP
+.nf
+.ft L
+typedef struct{
+ Jtype type;
+ char *start;
+ char *end;
+ uint nsub;
+} Jtok;
+.fi
+.PP
+.nf
+.ft L
+typedef struct{
+ uint ntok, mtok;
+ Jtok *tokens;
+ uint stktop;
+ uint tokstk[JStksz];
+} Jparser;
+.fi
+.PP
+.B
+void Jinit(Jparser *p)
+.PP
+.B
+void Jterm(Jparser *p);
+.PP
+.B
+int Jtokenise(Jparser *p, char *s)
+.PP
+.B
+char *Jtokstr(Jtok *t)
+.PP
+.B
+int Jfind(Jparser *p, uint i, char *s)
+.PP
+.B
+uint Jnext(Jparser *p, uint i)
+.PP
+.SH DESCRIPTION
+These routines implement a fast, in-memory JSON parser. The parser
+operates on a string which is expected to contain the full JSON text.
+The string is not altered in any way.
+.PP
+A parser object
+.I p
+is initialised using
+.IR Jinit .
+The parser object contains the number of tokens found
+.I ntok
+and an array of tokens
+.IR tokens .
+The other fields are for internal use. As memory is dynamically
+allocated, an initialised parser
+.I p
+must be terminated with
+.I Jterm
+after use.
+.PP
+An initialised parser object is populated with tokens using
+.IR Jtokenise .
+.I Jtokenise
+accepts parser
+.I p
+and a string
+.I s
+and returns a filled
+.I Jparser
+structure containing tokens found in
+.IR s .
+.I S
+is unaltered.
+.PP
+Each token
+.I Jtoken
+has a type
+.IR Jtype ,
+pointers to the
+.I start
+and
+.I end
+of the token's location in the JSON string,
+and a count
+.I nsub
+of the number of subtokens. The types of a token are
+.TP
+.I JBool
+Boolean value. Token must be either 't' or 'f'.
+.TP
+.I JNum
+A real number.
+.TP
+.I JNil
+Nill token matching 'n'.
+.TP
+.I JObj
+An object (dictionary) containing key:value pairs.
+.TP
+.I JArr
+An array.
+.TP
+.I JStr
+A string.
+.PP
+The pointers
+.I start
+and
+.I end
+point to the beginning character of the token and to one character
+after the end of the token respectively. The exception are
+.I JStr
+tokens, where
+.I start
+points to the first character after the double quote and
+.I end
+points to the terminating double quote. As arrays
+.I JArr
+and objects
+.I JObj
+are the only types allowing subelements,
+.I nsub
+is necessarily 0 for
+.IR JBool ,
+.IR JNum ,
+.IR JNil ,
+and
+.IR JStr .
+.PP
+The functions
+.IR Jtokstr ,
+.IR Jfind
+and
+.I Jnext are functions for working with the tokens.
+.I Jtokstr
+takes a token and extracts out the string pointed to by the token.
+The string is null terminated and escaped characters are handled
+appropriately. As
+.I Jtokstr
+uses a static buffer, the string returned is destroyed on the next
+call.
+.PP
+.I Jfind
+is used to find specifically named attributes within a
+.IR JObj .
+Given an index into the
+.I tokens
+array of
+.IR p ā€“
+which must be a
+.I JObj
+ā€“ and the name of an attribute
+.IR s ,
+.I Jfind
+returns the index of the token corresponding to the value matching the
+attribute name. If no attribute is found then
+.I Jfind
+returns -1.
+.PP
+Finally,
+.I Jnext
+takes and index and calculates the index of the next element at that
+depth. It is used for skipping over
+.IR JObj s
+and
+.IR JArr s
+which can contain a number of subelements.
+.SH BUGS
+The parser does not implement a full grammar so some JSON errors are
+not detected when parsing.
diff --git a/mkfile b/mkfile
new file mode 100644
index 0000000..f7f40f8
--- /dev/null
+++ b/mkfile
@@ -0,0 +1,11 @@
+</$objtype/mkfile
+
+LIB=/$objtype/lib/json.a
+
+HFILES=\
+ json.h\
+
+OFILES=\
+ json.$O\
+
+</sys/src/cmd/mklib