git.stg.codes - stg.git/blobdiff - libs/smux/xer_support.c
Port to CMake, get rid of os_int.h.
[stg.git] / libs / smux / xer_support.c
diff --git a/libs/smux/xer_support.c b/libs/smux/xer_support.c
new file mode 100644 (file)
index 0000000..9e34e69
--- /dev/null
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
+ * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
+ *     All rights reserved.
+ * Redistribution and modifications are permitted subject to BSD license.
+ */
+#include <asn_system.h>
+#include <xer_support.h>
+
+/* Parser states; pxml_parse() loads the current one from *stateContext and stores it back before returning */
+typedef enum {
+       ST_TEXT,                /* in character data, outside any tag or comment */
+       ST_TAG_START,           /* just passed '<'; deciding between tag, comment and plain text */
+       ST_TAG_BODY,            /* inside a tag, outside any attribute value */
+       ST_TAG_QUOTE_WAIT,      /* after '=' inside a tag; awaiting the attribute value */
+       ST_TAG_QUOTED_STRING,   /* inside a double-quoted attribute value */
+       ST_TAG_UNQUOTED_STRING, /* inside an unquoted attribute value */
+       ST_COMMENT_WAIT_DASH1,  /* seen "<!", i.e. "<!--"[1]; awaiting the first '-' */
+       ST_COMMENT_WAIT_DASH2,  /* seen "<!-", i.e. "<!--"[2]; awaiting the second '-' */
+       ST_COMMENT,             /* inside a comment body */
+       ST_COMMENT_CLO_DASH2,   /* seen '-' inside a comment, i.e. "-->"[0]; awaiting the second '-' */
+       ST_COMMENT_CLO_RT       /* seen "--" inside a comment, i.e. "-->"[1]; awaiting '>' */
+} pstate_e;
+
+static pxml_chunk_type_e final_chunk_type[] = {        /* TOKEN_CB_FINAL reports final_chunk_type[_type] in place of _type */
+       PXML_TEXT,              /* NOTE(review): slots presumably follow the pxml_chunk_type_e order in xer_support.h - confirm */
+       PXML_TAG_END,           /* presumably the slot selected by PXML_TAG - TODO confirm */
+       PXML_COMMENT_END,       /* presumably the slot selected by PXML_COMMENT - TODO confirm */
+       PXML_TAG_END,
+       PXML_COMMENT_END,
+};
+
+
+static int
+_charclass[256] = {    /* byte value -> class: 0 other, 1 whitespace, 2 digit, 3 letter; entries 128..255 are not listed and so are 0 */
+       0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,       /* 0x09 HT, 0x0a LF, 0x0c FF, 0x0d CR; 0x0b VT is class 0 */
+       0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+       1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,       /* 0x20 SP */
+       2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0,       /* 01234567 89       */
+       0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,       /*  ABCDEFG HIJKLMNO */
+       3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0,       /* PQRSTUVW XYZ      */
+       0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,       /*  abcdefg hijklmno */
+       3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0        /* pqrstuvw xyz      */
+};
+#define WHITESPACE(c)  (_charclass[(unsigned char)(c)] == 1)   /* HT, LF, FF, CR or SP */
+#define ALNUM(c)       (_charclass[(unsigned char)(c)] >= 2)   /* ASCII digit or letter */
+#define ALPHA(c)       (_charclass[(unsigned char)(c)] == 3)   /* ASCII letter */
+
+/* Numeric aliases for the ASCII punctuation bytes the parser compares against (single-byte, so identical in UTF-8) */
+#define        EXCLAM  0x21    /* '!' */
+#define        CQUOTE  0x22    /* '"' */
+#define        CDASH   0x2d    /* '-' */
+#define        CSLASH  0x2f    /* '/' */
+#define        LANGLE  0x3c    /* '<' */
+#define        CEQUAL  0x3d    /* '=' */
+#define        RANGLE  0x3e    /* '>' */
+#define        CQUEST  0x3f    /* '?' (not referenced by the parser code visible here) */
+
+/* Invoke token callback: offer the bytes from chunk_start up to p (plus the current byte when _current_too is 1) to cb(), then enter state _ns. Relies on the locals p, chunk_start, state, cb and key of the enclosing function and jumps to its "finish" label when cb() returns less than the size offered. The _final argument is not used. */
+#define        TOKEN_CB_CALL(type, _ns, _current_too, _final) do {     \
+               int _ret;                                       \
+               pstate_e ns  = _ns;                             \
+               ssize_t _sz = (p - chunk_start) + _current_too; \
+               if (!_sz) {                                     \
+                       /* Shortcut: empty chunk, no callback */ \
+                       state = _ns;                            \
+                       break;                                  \
+               }                                               \
+               _ret = cb(type, chunk_start, _sz, key);         \
+               if(_ret < _sz) {                                \
+                       if(_current_too && _ret == -1)          \
+                               state = ns;                     \
+                       goto finish; /* chunk_start not advanced */ \
+               }                                               \
+               chunk_start = p + _current_too;                 \
+               state = ns;                                     \
+       } while(0)
+
+#define TOKEN_CB(_type, _ns, _current_too)                     \
+       TOKEN_CB_CALL(_type, _ns, _current_too, 0)      /* report the chunk as _type */
+
+#define TOKEN_CB_FINAL(_type, _ns, _current_too)               \
+       TOKEN_CB_CALL(final_chunk_type[_type], _ns, _current_too, 1)    /* report the chunk as final_chunk_type[_type] */
+
+/*
+ * Parser itself: scans xmlbuf[0..size) and passes each recognized chunk to cb(type, start, length, key); the state is read from *stateContext and written back before returning. Returns the offset in xmlbuf of the first byte not yet delivered to cb; scanning stops early when cb returns less than the length offered.
+ */
+ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
+       pstate_e state = (pstate_e)*stateContext;       /* resume from the state stored by the caller */
+       const char *chunk_start = (const char *)xmlbuf; /* first byte not yet delivered to cb */
+       const char *p = chunk_start;
+       const char *end = p + size;
+
+       for(; p < end; p++) {
+         int C = *(const unsigned char *)p;    /* current byte as a value in 0..255 */
+         switch(state) {
+         case ST_TEXT:
+               /*
+                * Initial state: we're in the middle of some text,
+                * or just have started.
+                */
+               if (C == LANGLE) 
+                       /* We're now in the tag, probably */
+                       TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
+               break;
+         case ST_TAG_START:
+               if (ALPHA(C) || (C == CSLASH))
+                       state = ST_TAG_BODY;
+               else if (C == EXCLAM)
+                       state = ST_COMMENT_WAIT_DASH1;
+               else 
+                       /*
+                        * Neither a letter, '/' nor '!'.
+                        * Must be something like "3 < 4".
+                        */
+                       TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
+               break;
+         case ST_TAG_BODY:
+               switch(C) {
+               case RANGLE:
+                       /* End of the tag */
+                       TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+                       break;
+               case LANGLE:
+                       /*
+                        * The previous tag wasn't completed, but still
+                        * recognized as valid. (Mozilla-compatible)
+                        */
+                       TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);      
+                       break;
+               case CEQUAL:
+                       state = ST_TAG_QUOTE_WAIT;
+                       break;
+               }
+               break;
+         case ST_TAG_QUOTE_WAIT:
+               /*
+                * State after the equal sign ("=") in the tag.
+                */
+               switch(C) {
+               case CQUOTE:
+                       state = ST_TAG_QUOTED_STRING;
+                       break;
+               case RANGLE:
+                       /* End of the tag */
+                       TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+                       break;
+               default:
+                       if(!WHITESPACE(C))
+                               /* Unquoted string value */
+                               state = ST_TAG_UNQUOTED_STRING;
+               }
+               break;
+         case ST_TAG_QUOTED_STRING:
+               /*
+                * Tag attribute's string value in quotes.
+                */
+               if(C == CQUOTE) {
+                       /* Return back to the tag state */
+                       state = ST_TAG_BODY;
+               }
+               break;
+         case ST_TAG_UNQUOTED_STRING:
+               if(C == RANGLE) {
+                       /* End of the tag */
+                       TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
+               } else if(WHITESPACE(C)) {
+                       /* Return back to the tag state */
+                       state = ST_TAG_BODY;
+               }
+               break;
+         case ST_COMMENT_WAIT_DASH1:
+               if(C == CDASH) {
+                       state = ST_COMMENT_WAIT_DASH2;
+               } else {
+                       /* Some ordinary tag. */
+                       state = ST_TAG_BODY;
+               }
+               break;
+         case ST_COMMENT_WAIT_DASH2:
+               if(C == CDASH) {
+                       /* Seen "<!--" */
+                       state = ST_COMMENT;
+               } else {
+                       /* Some ordinary tag */
+                       state = ST_TAG_BODY;
+               }
+               break;
+         case ST_COMMENT:
+               if(C == CDASH) {
+                       state = ST_COMMENT_CLO_DASH2;
+               }
+               break;
+         case ST_COMMENT_CLO_DASH2:
+               if(C == CDASH) {
+                       state = ST_COMMENT_CLO_RT;
+               } else {
+                       /* This is not an end of a comment */
+                       state = ST_COMMENT;
+               }
+               break;
+         case ST_COMMENT_CLO_RT:
+               if(C == RANGLE) {
+                       TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
+               } else if(C == CDASH) {
+                       /* Maintain current state, still waiting for '>' */
+               } else {
+                       state = ST_COMMENT;
+               }
+               break;
+         } /* switch(state) */
+       } /* for() */
+
+       /*
+        * Flush the partially processed chunk, state permitting: only text and comment bodies are reported before they are complete.
+        */
+       if(p - chunk_start) {
+               switch (state) {
+               case ST_COMMENT:
+                       TOKEN_CB(PXML_COMMENT, state, 0);
+                       break;
+               case ST_TEXT:
+                       TOKEN_CB(PXML_TEXT, state, 0);
+                       break;
+               default: break; /* a no-op: the pending bytes are not reported and not counted in the return value */
+               }
+       }
+
+finish:
+       *stateContext = (int)state;     /* saved for the next call */
+       return chunk_start - (const char *)xmlbuf;      /* offset of the first byte not delivered to cb */
+}
+