]> git.stg.codes - stg.git/blob - libs/smux/xer_support.c
Update README.
[stg.git] / libs / smux / xer_support.c
1 /*
2  * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
3  * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
4  *      All rights reserved.
5  * Redistribution and modifications are permitted subject to BSD license.
6  */
7 #include <asn_system.h>
8 #include <xer_support.h>
9
/*
 * States of the tokenizer's state machine.
 * The numeric value of the current state is what gets stored into (and
 * read back from) the caller-supplied integer context between calls,
 * so the ordering of the enumerators must stay as it is.
 */
typedef enum {
        ST_TEXT                 = 0,    /* Plain character data */
        ST_TAG_START            = 1,    /* Just seen '<' */
        ST_TAG_BODY             = 2,    /* Inside a tag, outside of attribute values */
        ST_TAG_QUOTE_WAIT       = 3,    /* Seen '=' inside a tag */
        ST_TAG_QUOTED_STRING    = 4,    /* Inside a "quoted" attribute value */
        ST_TAG_UNQUOTED_STRING  = 5,    /* Inside an unquoted attribute value */
        ST_COMMENT_WAIT_DASH1   = 6,    /* Seen "<!", expecting first '-' */
        ST_COMMENT_WAIT_DASH2   = 7,    /* Seen "<!-", expecting second '-' */
        ST_COMMENT              = 8,    /* Inside the comment body */
        ST_COMMENT_CLO_DASH2    = 9,    /* Seen '-' inside a comment */
        ST_COMMENT_CLO_RT       = 10    /* Seen "--" inside a comment, expecting '>' */
} pstate_e;
24
/*
 * Byte classification table, indexed by the unsigned byte value:
 *   0 - none of the classes below
 *   1 - whitespace (0x09, 0x0a, 0x0c, 0x0d, 0x20)
 *   2 - decimal digit
 *   3 - ASCII letter
 * Only the first 128 entries are spelled out; the upper half of the
 * table is implicitly zero-initialized.
 */
static const int
_charclass[256] = {
        /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,1,1,0, 1,1,0,0,
        /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x20 */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x30 */ 2,2,2,2, 2,2,2,2, 2,2,0,0, 0,0,0,0,  /* 0-9 */
        /* 0x40 */ 0,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3,  /* A-O */
        /* 0x50 */ 3,3,3,3, 3,3,3,3, 3,3,3,0, 0,0,0,0,  /* P-Z */
        /* 0x60 */ 0,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3,  /* a-o */
        /* 0x70 */ 3,3,3,3, 3,3,3,3, 3,3,3,0, 0,0,0,0   /* p-z */
};

/* Class predicates; the argument is reduced to an unsigned byte first. */
#define WHITESPACE(c)   (1 == _charclass[(unsigned char)(c)])
#define ALNUM(c)        (_charclass[(unsigned char)(c)] > 1)
#define ALPHA(c)        (3 == _charclass[(unsigned char)(c)])
39
/*
 * Numeric codes (ASCII/UTF-8) of the punctuation the tokenizer reacts to.
 * They are spelled as numbers rather than character literals, so the
 * values do not depend on the compiler's execution character set.
 */
enum {
        EXCLAM  = 0x21,         /* '!' */
        CQUOTE  = 0x22,         /* '"' */
        CDASH   = 0x2d,         /* '-' */
        CSLASH  = 0x2f,         /* '/' */
        LANGLE  = 0x3c,         /* '<' */
        CEQUAL  = 0x3d,         /* '=' */
        RANGLE  = 0x3e,         /* '>' */
        CQUEST  = 0x3f          /* '?' */
};
49
/*
 * Invoke the token callback for the bytes collected since chunk_start.
 *
 * These macros are meant to be expanded only inside the parser function:
 * they read and modify its locals `p', `chunk_start' and `state', call
 * `cb' with `key', and jump to its `finish' label.
 *
 *   type          chunk type handed to the callback;
 *   _ns           state to switch to once the chunk has been delivered;
 *   _current_too  non-zero if the byte at `p' belongs to the chunk;
 *   _final        currently unused, kept for the callers' benefit.
 *
 * Each argument is evaluated exactly once.
 *
 * An empty chunk is not reported: only the state changes.
 * If the callback returns less than the chunk size, parsing stops
 * (goto finish) without advancing chunk_start. The state is still
 * advanced in the single case when the current byte was part of the chunk
 * and the callback returned -1.
 */
#define TOKEN_CB_CALL(type, _ns, _current_too, _final) do {     \
                int _ret;                                       \
                const pstate_e _next = (_ns);                   \
                const int _too = (_current_too);                \
                const ssize_t _sz = (p - chunk_start) + _too;   \
                if (!_sz) {                                     \
                        /* Shortcut: nothing to report */       \
                        state = _next;                          \
                        break;                                  \
                }                                               \
                _ret = cb((type), chunk_start, _sz, key);       \
                if(_ret < _sz) {                                \
                        if(_too && _ret == -1)                  \
                                state = _next;                  \
                        goto finish;                            \
                }                                               \
                chunk_start = p + _too;                         \
                state = _next;                                  \
        } while(0)

/* Report an intermediate chunk of the given type. */
#define TOKEN_CB(_type, _ns, _current_too)                      \
        TOKEN_CB_CALL(_type, _ns, _current_too, 0)

/* Chunk types used to report the closing piece of a tag or a comment. */
#define PXML_TAG_FINAL_CHUNK_TYPE      PXML_TAG_END
#define PXML_COMMENT_FINAL_CHUNK_TYPE  PXML_COMMENT_END

/*
 * Report the closing chunk of a tag or a comment: _type (PXML_TAG or
 * PXML_COMMENT) is mapped to its *_FINAL_CHUNK_TYPE by token pasting.
 */
#define TOKEN_CB_FINAL(_type, _ns, _current_too)                \
        TOKEN_CB_CALL( _type ## _FINAL_CHUNK_TYPE , _ns, _current_too, 1)
78
79 /*
80  * Parser itself
81  */
82 ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
83         pstate_e state = (pstate_e)*stateContext;
84         const char *chunk_start = (const char *)xmlbuf;
85         const char *p = chunk_start;
86         const char *end = p + size;
87
88         for(; p < end; p++) {
89           int C = *(const unsigned char *)p;
90           switch(state) {
91           case ST_TEXT:
92                 /*
93                  * Initial state: we're in the middle of some text,
94                  * or just have started.
95                  */
96                 if (C == LANGLE) 
97                         /* We're now in the tag, probably */
98                         TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
99                 break;
100           case ST_TAG_START:
101                 if (ALPHA(C) || (C == CSLASH))
102                         state = ST_TAG_BODY;
103                 else if (C == EXCLAM)
104                         state = ST_COMMENT_WAIT_DASH1;
105                 else 
106                         /*
107                          * Not characters and not whitespace.
108                          * Must be something like "3 < 4".
109                          */
110                         TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
111                 break;
112           case ST_TAG_BODY:
113                 switch(C) {
114                 case RANGLE:
115                         /* End of the tag */
116                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
117                         break;
118                 case LANGLE:
119                         /*
120                          * The previous tag wasn't completed, but still
121                          * recognized as valid. (Mozilla-compatible)
122                          */
123                         TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);      
124                         break;
125                 case CEQUAL:
126                         state = ST_TAG_QUOTE_WAIT;
127                         break;
128                 }
129                 break;
130           case ST_TAG_QUOTE_WAIT:
131                 /*
132                  * State after the equal sign ("=") in the tag.
133                  */
134                 switch(C) {
135                 case CQUOTE:
136                         state = ST_TAG_QUOTED_STRING;
137                         break;
138                 case RANGLE:
139                         /* End of the tag */
140                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
141                         break;
142                 default:
143                         if(!WHITESPACE(C))
144                                 /* Unquoted string value */
145                                 state = ST_TAG_UNQUOTED_STRING;
146                 }
147                 break;
148           case ST_TAG_QUOTED_STRING:
149                 /*
150                  * Tag attribute's string value in quotes.
151                  */
152                 if(C == CQUOTE) {
153                         /* Return back to the tag state */
154                         state = ST_TAG_BODY;
155                 }
156                 break;
157           case ST_TAG_UNQUOTED_STRING:
158                 if(C == RANGLE) {
159                         /* End of the tag */
160                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
161                 } else if(WHITESPACE(C)) {
162                         /* Return back to the tag state */
163                         state = ST_TAG_BODY;
164                 }
165                 break;
166           case ST_COMMENT_WAIT_DASH1:
167                 if(C == CDASH) {
168                         state = ST_COMMENT_WAIT_DASH2;
169                 } else {
170                         /* Some ordinary tag. */
171                         state = ST_TAG_BODY;
172                 }
173                 break;
174           case ST_COMMENT_WAIT_DASH2:
175                 if(C == CDASH) {
176                         /* Seen "<--" */
177                         state = ST_COMMENT;
178                 } else {
179                         /* Some ordinary tag */
180                         state = ST_TAG_BODY;
181                 }
182                 break;
183           case ST_COMMENT:
184                 if(C == CDASH) {
185                         state = ST_COMMENT_CLO_DASH2;
186                 }
187                 break;
188           case ST_COMMENT_CLO_DASH2:
189                 if(C == CDASH) {
190                         state = ST_COMMENT_CLO_RT;
191                 } else {
192                         /* This is not an end of a comment */
193                         state = ST_COMMENT;
194                 }
195                 break;
196           case ST_COMMENT_CLO_RT:
197                 if(C == RANGLE) {
198                         TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
199                 } else if(C == CDASH) {
200                         /* Maintain current state, still waiting for '>' */
201                 } else {
202                         state = ST_COMMENT;
203                 }
204                 break;
205           } /* switch(*ptr) */
206         } /* for() */
207
208         /*
209          * Flush the partially processed chunk, state permitting.
210          */
211         if(p - chunk_start) {
212                 switch (state) {
213                 case ST_COMMENT:
214                         TOKEN_CB(PXML_COMMENT, state, 0);
215                         break;
216                 case ST_TEXT:
217                         TOKEN_CB(PXML_TEXT, state, 0);
218                         break;
219                 default: break; /* a no-op */
220                 }
221         }
222
223 finish:
224         *stateContext = (int)state;
225         return chunk_start - (const char *)xmlbuf;
226 }
227