]> git.stg.codes - stg.git/blob - libs/smux/xer_support.c
9e34e6923467a436e20690633c4091b6db4683cd
[stg.git] / libs / smux / xer_support.c
1 /*
2  * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
3  * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
4  *      All rights reserved.
5  * Redistribution and modifications are permitted subject to BSD license.
6  */
7 #include <asn_system.h>
8 #include <xer_support.h>
9
/*
 * States of the incremental XML tokenizer.
 * The numeric value is what pxml_parse() loads from and stores back into
 * the caller's *stateContext, so the order of enumerators must not change.
 */
typedef enum {
        ST_TEXT = 0,            /* Plain text, or nothing parsed yet */
        ST_TAG_START,           /* Just seen '<' */
        ST_TAG_BODY,            /* Inside "<...", outside attribute values */
        ST_TAG_QUOTE_WAIT,      /* Seen '=' inside a tag, value not started */
        ST_TAG_QUOTED_STRING,   /* Inside a double-quoted attribute value */
        ST_TAG_UNQUOTED_STRING, /* Inside an unquoted attribute value */
        ST_COMMENT_WAIT_DASH1,  /* Seen "<!", expecting first '-' */
        ST_COMMENT_WAIT_DASH2,  /* Seen "<!-", expecting second '-' */
        ST_COMMENT,             /* Inside the comment body */
        ST_COMMENT_CLO_DASH2,   /* Seen '-' in a comment, expecting another */
        ST_COMMENT_CLO_RT       /* Seen "--" in a comment, expecting '>' */
} pstate_e;
24
25 static pxml_chunk_type_e final_chunk_type[] = {
26         PXML_TEXT,
27         PXML_TAG_END,
28         PXML_COMMENT_END,
29         PXML_TAG_END,
30         PXML_COMMENT_END,
31 };
32
33
/*
 * Character classification table, indexed by byte value:
 *   0 - none of the classes below
 *   1 - whitespace (TAB, LF, FF, CR, SPACE)
 *   2 - decimal digit
 *   3 - ASCII letter
 * Only the first 128 entries are spelled out; the remaining ones are
 * implicitly zero-initialized. The table is read-only, hence const.
 */
static const int
_charclass[256] = {
        0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,       /* HT LF FF CR       */
        0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,       /* SPACE             */
        2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0,       /* 01234567 89       */
        0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,       /*  ABCDEFG HIJKLMNO */
        3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0,       /* PQRSTUVW XYZ      */
        0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,       /*  abcdefg hijklmno */
        3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0        /* pqrstuvw xyz      */
};
/* The argument is narrowed to unsigned char so the index stays in 0..255 */
#define WHITESPACE(c)   (_charclass[(unsigned char)(c)] == 1)
#define ALNUM(c)        (_charclass[(unsigned char)(c)] >= 2)
#define ALPHA(c)        (_charclass[(unsigned char)(c)] == 3)
48
/*
 * Symbolic names for the markup characters the parser reacts to.
 * Given as numeric ASCII/UTF-8 code points rather than character literals.
 */
enum {
        EXCLAM = 0x21,  /* '!' */
        CQUOTE = 0x22,  /* '"' */
        CDASH  = 0x2d,  /* '-' */
        CSLASH = 0x2f,  /* '/' */
        LANGLE = 0x3c,  /* '<' */
        CEQUAL = 0x3d,  /* '=' */
        RANGLE = 0x3e,  /* '>' */
        CQUEST = 0x3f   /* '?' */
};
58
/*
 * Report the bytes accumulated since chunk_start to the token callback.
 *
 * Must be expanded inside a function that provides the variables
 * p, chunk_start, state, cb, key and the label "finish".
 *
 *  type          chunk type handed to the callback;
 *  _ns           parser state to enter afterwards;
 *  _current_too  1 if the byte at p belongs to the chunk, 0 otherwise;
 *  _final        not used by the expansion.
 *
 * An empty chunk is not reported; only the state is switched.
 * If the callback returns less than the chunk length, control jumps to
 * "finish" with chunk_start left untouched; the state is advanced in that
 * case only when the current byte was included and the callback
 * returned -1.
 */
#define TOKEN_CB_CALL(type, _ns, _current_too, _final) do {             \
                pstate_e next_state = (_ns);                            \
                ssize_t chunk_len = (p - chunk_start) + (_current_too); \
                if(chunk_len != 0) {                                    \
                        int consumed = cb((type), chunk_start,          \
                                        chunk_len, key);                \
                        if(consumed < chunk_len) {                      \
                                if((_current_too) && consumed == -1)    \
                                        state = next_state;             \
                                goto finish;                            \
                        }                                               \
                        /* Next chunk begins after the reported bytes */\
                        chunk_start = p + (_current_too);               \
                }                                                       \
                state = next_state;                                     \
        } while(0)

/* Report a chunk which is not known to be complete yet */
#define TOKEN_CB(_type, _ns, _current_too)                      \
        TOKEN_CB_CALL(_type, _ns, _current_too, 0)

/* Report a complete chunk, using its "end" type from final_chunk_type[] */
#define TOKEN_CB_FINAL(_type, _ns, _current_too)                \
        TOKEN_CB_CALL(final_chunk_type[_type], _ns, _current_too, 1)
84
85 /*
86  * Parser itself
87  */
88 ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
89         pstate_e state = (pstate_e)*stateContext;
90         const char *chunk_start = (const char *)xmlbuf;
91         const char *p = chunk_start;
92         const char *end = p + size;
93
94         for(; p < end; p++) {
95           int C = *(const unsigned char *)p;
96           switch(state) {
97           case ST_TEXT:
98                 /*
99                  * Initial state: we're in the middle of some text,
100                  * or just have started.
101                  */
102                 if (C == LANGLE) 
103                         /* We're now in the tag, probably */
104                         TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
105                 break;
106           case ST_TAG_START:
107                 if (ALPHA(C) || (C == CSLASH))
108                         state = ST_TAG_BODY;
109                 else if (C == EXCLAM)
110                         state = ST_COMMENT_WAIT_DASH1;
111                 else 
112                         /*
113                          * Not characters and not whitespace.
114                          * Must be something like "3 < 4".
115                          */
116                         TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
117                 break;
118           case ST_TAG_BODY:
119                 switch(C) {
120                 case RANGLE:
121                         /* End of the tag */
122                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
123                         break;
124                 case LANGLE:
125                         /*
126                          * The previous tag wasn't completed, but still
127                          * recognized as valid. (Mozilla-compatible)
128                          */
129                         TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);      
130                         break;
131                 case CEQUAL:
132                         state = ST_TAG_QUOTE_WAIT;
133                         break;
134                 }
135                 break;
136           case ST_TAG_QUOTE_WAIT:
137                 /*
138                  * State after the equal sign ("=") in the tag.
139                  */
140                 switch(C) {
141                 case CQUOTE:
142                         state = ST_TAG_QUOTED_STRING;
143                         break;
144                 case RANGLE:
145                         /* End of the tag */
146                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
147                         break;
148                 default:
149                         if(!WHITESPACE(C))
150                                 /* Unquoted string value */
151                                 state = ST_TAG_UNQUOTED_STRING;
152                 }
153                 break;
154           case ST_TAG_QUOTED_STRING:
155                 /*
156                  * Tag attribute's string value in quotes.
157                  */
158                 if(C == CQUOTE) {
159                         /* Return back to the tag state */
160                         state = ST_TAG_BODY;
161                 }
162                 break;
163           case ST_TAG_UNQUOTED_STRING:
164                 if(C == RANGLE) {
165                         /* End of the tag */
166                         TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
167                 } else if(WHITESPACE(C)) {
168                         /* Return back to the tag state */
169                         state = ST_TAG_BODY;
170                 }
171                 break;
172           case ST_COMMENT_WAIT_DASH1:
173                 if(C == CDASH) {
174                         state = ST_COMMENT_WAIT_DASH2;
175                 } else {
176                         /* Some ordinary tag. */
177                         state = ST_TAG_BODY;
178                 }
179                 break;
180           case ST_COMMENT_WAIT_DASH2:
181                 if(C == CDASH) {
182                         /* Seen "<--" */
183                         state = ST_COMMENT;
184                 } else {
185                         /* Some ordinary tag */
186                         state = ST_TAG_BODY;
187                 }
188                 break;
189           case ST_COMMENT:
190                 if(C == CDASH) {
191                         state = ST_COMMENT_CLO_DASH2;
192                 }
193                 break;
194           case ST_COMMENT_CLO_DASH2:
195                 if(C == CDASH) {
196                         state = ST_COMMENT_CLO_RT;
197                 } else {
198                         /* This is not an end of a comment */
199                         state = ST_COMMENT;
200                 }
201                 break;
202           case ST_COMMENT_CLO_RT:
203                 if(C == RANGLE) {
204                         TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
205                 } else if(C == CDASH) {
206                         /* Maintain current state, still waiting for '>' */
207                 } else {
208                         state = ST_COMMENT;
209                 }
210                 break;
211           } /* switch(*ptr) */
212         } /* for() */
213
214         /*
215          * Flush the partially processed chunk, state permitting.
216          */
217         if(p - chunk_start) {
218                 switch (state) {
219                 case ST_COMMENT:
220                         TOKEN_CB(PXML_COMMENT, state, 0);
221                         break;
222                 case ST_TEXT:
223                         TOKEN_CB(PXML_TEXT, state, 0);
224                         break;
225                 default: break; /* a no-op */
226                 }
227         }
228
229 finish:
230         *stateContext = (int)state;
231         return chunk_start - (const char *)xmlbuf;
232 }
233