Mercurial > ~dholland > hg > ag > index.cgi
comparison tests/agcl/parsifal/good/detag2.c @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:13d2b8934445 |
---|---|
1 /* | |
2 detag.syn | |
3 | |
4 Program to strip HTML tags from HTML files. | |
5 Copyright (c) 1996 - 1999 Parsifal Software, All | |
6 Rights Reserved. | |
7 See the file COPYING for license and usage terms. | |
8 | |
9 For information about AnaGram, visit http://www.parsifalsoft.com. | |
10 */ | |
11 | |
12 #include <stdio.h> | |
13 | |
14 | |
15 /* | |
16 * AnaGram, A System for Syntax Directed Programming | |
17 * File generated by: ... | |
18 * | |
19 * AnaGram Parsing Engine | |
20 * Copyright 1993-2002 Parsifal Software. All Rights Reserved. | |
21 * | |
22 * This software is provided 'as-is', without any express or implied | |
23 * warranty. In no event will the authors be held liable for any damages | |
24 * arising from the use of this software. | |
25 * | |
26 * Permission is granted to anyone to use this software for any purpose, | |
27 * including commercial applications, and to alter it and redistribute it | |
28 * freely, subject to the following restrictions: | |
29 * | |
30 * 1. The origin of this software must not be misrepresented; you must not | |
31 * claim that you wrote the original software. If you use this software | |
32 * in a product, an acknowledgment in the product documentation would be | |
33 * appreciated but is not required. | |
34 * 2. Altered source versions must be plainly marked as such, and must not be | |
35 * misrepresented as being the original software. | |
36 * 3. This notice may not be removed or altered from any source distribution. | |
37 */ | |
38 | |
39 #ifndef DETAG2_H | |
40 #include "detag2.h" | |
41 #endif | |
42 | |
43 #ifndef DETAG2_H | |
44 #error Mismatched header file | |
45 #endif | |
46 | |
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
49 | |
50 #define RULE_CONTEXT (&((PCB).cs[(PCB).ssx])) | |
51 #define ERROR_CONTEXT ((PCB).cs[(PCB).error_frame_ssx]) | |
52 #define CONTEXT ((PCB).cs[(PCB).ssx]) | |
53 | |
54 | |
55 | |
56 detag2_pcb_type detag2_pcb; | |
57 #define PCB detag2_pcb | |
58 | |
59 /* Line -, detag2.syn */ | |
60 // ----- Embedded C --------------------------- | |
61 | |
62 FILE *output; | |
63 | |
64 int main(int argc, char *argv[]) { | |
65 | |
66 FILE *input; | |
67 size_t fileLength; | |
68 size_t stringLength; | |
69 int errorFlag = 0; | |
70 char *inString; | |
71 | |
72 | |
73 /* Check for enough arguments */ | |
74 if (argc != 3) { | |
75 printf("Program to strip HTML tags from a file\n" | |
76 "Usage: %s <input filename> <output filename>\n", argv[0]); | |
77 return 1; | |
78 } | |
79 | |
80 /* Open input file for reading only */ | |
81 input = fopen(argv[1],"r"); | |
82 if (input == NULL) { | |
83 printf("Cannot open %s\n", argv[1]); | |
84 return 2; | |
85 } | |
86 | |
87 /* find out how big the file is */ | |
88 if (fseek(input, SEEK_SET, SEEK_END)) { | |
89 printf("Strange problems with %s\n", argv[1]); | |
90 return 3; | |
91 } | |
92 fileLength = ftell(input); | |
93 if (fileLength < 0 ) { // -1L is error return | |
94 printf("Error getting file length (%d) of %s\n", fileLength, argv[1]); | |
95 return 4; | |
96 } | |
97 | |
98 /* fseek to beginning of file */ | |
99 if (fseek(input, 0, SEEK_SET)) { | |
100 printf("Strange problems with %s\n", argv[1]); | |
101 return 5; | |
102 } | |
103 | |
104 /* Allocate storage for input string */ | |
105 inString = (char*)malloc(fileLength + 1); | |
106 if (inString == NULL) { | |
107 printf("Insufficient memory\n"); | |
108 return 6; | |
109 } | |
110 | |
111 /* Read file */ | |
112 stringLength = fread(inString, 1, fileLength, input); | |
113 if (stringLength == 0) { | |
114 printf("Unable to read %s\n", argv[1]); | |
115 return 7; | |
116 } | |
117 inString[stringLength] = 0; | |
118 | |
119 | |
120 /* Open output file for writing only */ | |
121 output = fopen(argv[2],"w"); | |
122 if (output == NULL) { | |
123 printf("Cannot open %s\n", argv[2]); | |
124 free(inString); | |
125 fclose(input); | |
126 return 8; | |
127 } | |
128 | |
129 | |
130 /* Invoke parser */ | |
131 PCB.pointer = (unsigned char *)inString; // using pointer input | |
132 detag(); | |
133 if (PCB.exit_flag != 1) { | |
134 printf( "Unsuccessful termination of parse, PCB.exit_flag = %d\n", | |
135 PCB.exit_flag); | |
136 } | |
137 | |
138 | |
139 /* Done */ | |
140 free(inString); | |
141 fclose(input); | |
142 fclose(output); | |
143 printf( " End detag "); | |
144 return 0; | |
145 } | |
146 | |
147 | |
148 #ifndef CONVERT_CASE | |
149 #define CONVERT_CASE(c) (c) | |
150 #endif | |
151 #ifndef TAB_SPACING | |
152 #define TAB_SPACING 8 | |
153 #endif | |
154 | |
155 #define ag_rp_1() (putc('\n', output)) | |
156 | |
157 #define ag_rp_5(c) (putc(c, output)) | |
158 | |
159 #define ag_rp_6() (putc('<', output)) | |
160 | |
161 #define ag_rp_7() (putc('>', output)) | |
162 | |
163 #define ag_rp_8() (putc('&', output)) | |
164 | |
165 | |
166 #define READ_COUNTS | |
167 #define WRITE_COUNTS | |
168 #undef V | |
169 #define V(i,t) (*t (&(PCB).vs[(PCB).ssx + i])) | |
170 #undef VS | |
171 #define VS(i) (PCB).vs[(PCB).ssx + i] | |
172 | |
173 #ifndef GET_CONTEXT | |
174 #define GET_CONTEXT CONTEXT = (PCB).input_context | |
175 #endif | |
176 | |
177 typedef enum { | |
178 ag_action_1, | |
179 ag_action_2, | |
180 ag_action_3, | |
181 ag_action_4, | |
182 ag_action_5, | |
183 ag_action_6, | |
184 ag_action_7, | |
185 ag_action_8, | |
186 ag_action_9, | |
187 ag_action_10, | |
188 ag_action_11, | |
189 ag_action_12 | |
190 } ag_parser_action; | |
191 | |
192 | |
193 #ifndef NULL_VALUE_INITIALIZER | |
194 #define NULL_VALUE_INITIALIZER = 0 | |
195 #endif | |
196 | |
197 static int const ag_null_value NULL_VALUE_INITIALIZER; | |
198 | |
199 static const unsigned char ag_rpx[] = { | |
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, | |
201 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
202 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 4, | |
204 5 | |
205 }; | |
206 | |
207 static const unsigned char ag_key_itt[] = { | |
208 0 | |
209 }; | |
210 | |
211 static const unsigned short ag_key_pt[] = { | |
212 0 | |
213 }; | |
214 | |
215 static const unsigned char ag_key_ch[] = { | |
216 0, 97,103,108,255, 38,255, 69, 82, 84,255, 82,255, 65, 68, 82,255,101, | |
217 114,116,255,114,255, 97,100,114,255, 33, 66, 68, 72, 76, 79, 80, 84, 85, | |
218 98,100,104,108,111,112,116,117,255 | |
219 }; | |
220 | |
221 static const unsigned char ag_key_act[] = { | |
222 0,3,3,3,4,2,4,3,0,3,4,3,4,3,0,0,4,3,0,3,4,3,4,3,0,0,4,3,3,3,1,3,3,1,2, | |
223 3,3,3,1,3,3,1,2,3,4 | |
224 }; | |
225 | |
226 static const unsigned char ag_key_parm[] = { | |
227 0, 69, 68, 67, 0, 0, 0, 27, 13, 29, 0, 53, 0, 44, 50, 47, 0, 28, | |
228 14, 30, 0, 54, 0, 45, 51, 48, 0, 56, 20, 38, 23, 41, 35, 17, 0, 32, | |
229 21, 39, 24, 42, 36, 18, 0, 33, 0 | |
230 }; | |
231 | |
232 static const unsigned char ag_key_jmp[] = { | |
233 0, 0, 4, 7, 0, 1, 0, 17, 0, 20, 0, 27, 0, 29, 0, 0, 0, 39, | |
234 0, 42, 0, 49, 0, 51, 0, 0, 0, 10, 13, 15, 7, 23, 25, 11, 13, 33, | |
235 35, 37, 17, 45, 47, 21, 23, 55, 0 | |
236 }; | |
237 | |
238 static const unsigned char ag_key_index[] = { | |
239 5, 27, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
240 0, 0, 0, 0, 0, 5, 5, 0, 0 | |
241 }; | |
242 | |
243 static const unsigned char ag_key_ends[] = { | |
244 109,112,59,0, 116,59,0, 116,59,0, 45,45,0, 82,0, 76,0, | |
245 65,68,0, 77,76,0, 73,0, 76,0, 69,0, 66,76,69,0, 76,0, 114,0, | |
246 108,0, 97,100,0, 109,108,0, 105,0, 108,0, 101,0, 98,108,101,0, | |
247 108,0, | |
248 }; | |
249 | |
250 #define AG_TCV(x) ag_tcv[(x)] | |
251 | |
252 static const unsigned char ag_tcv[] = { | |
253 3, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
254 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
255 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 26, 26, 26, 26, 26, | |
256 26, 70, 70, 70, 70, 70, 10, 70, 12, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
257 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
258 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
259 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
260 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
261 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
262 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
263 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
264 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
265 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
266 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
267 70, 70, 70, 70 | |
268 }; | |
269 | |
270 #ifndef SYNTAX_ERROR | |
271 #define SYNTAX_ERROR fprintf(stderr,"%s, line %d, column %d\n", \ | |
272 (PCB).error_message, (PCB).line, (PCB).column) | |
273 #endif | |
274 | |
275 #ifndef FIRST_LINE | |
276 #define FIRST_LINE 1 | |
277 #endif | |
278 | |
279 #ifndef FIRST_COLUMN | |
280 #define FIRST_COLUMN 1 | |
281 #endif | |
282 | |
283 #ifndef PARSER_STACK_OVERFLOW | |
284 #define PARSER_STACK_OVERFLOW {fprintf(stderr, \ | |
285 "\nParser stack overflow, line %d, column %d\n",\ | |
286 (PCB).line, (PCB).column);} | |
287 #endif | |
288 | |
289 #ifndef REDUCTION_TOKEN_ERROR | |
290 #define REDUCTION_TOKEN_ERROR {fprintf(stderr, \ | |
291 "\nReduction token error, line %d, column %d\n", \ | |
292 (PCB).line, (PCB).column);} | |
293 #endif | |
294 | |
295 | |
296 #ifndef INPUT_CODE | |
297 #define INPUT_CODE(T) (T) | |
298 #endif | |
299 | |
300 typedef enum | |
301 {ag_accept_key, ag_set_key, ag_jmp_key, ag_end_key, ag_no_match_key, | |
302 ag_cf_accept_key, ag_cf_set_key, ag_cf_end_key} key_words; | |
303 | |
304 static void ag_get_key_word(int ag_k) { | |
305 int ag_save = (int) ((PCB).la_ptr - (PCB).pointer); | |
306 const unsigned char *ag_p; | |
307 int ag_ch; | |
308 while (1) { | |
309 switch (ag_key_act[ag_k]) { | |
310 case ag_cf_end_key: { | |
311 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k]; | |
312 do { | |
313 if ((ag_ch = *sp++) == 0) { | |
314 int ag_k1 = ag_key_parm[ag_k]; | |
315 int ag_k2 = ag_key_pt[ag_k1]; | |
316 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) goto ag_fail; | |
317 (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1 + 1]; | |
318 return; | |
319 } | |
320 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch); | |
321 goto ag_fail; | |
322 } | |
323 case ag_end_key: { | |
324 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k]; | |
325 do { | |
326 if ((ag_ch = *sp++) == 0) { | |
327 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k]; | |
328 return; | |
329 } | |
330 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch); | |
331 } | |
332 case ag_no_match_key: | |
333 ag_fail: | |
334 (PCB).la_ptr = (PCB).pointer + ag_save; | |
335 return; | |
336 case ag_cf_set_key: { | |
337 int ag_k1 = ag_key_parm[ag_k]; | |
338 int ag_k2 = ag_key_pt[ag_k1]; | |
339 ag_k = ag_key_jmp[ag_k]; | |
340 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) break; | |
341 ag_save = (int) ((PCB).la_ptr - (PCB).pointer); | |
342 (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1+1]; | |
343 break; | |
344 } | |
345 case ag_set_key: | |
346 ag_save = (int) ((PCB).la_ptr - (PCB).pointer); | |
347 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k]; | |
348 case ag_jmp_key: | |
349 ag_k = ag_key_jmp[ag_k]; | |
350 break; | |
351 case ag_accept_key: | |
352 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k]; | |
353 return; | |
354 case ag_cf_accept_key: { | |
355 int ag_k1 = ag_key_parm[ag_k]; | |
356 int ag_k2 = ag_key_pt[ag_k1]; | |
357 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) | |
358 (PCB).la_ptr = (PCB).pointer + ag_save; | |
359 else (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1+1]; | |
360 return; | |
361 } | |
362 } | |
363 ag_ch = CONVERT_CASE(*(PCB).la_ptr++); | |
364 ag_p = &ag_key_ch[ag_k]; | |
365 if (ag_ch <= 255) while (*ag_p < ag_ch) ag_p++; | |
366 if (ag_ch > 255 || *ag_p != ag_ch) { | |
367 (PCB).la_ptr = (PCB).pointer + ag_save; | |
368 return; | |
369 } | |
370 ag_k = (int) (ag_p - ag_key_ch); | |
371 } | |
372 } | |
373 | |
374 | |
375 #ifndef AG_NEWLINE | |
376 #define AG_NEWLINE 10 | |
377 #endif | |
378 | |
379 #ifndef AG_RETURN | |
380 #define AG_RETURN 13 | |
381 #endif | |
382 | |
383 #ifndef AG_FORMFEED | |
384 #define AG_FORMFEED 12 | |
385 #endif | |
386 | |
387 #ifndef AG_TABCHAR | |
388 #define AG_TABCHAR 9 | |
389 #endif | |
390 | |
391 static void ag_track(void) { | |
392 int ag_k = (int) ((PCB).la_ptr - (PCB).pointer); | |
393 while (ag_k--) { | |
394 switch (*(PCB).pointer++) { | |
395 case AG_NEWLINE: | |
396 (PCB).column = 1, (PCB).line++; | |
397 case AG_RETURN: | |
398 case AG_FORMFEED: | |
399 break; | |
400 case AG_TABCHAR: | |
401 (PCB).column += (TAB_SPACING) - ((PCB).column - 1) % (TAB_SPACING); | |
402 break; | |
403 default: | |
404 (PCB).column++; | |
405 } | |
406 } | |
407 } | |
408 | |
409 | |
410 static void ag_prot(void) { | |
411 int ag_k; | |
412 ag_k = 128 - ++(PCB).btsx; | |
413 if (ag_k <= (PCB).ssx) { | |
414 (PCB).exit_flag = AG_STACK_ERROR_CODE; | |
415 PARSER_STACK_OVERFLOW; | |
416 return; | |
417 } | |
418 (PCB).bts[(PCB).btsx] = (PCB).sn; | |
419 (PCB).bts[ag_k] = (PCB).ssx; | |
420 (PCB).vs[ag_k] = (PCB).vs[(PCB).ssx]; | |
421 (PCB).ss[ag_k] = (PCB).ss[(PCB).ssx]; | |
422 } | |
423 | |
424 static void ag_undo(void) { | |
425 if ((PCB).drt == -1) return; | |
426 while ((PCB).btsx) { | |
427 int ag_k = 128 - (PCB).btsx; | |
428 (PCB).sn = (PCB).bts[(PCB).btsx--]; | |
429 (PCB).ssx = (PCB).bts[ag_k]; | |
430 (PCB).vs[(PCB).ssx] = (PCB).vs[ag_k]; | |
431 (PCB).ss[(PCB).ssx] = (PCB).ss[ag_k]; | |
432 } | |
433 (PCB).token_number = (detag2_token_type) (PCB).drt; | |
434 (PCB).ssx = (PCB).dssx; | |
435 (PCB).sn = (PCB).dsn; | |
436 (PCB).drt = -1; | |
437 } | |
438 | |
439 | |
440 static const unsigned char ag_tstt[] = { | |
441 70,69,68,67,26,12,10,3,0,1,2,4,5,6,7,8,9,64,66, | |
442 70,56,54,53,51,50,48,47,45,44,42,41,39,38,36,35,33,32,30,29,28,27,26,24,23, | |
443 21,20,18,17,14,13,12,0,11,15,16,19,22,25,31,34,37,40,43,46,49,52,55,59, | |
444 60, | |
445 70,69,68,67,26,12,10,3,0,5, | |
446 70,69,68,67,26,12,10,3,0,64,66, | |
447 70,69,68,67,26,12,3,0,4,64,66, | |
448 10,3,0,5,6, | |
449 3,0, | |
450 70,26,12,0, | |
451 70,26,12,10,0,57,62,63, | |
452 70,26,12,0,16,59,60, | |
453 70,26,12,0,16,59,60, | |
454 70,26,12,0,16,59,60, | |
455 70,26,12,0,16,59,60, | |
456 70,26,12,0,16,59,60, | |
457 70,26,12,0,16,59,60, | |
458 70,26,12,0,16,59,60, | |
459 70,26,12,0,16,59,60, | |
460 70,26,12,0,16,59,60, | |
461 26,0, | |
462 70,26,12,0,16,59,60, | |
463 70,26,12,0,16,59,60, | |
464 70,26,12,0,16,59,60, | |
465 12,0, | |
466 70,69,68,67,26,12,10,3,0,64,66, | |
467 70,69,68,67,26,12,10,3,0,5, | |
468 70,26,12,10,0, | |
469 70,26,12,0,16,59,60, | |
470 | |
471 }; | |
472 | |
473 | |
474 static unsigned const char ag_astt[241] = { | |
475 2,2,2,2,2,2,1,5,7,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
476 1,1,1,1,1,1,1,1,1,1,1,1,1,5,7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,5,5,5,5, | |
477 5,1,5,7,3,2,2,2,2,2,2,5,5,7,3,3,2,2,2,2,2,2,5,7,1,1,1,1,5,7,1,1,3,7,9,9,5, | |
478 7,1,1,5,1,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,2,1,2, | |
479 1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,7, | |
480 1,1,5,7,3,1,3,1,1,5,7,2,1,2,1,1,5,7,2,1,2,3,7,2,2,2,2,2,2,5,5,7,3,3,5,5,5, | |
481 5,5,5,1,5,7,3,9,9,5,9,7,1,1,5,7,2,1,2 | |
482 }; | |
483 | |
484 | |
485 static const unsigned char ag_pstt[] = { | |
486 68,72,71,70,68,68,1,8,0,0,6,3,2,2,5,4,6,3,3, | |
487 7,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,17,17,7,18,18,19,19, | |
488 20,20,21,21,58,1,22,21,22,20,19,18,17,16,15,14,13,12,11,10,9,7,22, | |
489 7,7,7,7,7,7,1,7,2,3, | |
490 68,72,71,70,68,68,4,4,3,67,67, | |
491 68,72,71,70,68,68,10,4,23,23,23, | |
492 1,9,5,24,24, | |
493 1,6, | |
494 57,57,59,7, | |
495 25,25,63,25,8,54,25,54, | |
496 7,7,58,9,53,7,53, | |
497 7,7,58,10,50,7,50, | |
498 7,7,58,11,47,7,47, | |
499 7,7,58,12,44,7,44, | |
500 7,7,58,13,41,7,41, | |
501 7,7,58,14,38,7,38, | |
502 7,7,58,15,35,7,35, | |
503 7,7,58,16,32,7,32, | |
504 7,7,58,17,29,7,29, | |
505 26,18, | |
506 7,7,58,19,21,7,21, | |
507 7,7,58,20,18,7,18, | |
508 7,7,58,21,15,7,15, | |
509 12,22, | |
510 68,72,71,70,68,68,5,5,23,67,67, | |
511 6,6,6,6,6,6,1,6,24,3, | |
512 62,62,64,62,25, | |
513 7,7,58,26,24,7,24, | |
514 | |
515 }; | |
516 | |
517 | |
518 static const unsigned char ag_sbt[] = { | |
519 0, 19, 69, 79, 90, 101, 106, 108, 112, 120, 127, 134, 141, 148, | |
520 155, 162, 169, 176, 183, 185, 192, 199, 206, 208, 219, 229, 234, 241 | |
521 }; | |
522 | |
523 | |
524 static const unsigned char ag_sbe[] = { | |
525 8, 51, 77, 87, 97, 103, 107, 111, 116, 123, 130, 137, 144, 151, | |
526 158, 165, 172, 179, 184, 188, 195, 202, 207, 216, 227, 233, 237, 241 | |
527 }; | |
528 | |
529 | |
530 static const unsigned char ag_fl[] = { | |
531 1,2,1,2,1,2,2,1,0,1,1,1,3,1,1,2,1,1,2,1,1,2,1,1,3,1,1,1,1,2,1,1,2,1,1, | |
532 2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,2,1,1,2,0,1,1,1,2,0,1,1,1,2,1,1, | |
533 1,1,1 | |
534 }; | |
535 | |
536 static const unsigned char ag_ptt[] = { | |
537 0, 1, 6, 6, 7, 7, 8, 8, 9, 9, 9, 2, 5, 15, 15, 11, 19, 19, | |
538 11, 22, 22, 11, 25, 25, 11, 31, 31, 31, 31, 11, 34, 34, 11, 37, 37, 11, | |
539 40, 40, 11, 43, 43, 11, 46, 46, 11, 49, 49, 11, 52, 52, 11, 55, 55, 11, | |
540 11, 11, 59, 59, 60, 60, 16, 62, 62, 63, 63, 57, 4, 4, 64, 64, 66, 66, | |
541 66 | |
542 }; | |
543 | |
544 | |
545 static void ag_ra(void) | |
546 { | |
547 switch(ag_rpx[(PCB).ag_ap]) { | |
548 case 1: VS(0) = ag_rp_1(); break; | |
549 case 2: VS(0) = ag_rp_5(VS(0)); break; | |
550 case 3: VS(0) = ag_rp_6(); break; | |
551 case 4: VS(0) = ag_rp_7(); break; | |
552 case 5: VS(0) = ag_rp_8(); break; | |
553 } | |
554 (PCB).la_ptr = (PCB).pointer; | |
555 } | |
556 | |
557 #define TOKEN_NAMES detag2_token_names | |
558 const char *const detag2_token_names[71] = { | |
559 "input string", | |
560 "input string", | |
561 "html", | |
562 "eof", | |
563 "text", | |
564 "tag", | |
565 "", | |
566 "", | |
567 "", | |
568 "", | |
569 "'<'", | |
570 "tag innards", | |
571 "'>'", | |
572 "\"HR\"", | |
573 "\"hr\"", | |
574 "", | |
575 "other stuff", | |
576 "\"P\"", | |
577 "\"p\"", | |
578 "", | |
579 "\"BR\"", | |
580 "\"br\"", | |
581 "", | |
582 "\"H\"", | |
583 "\"h\"", | |
584 "", | |
585 "header type", | |
586 "\"HEAD\"", | |
587 "\"head\"", | |
588 "\"HTML\"", | |
589 "\"html\"", | |
590 "", | |
591 "\"UL\"", | |
592 "\"ul\"", | |
593 "", | |
594 "\"OL\"", | |
595 "\"ol\"", | |
596 "", | |
597 "\"DL\"", | |
598 "\"dl\"", | |
599 "", | |
600 "\"LI\"", | |
601 "\"li\"", | |
602 "", | |
603 "\"TABLE\"", | |
604 "\"table\"", | |
605 "", | |
606 "\"TR\"", | |
607 "\"tr\"", | |
608 "", | |
609 "\"TD\"", | |
610 "\"td\"", | |
611 "", | |
612 "\"PRE\"", | |
613 "\"pre\"", | |
614 "", | |
615 "\"!--\"", | |
616 "comment stuff", | |
617 "tag innard char", | |
618 "", | |
619 "", | |
620 "comment char", | |
621 "", | |
622 "", | |
623 "text char", | |
624 "ordinary text char", | |
625 "entity text char", | |
626 "\"<\"", | |
627 "\">\"", | |
628 "\"&\"", | |
629 "", | |
630 | |
631 }; | |
632 | |
633 #ifndef MISSING_FORMAT | |
634 #define MISSING_FORMAT "Missing %s" | |
635 #endif | |
636 #ifndef UNEXPECTED_FORMAT | |
637 #define UNEXPECTED_FORMAT "Unexpected %s" | |
638 #endif | |
639 #ifndef UNNAMED_TOKEN | |
640 #define UNNAMED_TOKEN "input" | |
641 #endif | |
642 | |
643 | |
644 static void ag_diagnose(void) { | |
645 int ag_snd = (PCB).sn; | |
646 int ag_k = ag_sbt[ag_snd]; | |
647 | |
648 if (*TOKEN_NAMES[ag_tstt[ag_k]] && ag_astt[ag_k + 1] == ag_action_8) { | |
649 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]); | |
650 } | |
651 else if (ag_astt[ag_sbe[(PCB).sn]] == ag_action_8 | |
652 && (ag_k = (int) ag_sbe[(PCB).sn] + 1) == (int) ag_sbt[(PCB).sn+1] - 1 | |
653 && *TOKEN_NAMES[ag_tstt[ag_k]]) { | |
654 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]); | |
655 } | |
656 else if ((PCB).token_number && *TOKEN_NAMES[(PCB).token_number]) { | |
657 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, TOKEN_NAMES[(PCB).token_number]); | |
658 } | |
659 else if (isprint(INPUT_CODE((*(PCB).pointer))) && INPUT_CODE((*(PCB).pointer)) != '\\') { | |
660 char buf[20]; | |
661 sprintf(buf, "\'%c\'", (char) INPUT_CODE((*(PCB).pointer))); | |
662 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, buf); | |
663 } | |
664 else sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, UNNAMED_TOKEN); | |
665 (PCB).error_message = (PCB).ag_msg; | |
666 | |
667 | |
668 } | |
669 static int ag_action_1_r_proc(void); | |
670 static int ag_action_2_r_proc(void); | |
671 static int ag_action_3_r_proc(void); | |
672 static int ag_action_4_r_proc(void); | |
673 static int ag_action_1_s_proc(void); | |
674 static int ag_action_3_s_proc(void); | |
675 static int ag_action_1_proc(void); | |
676 static int ag_action_2_proc(void); | |
677 static int ag_action_3_proc(void); | |
678 static int ag_action_4_proc(void); | |
679 static int ag_action_5_proc(void); | |
680 static int ag_action_6_proc(void); | |
681 static int ag_action_7_proc(void); | |
682 static int ag_action_8_proc(void); | |
683 static int ag_action_9_proc(void); | |
684 static int ag_action_10_proc(void); | |
685 static int ag_action_11_proc(void); | |
686 static int ag_action_8_proc(void); | |
687 | |
688 | |
689 static int (*const ag_r_procs_scan[])(void) = { | |
690 ag_action_1_r_proc, | |
691 ag_action_2_r_proc, | |
692 ag_action_3_r_proc, | |
693 ag_action_4_r_proc | |
694 }; | |
695 | |
696 static int (*const ag_s_procs_scan[])(void) = { | |
697 ag_action_1_s_proc, | |
698 ag_action_2_r_proc, | |
699 ag_action_3_s_proc, | |
700 ag_action_4_r_proc | |
701 }; | |
702 | |
703 static int (*const ag_gt_procs_scan[])(void) = { | |
704 ag_action_1_proc, | |
705 ag_action_2_proc, | |
706 ag_action_3_proc, | |
707 ag_action_4_proc, | |
708 ag_action_5_proc, | |
709 ag_action_6_proc, | |
710 ag_action_7_proc, | |
711 ag_action_8_proc, | |
712 ag_action_9_proc, | |
713 ag_action_10_proc, | |
714 ag_action_11_proc, | |
715 ag_action_8_proc | |
716 }; | |
717 | |
718 | |
719 static int ag_action_10_proc(void) { | |
720 int ag_t = (PCB).token_number; | |
721 (PCB).btsx = 0, (PCB).drt = -1; | |
722 do { | |
723 ag_track(); | |
724 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
725 (PCB).la_ptr++; | |
726 if (ag_key_index[(PCB).sn]) { | |
727 unsigned ag_k = ag_key_index[(PCB).sn]; | |
728 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
729 if (ag_ch <= 255) { | |
730 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
731 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
732 } | |
733 } | |
734 } while ((PCB).token_number == (detag2_token_type) ag_t); | |
735 (PCB).la_ptr = (PCB).pointer; | |
736 return 1; | |
737 } | |
738 | |
739 static int ag_action_11_proc(void) { | |
740 int ag_t = (PCB).token_number; | |
741 | |
742 (PCB).btsx = 0, (PCB).drt = -1; | |
743 do { | |
744 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
745 (PCB).ssx--; | |
746 ag_track(); | |
747 ag_ra(); | |
748 if ((PCB).exit_flag != AG_RUNNING_CODE) return 0; | |
749 (PCB).ssx++; | |
750 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
751 (PCB).la_ptr++; | |
752 if (ag_key_index[(PCB).sn]) { | |
753 unsigned ag_k = ag_key_index[(PCB).sn]; | |
754 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
755 if (ag_ch <= 255) { | |
756 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
757 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
758 } | |
759 } | |
760 } | |
761 while ((PCB).token_number == (detag2_token_type) ag_t); | |
762 (PCB).la_ptr = (PCB).pointer; | |
763 return 1; | |
764 } | |
765 | |
766 static int ag_action_3_r_proc(void) { | |
767 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
768 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
769 (PCB).btsx = 0, (PCB).drt = -1; | |
770 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
771 ag_ra(); | |
772 return (PCB).exit_flag == AG_RUNNING_CODE; | |
773 } | |
774 | |
775 static int ag_action_3_s_proc(void) { | |
776 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
777 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
778 (PCB).btsx = 0, (PCB).drt = -1; | |
779 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
780 ag_ra(); | |
781 return (PCB).exit_flag == AG_RUNNING_CODE; | |
782 } | |
783 | |
784 static int ag_action_4_r_proc(void) { | |
785 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
786 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
787 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
788 return 1; | |
789 } | |
790 | |
791 static int ag_action_2_proc(void) { | |
792 (PCB).btsx = 0, (PCB).drt = -1; | |
793 if ((PCB).ssx >= 128) { | |
794 (PCB).exit_flag = AG_STACK_ERROR_CODE; | |
795 PARSER_STACK_OVERFLOW; | |
796 } | |
797 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
798 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
799 (PCB).ssx++; | |
800 (PCB).sn = (PCB).ag_ap; | |
801 ag_track(); | |
802 return 0; | |
803 } | |
804 | |
805 static int ag_action_9_proc(void) { | |
806 if ((PCB).drt == -1) { | |
807 (PCB).drt=(PCB).token_number; | |
808 (PCB).dssx=(PCB).ssx; | |
809 (PCB).dsn=(PCB).sn; | |
810 } | |
811 ag_prot(); | |
812 (PCB).vs[(PCB).ssx] = ag_null_value; | |
813 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
814 (PCB).ssx++; | |
815 (PCB).sn = (PCB).ag_ap; | |
816 (PCB).la_ptr = (PCB).pointer; | |
817 return (PCB).exit_flag == AG_RUNNING_CODE; | |
818 } | |
819 | |
820 static int ag_action_2_r_proc(void) { | |
821 (PCB).ssx++; | |
822 (PCB).sn = (PCB).ag_ap; | |
823 return 0; | |
824 } | |
825 | |
826 static int ag_action_7_proc(void) { | |
827 --(PCB).ssx; | |
828 (PCB).la_ptr = (PCB).pointer; | |
829 (PCB).exit_flag = AG_SUCCESS_CODE; | |
830 return 0; | |
831 } | |
832 | |
833 static int ag_action_1_proc(void) { | |
834 ag_track(); | |
835 (PCB).exit_flag = AG_SUCCESS_CODE; | |
836 return 0; | |
837 } | |
838 | |
839 static int ag_action_1_r_proc(void) { | |
840 (PCB).exit_flag = AG_SUCCESS_CODE; | |
841 return 0; | |
842 } | |
843 | |
844 static int ag_action_1_s_proc(void) { | |
845 (PCB).exit_flag = AG_SUCCESS_CODE; | |
846 return 0; | |
847 } | |
848 | |
849 static int ag_action_4_proc(void) { | |
850 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
851 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
852 (PCB).btsx = 0, (PCB).drt = -1; | |
853 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
854 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
855 else (PCB).ss[(PCB).ssx] = (PCB).sn; | |
856 ag_track(); | |
857 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
858 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
859 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
860 do { | |
861 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
862 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
863 else ag_t2 = ag_tx; | |
864 } while (ag_t1 < ag_t2); | |
865 (PCB).ag_ap = ag_pstt[ag_t1]; | |
866 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
867 } | |
868 return 0; | |
869 } | |
870 | |
871 static int ag_action_3_proc(void) { | |
872 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
873 (PCB).btsx = 0, (PCB).drt = -1; | |
874 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
875 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
876 else (PCB).ss[(PCB).ssx] = (PCB).sn; | |
877 ag_track(); | |
878 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
879 ag_ra(); | |
880 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
881 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
882 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
883 do { | |
884 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
885 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
886 else ag_t2 = ag_tx; | |
887 } while (ag_t1 < ag_t2); | |
888 (PCB).ag_ap = ag_pstt[ag_t1]; | |
889 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
890 } | |
891 return 0; | |
892 } | |
893 | |
894 static int ag_action_8_proc(void) { | |
895 ag_undo(); | |
896 (PCB).la_ptr = (PCB).pointer; | |
897 (PCB).exit_flag = AG_SYNTAX_ERROR_CODE; | |
898 ag_diagnose(); | |
899 SYNTAX_ERROR; | |
900 {(PCB).la_ptr = (PCB).pointer + 1; ag_track();} | |
901 return (PCB).exit_flag == AG_RUNNING_CODE; | |
902 } | |
903 | |
904 static int ag_action_5_proc(void) { | |
905 int ag_sd = ag_fl[(PCB).ag_ap]; | |
906 (PCB).btsx = 0, (PCB).drt = -1; | |
907 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
908 else { | |
909 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
910 } | |
911 (PCB).la_ptr = (PCB).pointer; | |
912 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
913 ag_ra(); | |
914 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
915 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
916 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
917 do { | |
918 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
919 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
920 else ag_t2 = ag_tx; | |
921 } while (ag_t1 < ag_t2); | |
922 (PCB).ag_ap = ag_pstt[ag_t1]; | |
923 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
924 } | |
925 return (PCB).exit_flag == AG_RUNNING_CODE; | |
926 } | |
927 | |
928 static int ag_action_6_proc(void) { | |
929 int ag_sd = ag_fl[(PCB).ag_ap]; | |
930 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap]; | |
931 if ((PCB).drt == -1) { | |
932 (PCB).drt=(PCB).token_number; | |
933 (PCB).dssx=(PCB).ssx; | |
934 (PCB).dsn=(PCB).sn; | |
935 } | |
936 if (ag_sd) { | |
937 (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
938 } | |
939 else { | |
940 ag_prot(); | |
941 (PCB).vs[(PCB).ssx] = ag_null_value; | |
942 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
943 } | |
944 (PCB).la_ptr = (PCB).pointer; | |
945 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
946 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
947 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
948 do { | |
949 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
950 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
951 else ag_t2 = ag_tx; | |
952 } while (ag_t1 < ag_t2); | |
953 (PCB).ag_ap = ag_pstt[ag_t1]; | |
954 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
955 } | |
956 return (PCB).exit_flag == AG_RUNNING_CODE; | |
957 } | |
958 | |
959 | |
960 void init_detag2(void) { | |
961 (PCB).la_ptr = (PCB).pointer; | |
962 (PCB).ss[0] = (PCB).sn = (PCB).ssx = 0; | |
963 (PCB).exit_flag = AG_RUNNING_CODE; | |
964 (PCB).line = FIRST_LINE; | |
965 (PCB).column = FIRST_COLUMN; | |
966 (PCB).btsx = 0, (PCB).drt = -1; | |
967 } | |
968 | |
969 void detag2(void) { | |
970 init_detag2(); | |
971 (PCB).exit_flag = AG_RUNNING_CODE; | |
972 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
973 unsigned ag_t1 = ag_sbt[(PCB).sn]; | |
974 if (ag_tstt[ag_t1]) { | |
975 unsigned ag_t2 = ag_sbe[(PCB).sn] - 1; | |
976 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
977 (PCB).la_ptr++; | |
978 if (ag_key_index[(PCB).sn]) { | |
979 unsigned ag_k = ag_key_index[(PCB).sn]; | |
980 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
981 if (ag_ch <= 255) { | |
982 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
983 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
984 } | |
985 } | |
986 do { | |
987 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
988 if (ag_tstt[ag_tx] > (unsigned char)(PCB).token_number) | |
989 ag_t1 = ag_tx + 1; | |
990 else ag_t2 = ag_tx; | |
991 } while (ag_t1 < ag_t2); | |
992 if (ag_tstt[ag_t1] != (unsigned char)(PCB).token_number) | |
993 ag_t1 = ag_sbe[(PCB).sn]; | |
994 } | |
995 (PCB).ag_ap = ag_pstt[ag_t1]; | |
996 (ag_gt_procs_scan[ag_astt[ag_t1]])(); | |
997 } | |
998 } | |
999 | |
1000 |