Mercurial > ~dholland > hg > ag > index.cgi
comparison tests/agcl/parsifal/good/detag.c @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:13d2b8934445 |
---|---|
1 /* | |
2 detag.syn | |
3 | |
4 Program to strip HTML tags from HTML files. | |
5 Copyright (c) 1996 - 1999 Parsifal Software, All | |
6 Rights Reserved. | |
7 See the file COPYING for license and usage terms. | |
8 | |
9 For information about AnaGram, visit http://www.parsifalsoft.com. | |
10 */ | |
11 | |
12 #include <stdio.h> | |
13 | |
14 | |
15 /* | |
16 * AnaGram, A System for Syntax Directed Programming | |
17 * File generated by: ... | |
18 * | |
19 * AnaGram Parsing Engine | |
20 * Copyright 1993-2002 Parsifal Software. All Rights Reserved. | |
21 * | |
22 * This software is provided 'as-is', without any express or implied | |
23 * warranty. In no event will the authors be held liable for any damages | |
24 * arising from the use of this software. | |
25 * | |
26 * Permission is granted to anyone to use this software for any purpose, | |
27 * including commercial applications, and to alter it and redistribute it | |
28 * freely, subject to the following restrictions: | |
29 * | |
30 * 1. The origin of this software must not be misrepresented; you must not | |
31 * claim that you wrote the original software. If you use this software | |
32 * in a product, an acknowledgment in the product documentation would be | |
33 * appreciated but is not required. | |
34 * 2. Altered source versions must be plainly marked as such, and must not be | |
35 * misrepresented as being the original software. | |
36 * 3. This notice may not be removed or altered from any source distribution. | |
37 */ | |
38 | |
39 #ifndef DETAG_H | |
40 #include "detag.h" | |
41 #endif | |
42 | |
43 #ifndef DETAG_H | |
44 #error Mismatched header file | |
45 #endif | |
46 | |
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
49 | |
50 #define RULE_CONTEXT (&((PCB).cs[(PCB).ssx])) | |
51 #define ERROR_CONTEXT ((PCB).cs[(PCB).error_frame_ssx]) | |
52 #define CONTEXT ((PCB).cs[(PCB).ssx]) | |
53 | |
54 | |
55 | |
56 detag_pcb_type detag_pcb; | |
57 #define PCB detag_pcb | |
58 | |
59 /* Line -, detag.syn */ | |
60 // ----- Embedded C --------------------------- | |
61 | |
62 FILE *output; | |
63 | |
64 int main(int argc, char *argv[]) { | |
65 | |
66 FILE *input; | |
67 size_t fileLength; | |
68 size_t stringLength; | |
69 int errorFlag = 0; | |
70 char *inString; | |
71 | |
72 | |
73 /* Check for enough arguments */ | |
74 if (argc != 3) { | |
75 printf("Program to strip HTML tags from a file\n" | |
76 "Usage: %s <input filename> <output filename>\n", argv[0]); | |
77 return 1; | |
78 } | |
79 | |
80 /* Open input file for reading only */ | |
81 input = fopen(argv[1],"r"); | |
82 if (input == NULL) { | |
83 printf("Cannot open %s\n", argv[1]); | |
84 return 2; | |
85 } | |
86 | |
87 /* find out how big the file is */ | |
88 if (fseek(input, SEEK_SET, SEEK_END)) { | |
89 printf("Strange problems with %s\n", argv[1]); | |
90 return 3; | |
91 } | |
92 fileLength = ftell(input); | |
93 if (fileLength < 0 ) { // -1L is error return | |
94 printf("Error getting file length (%d) of %s\n", fileLength, argv[1]); | |
95 return 4; | |
96 } | |
97 | |
98 /* fseek to beginning of file */ | |
99 if (fseek(input, 0, SEEK_SET)) { | |
100 printf("Strange problems with %s\n", argv[1]); | |
101 return 5; | |
102 } | |
103 | |
104 /* Allocate storage for input string */ | |
105 inString = (char*)malloc(fileLength + 1); | |
106 if (inString == NULL) { | |
107 printf("Insufficient memory\n"); | |
108 return 6; | |
109 } | |
110 | |
111 /* Read file */ | |
112 stringLength = fread(inString, 1, fileLength, input); | |
113 if (stringLength == 0) { | |
114 printf("Unable to read %s\n", argv[1]); | |
115 return 7; | |
116 } | |
117 inString[stringLength] = 0; | |
118 | |
119 | |
120 /* Open output file for writing only */ | |
121 output = fopen(argv[2],"w"); | |
122 if (output == NULL) { | |
123 printf("Cannot open %s\n", argv[2]); | |
124 free(inString); | |
125 fclose(input); | |
126 return 8; | |
127 } | |
128 | |
129 | |
130 /* Invoke parser */ | |
131 PCB.pointer = (unsigned char *)inString; // using pointer input | |
132 detag(); | |
133 if (PCB.exit_flag != 1) { | |
134 printf( "Unsuccessful termination of parse, PCB.exit_flag = %d\n", | |
135 PCB.exit_flag); | |
136 } | |
137 | |
138 | |
139 /* Done */ | |
140 free(inString); | |
141 fclose(input); | |
142 fclose(output); | |
143 printf( " End detag "); | |
144 return 0; | |
145 } | |
146 | |
147 | |
148 #ifndef CONVERT_CASE | |
149 #define CONVERT_CASE(c) (c) | |
150 #endif | |
151 #ifndef TAB_SPACING | |
152 #define TAB_SPACING 8 | |
153 #endif | |
154 | |
155 #define ag_rp_1() (putc('\n', output)) | |
156 | |
157 #define ag_rp_5(c) (putc(c, output)) | |
158 | |
159 #define ag_rp_6() (putc('<', output)) | |
160 | |
161 #define ag_rp_7() (putc('>', output)) | |
162 | |
163 #define ag_rp_8() (putc('&', output)) | |
164 | |
165 | |
166 #define READ_COUNTS | |
167 #define WRITE_COUNTS | |
168 #undef V | |
169 #define V(i,t) (*t (&(PCB).vs[(PCB).ssx + i])) | |
170 #undef VS | |
171 #define VS(i) (PCB).vs[(PCB).ssx + i] | |
172 | |
173 #ifndef GET_CONTEXT | |
174 #define GET_CONTEXT CONTEXT = (PCB).input_context | |
175 #endif | |
176 | |
177 typedef enum { | |
178 ag_action_1, | |
179 ag_action_2, | |
180 ag_action_3, | |
181 ag_action_4, | |
182 ag_action_5, | |
183 ag_action_6, | |
184 ag_action_7, | |
185 ag_action_8, | |
186 ag_action_9, | |
187 ag_action_10, | |
188 ag_action_11, | |
189 ag_action_12 | |
190 } ag_parser_action; | |
191 | |
192 | |
193 #ifndef NULL_VALUE_INITIALIZER | |
194 #define NULL_VALUE_INITIALIZER = 0 | |
195 #endif | |
196 | |
197 static int const ag_null_value NULL_VALUE_INITIALIZER; | |
198 | |
199 static const unsigned char ag_rpx[] = { | |
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, | |
201 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
202 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 4, | |
204 5 | |
205 }; | |
206 | |
207 static const unsigned char ag_key_itt[] = { | |
208 0 | |
209 }; | |
210 | |
211 static const unsigned short ag_key_pt[] = { | |
212 0 | |
213 }; | |
214 | |
215 static const unsigned char ag_key_ch[] = { | |
216 0, 97,103,108,255, 38,255, 69, 82, 84,255, 82,255, 65, 68, 82,255,101, | |
217 114,116,255,114,255, 97,100,114,255, 33, 66, 68, 72, 76, 79, 80, 84, 85, | |
218 98,100,104,108,111,112,116,117,255 | |
219 }; | |
220 | |
221 static const unsigned char ag_key_act[] = { | |
222 0,3,3,3,4,2,4,3,0,3,4,3,4,3,0,0,4,3,0,3,4,3,4,3,0,0,4,3,3,3,1,3,3,1,2, | |
223 3,3,3,1,3,3,1,2,3,4 | |
224 }; | |
225 | |
226 static const unsigned char ag_key_parm[] = { | |
227 0, 69, 68, 67, 0, 0, 0, 27, 13, 29, 0, 53, 0, 44, 50, 47, 0, 28, | |
228 14, 30, 0, 54, 0, 45, 51, 48, 0, 56, 20, 38, 23, 41, 35, 17, 0, 32, | |
229 21, 39, 24, 42, 36, 18, 0, 33, 0 | |
230 }; | |
231 | |
232 static const unsigned char ag_key_jmp[] = { | |
233 0, 0, 4, 7, 0, 1, 0, 17, 0, 20, 0, 27, 0, 29, 0, 0, 0, 39, | |
234 0, 42, 0, 49, 0, 51, 0, 0, 0, 10, 13, 15, 7, 23, 25, 11, 13, 33, | |
235 35, 37, 17, 45, 47, 21, 23, 55, 0 | |
236 }; | |
237 | |
238 static const unsigned char ag_key_index[] = { | |
239 5, 27, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
240 0, 0, 0, 0, 0, 5, 5, 0, 0 | |
241 }; | |
242 | |
243 static const unsigned char ag_key_ends[] = { | |
244 109,112,59,0, 116,59,0, 116,59,0, 45,45,0, 82,0, 76,0, | |
245 65,68,0, 77,76,0, 73,0, 76,0, 69,0, 66,76,69,0, 76,0, 114,0, | |
246 108,0, 97,100,0, 109,108,0, 105,0, 108,0, 101,0, 98,108,101,0, | |
247 108,0, | |
248 }; | |
249 | |
250 #define AG_TCV(x) ag_tcv[(x)] | |
251 | |
252 static const unsigned char ag_tcv[] = { | |
253 3, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
254 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
255 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 26, 26, 26, 26, 26, | |
256 26, 70, 70, 70, 70, 70, 10, 70, 12, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
257 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
258 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
259 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
260 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
261 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
262 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
263 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
264 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
265 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
266 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, | |
267 70, 70, 70, 70 | |
268 }; | |
269 | |
270 #ifndef SYNTAX_ERROR | |
271 #define SYNTAX_ERROR fprintf(stderr,"%s, line %d, column %d\n", \ | |
272 (PCB).error_message, (PCB).line, (PCB).column) | |
273 #endif | |
274 | |
275 #ifndef FIRST_LINE | |
276 #define FIRST_LINE 1 | |
277 #endif | |
278 | |
279 #ifndef FIRST_COLUMN | |
280 #define FIRST_COLUMN 1 | |
281 #endif | |
282 | |
283 #ifndef PARSER_STACK_OVERFLOW | |
284 #define PARSER_STACK_OVERFLOW {fprintf(stderr, \ | |
285 "\nParser stack overflow, line %d, column %d\n",\ | |
286 (PCB).line, (PCB).column);} | |
287 #endif | |
288 | |
289 #ifndef REDUCTION_TOKEN_ERROR | |
290 #define REDUCTION_TOKEN_ERROR {fprintf(stderr, \ | |
291 "\nReduction token error, line %d, column %d\n", \ | |
292 (PCB).line, (PCB).column);} | |
293 #endif | |
294 | |
295 | |
296 #ifndef INPUT_CODE | |
297 #define INPUT_CODE(T) (T) | |
298 #endif | |
299 | |
300 typedef enum | |
301 {ag_accept_key, ag_set_key, ag_jmp_key, ag_end_key, ag_no_match_key, | |
302 ag_cf_accept_key, ag_cf_set_key, ag_cf_end_key} key_words; | |
303 | |
/*
 * Try to recognize a keyword starting at the current lookahead position.
 *
 * ag_k indexes the parallel keyword tables (ag_key_act, ag_key_parm,
 * ag_key_jmp, ag_key_ch).  Each pass of the outer loop executes the action
 * stored in ag_key_act[ag_k], then reads one more input character and
 * searches ag_key_ch from the new ag_k for a matching entry.
 *
 * On a match, (PCB).token_number is set and (PCB).la_ptr is left past the
 * characters consumed.  On failure (PCB).la_ptr is restored to the offset
 * remembered in ag_save; (PCB).token_number keeps whatever value it had
 * (the caller's value, or one recorded by an earlier "set" action).
 */
static void ag_get_key_word(int ag_k) {
  /* Offset of la_ptr from pointer to fall back to if matching fails. */
  int ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
  const unsigned char *ag_p;
  int ag_ch;
  while (1) {
    switch (ag_key_act[ag_k]) {
    case ag_cf_end_key: {
      /* Match the remaining characters of the keyword, stored as a
         zero-terminated string in ag_key_ends. */
      const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
      do {
        if ((ag_ch = *sp++) == 0) {
          /* Whole tail matched: reject if ag_key_itt flags the next
             input character, otherwise accept. */
          int ag_k1 = ag_key_parm[ag_k];
          int ag_k2 = ag_key_pt[ag_k1];
          if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) goto ag_fail;
          (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1 + 1];
          return;
        }
      } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
      goto ag_fail;
    }
    case ag_end_key: {
      /* Same tail match as above, without the following-character test. */
      const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
      do {
        if ((ag_ch = *sp++) == 0) {
          (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
          return;
        }
      } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
    }
    /* A mismatch in ag_end_key deliberately falls through to here. */
    case ag_no_match_key:
ag_fail:
      (PCB).la_ptr = (PCB).pointer + ag_save;
      return;
    case ag_cf_set_key: {
      int ag_k1 = ag_key_parm[ag_k];
      int ag_k2 = ag_key_pt[ag_k1];
      ag_k = ag_key_jmp[ag_k];
      /* Only record this prefix as a fallback match when ag_key_itt does
         not flag the next character; keep scanning either way. */
      if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) break;
      ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
      (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1+1];
      break;
    }
    case ag_set_key:
      /* Record the prefix matched so far as a fallback, then continue
         (deliberate fallthrough into the jump). */
      ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
      (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
    case ag_jmp_key:
      ag_k = ag_key_jmp[ag_k];
      break;
    case ag_accept_key:
      (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
      return;
    case ag_cf_accept_key: {
      int ag_k1 = ag_key_parm[ag_k];
      int ag_k2 = ag_key_pt[ag_k1];
      if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)])
        (PCB).la_ptr = (PCB).pointer + ag_save;
      else (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1+1];
      return;
    }
    }
    /* Consume the next character and scan forward in ag_key_ch for it;
       the scan stops at the first entry that is not smaller. */
    ag_ch = CONVERT_CASE(*(PCB).la_ptr++);
    ag_p = &ag_key_ch[ag_k];
    if (ag_ch <= 255) while (*ag_p < ag_ch) ag_p++;
    if (ag_ch > 255 || *ag_p != ag_ch) {
      (PCB).la_ptr = (PCB).pointer + ag_save;
      return;
    }
    ag_k = (int) (ag_p - ag_key_ch);
  }
}
373 | |
374 | |
375 #ifndef AG_NEWLINE | |
376 #define AG_NEWLINE 10 | |
377 #endif | |
378 | |
379 #ifndef AG_RETURN | |
380 #define AG_RETURN 13 | |
381 #endif | |
382 | |
383 #ifndef AG_FORMFEED | |
384 #define AG_FORMFEED 12 | |
385 #endif | |
386 | |
387 #ifndef AG_TABCHAR | |
388 #define AG_TABCHAR 9 | |
389 #endif | |
390 | |
391 static void ag_track(void) { | |
392 int ag_k = (int) ((PCB).la_ptr - (PCB).pointer); | |
393 while (ag_k--) { | |
394 switch (*(PCB).pointer++) { | |
395 case AG_NEWLINE: | |
396 (PCB).column = 1, (PCB).line++; | |
397 case AG_RETURN: | |
398 case AG_FORMFEED: | |
399 break; | |
400 case AG_TABCHAR: | |
401 (PCB).column += (TAB_SPACING) - ((PCB).column - 1) % (TAB_SPACING); | |
402 break; | |
403 default: | |
404 (PCB).column++; | |
405 } | |
406 } | |
407 } | |
408 | |
409 | |
410 static void ag_prot(void) { | |
411 int ag_k; | |
412 ag_k = 128 - ++(PCB).btsx; | |
413 if (ag_k <= (PCB).ssx) { | |
414 (PCB).exit_flag = AG_STACK_ERROR_CODE; | |
415 PARSER_STACK_OVERFLOW; | |
416 return; | |
417 } | |
418 (PCB).bts[(PCB).btsx] = (PCB).sn; | |
419 (PCB).bts[ag_k] = (PCB).ssx; | |
420 (PCB).vs[ag_k] = (PCB).vs[(PCB).ssx]; | |
421 (PCB).ss[ag_k] = (PCB).ss[(PCB).ssx]; | |
422 } | |
423 | |
424 static void ag_undo(void) { | |
425 if ((PCB).drt == -1) return; | |
426 while ((PCB).btsx) { | |
427 int ag_k = 128 - (PCB).btsx; | |
428 (PCB).sn = (PCB).bts[(PCB).btsx--]; | |
429 (PCB).ssx = (PCB).bts[ag_k]; | |
430 (PCB).vs[(PCB).ssx] = (PCB).vs[ag_k]; | |
431 (PCB).ss[(PCB).ssx] = (PCB).ss[ag_k]; | |
432 } | |
433 (PCB).token_number = (detag_token_type) (PCB).drt; | |
434 (PCB).ssx = (PCB).dssx; | |
435 (PCB).sn = (PCB).dsn; | |
436 (PCB).drt = -1; | |
437 } | |
438 | |
439 | |
440 static const unsigned char ag_tstt[] = { | |
441 70,69,68,67,26,12,10,3,0,1,2,4,5,6,7,8,9,64,66, | |
442 70,56,54,53,51,50,48,47,45,44,42,41,39,38,36,35,33,32,30,29,28,27,26,24,23, | |
443 21,20,18,17,14,13,12,0,11,15,16,19,22,25,31,34,37,40,43,46,49,52,55,59, | |
444 60, | |
445 10,0,5, | |
446 70,69,68,67,26,12,0,64,66, | |
447 70,69,68,67,26,12,0,4,64,66, | |
448 10,0,5,6, | |
449 3,0, | |
450 70,26,0, | |
451 70,26,12,10,0,57,62,63, | |
452 70,26,12,0,16,59,60, | |
453 70,26,12,0,16,59,60, | |
454 70,26,12,0,16,59,60, | |
455 70,26,12,0,16,59,60, | |
456 70,26,12,0,16,59,60, | |
457 70,26,12,0,16,59,60, | |
458 70,26,12,0,16,59,60, | |
459 70,26,12,0,16,59,60, | |
460 70,26,12,0,16,59,60, | |
461 26,0, | |
462 70,26,12,0,16,59,60, | |
463 70,26,12,0,16,59,60, | |
464 70,26,12,0,16,59,60, | |
465 12,0, | |
466 70,69,68,67,26,12,0,64,66, | |
467 10,0,5, | |
468 70,26,10,0, | |
469 70,26,12,0,16,59,60, | |
470 | |
471 }; | |
472 | |
473 | |
474 static unsigned const char ag_astt[219] = { | |
475 2,2,2,2,2,2,1,5,7,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, | |
476 1,1,1,1,1,1,1,1,1,1,1,1,1,5,7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,3,2,2, | |
477 2,2,2,2,5,3,3,2,2,2,2,2,2,5,1,1,1,1,5,1,1,3,7,9,9,5,1,1,5,1,7,3,1,3,1,1,5, | |
478 7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,2,1,2,1,1,5,7,3,1,3,1,1,5,7,3, | |
479 1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,7,1,1,5,7,3,1,3,1,1,5,7,2, | |
480 1,2,1,1,5,7,2,1,2,3,7,2,2,2,2,2,2,5,3,3,1,5,3,9,9,9,5,1,1,5,7,2,1,2 | |
481 }; | |
482 | |
483 | |
484 static const unsigned char ag_pstt[] = { | |
485 68,72,71,70,68,68,1,8,0,0,6,3,2,2,5,4,6,3,3, | |
486 7,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,17,17,7,18,18,19,19, | |
487 20,20,21,21,58,1,22,21,22,20,19,18,17,16,15,14,13,12,11,10,9,7,22, | |
488 1,7,3, | |
489 68,72,71,70,68,68,4,67,67, | |
490 68,72,71,70,68,68,10,23,23,23, | |
491 1,9,24,24, | |
492 1,6, | |
493 57,57,59, | |
494 25,25,63,25,8,54,25,54, | |
495 7,7,58,9,53,7,53, | |
496 7,7,58,10,50,7,50, | |
497 7,7,58,11,47,7,47, | |
498 7,7,58,12,44,7,44, | |
499 7,7,58,13,41,7,41, | |
500 7,7,58,14,38,7,38, | |
501 7,7,58,15,35,7,35, | |
502 7,7,58,16,32,7,32, | |
503 7,7,58,17,29,7,29, | |
504 26,18, | |
505 7,7,58,19,21,7,21, | |
506 7,7,58,20,18,7,18, | |
507 7,7,58,21,15,7,15, | |
508 12,22, | |
509 68,72,71,70,68,68,5,67,67, | |
510 1,6,3, | |
511 62,62,62,64, | |
512 7,7,58,26,24,7,24, | |
513 | |
514 }; | |
515 | |
516 | |
517 static const unsigned char ag_sbt[] = { | |
518 0, 19, 69, 72, 81, 91, 95, 97, 100, 108, 115, 122, 129, 136, | |
519 143, 150, 157, 164, 171, 173, 180, 187, 194, 196, 205, 208, 212, 219 | |
520 }; | |
521 | |
522 | |
523 static const unsigned char ag_sbe[] = { | |
524 8, 51, 70, 78, 87, 92, 96, 99, 104, 111, 118, 125, 132, 139, | |
525 146, 153, 160, 167, 172, 176, 183, 190, 195, 202, 206, 211, 215, 219 | |
526 }; | |
527 | |
528 | |
529 static const unsigned char ag_fl[] = { | |
530 1,2,1,2,1,2,2,1,0,1,1,1,3,1,1,2,1,1,2,1,1,2,1,1,3,1,1,1,1,2,1,1,2,1,1, | |
531 2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,2,1,1,2,0,1,1,1,2,0,1,1,1,2,1,1, | |
532 1,1,1 | |
533 }; | |
534 | |
535 static const unsigned char ag_ptt[] = { | |
536 0, 1, 6, 6, 7, 7, 8, 8, 9, 9, 9, 2, 5, 15, 15, 11, 19, 19, | |
537 11, 22, 22, 11, 25, 25, 11, 31, 31, 31, 31, 11, 34, 34, 11, 37, 37, 11, | |
538 40, 40, 11, 43, 43, 11, 46, 46, 11, 49, 49, 11, 52, 52, 11, 55, 55, 11, | |
539 11, 11, 59, 59, 60, 60, 16, 62, 62, 63, 63, 57, 4, 4, 64, 64, 66, 66, | |
540 66 | |
541 }; | |
542 | |
543 | |
544 static void ag_ra(void) | |
545 { | |
546 switch(ag_rpx[(PCB).ag_ap]) { | |
547 case 1: VS(0) = ag_rp_1(); break; | |
548 case 2: VS(0) = ag_rp_5(VS(0)); break; | |
549 case 3: VS(0) = ag_rp_6(); break; | |
550 case 4: VS(0) = ag_rp_7(); break; | |
551 case 5: VS(0) = ag_rp_8(); break; | |
552 } | |
553 (PCB).la_ptr = (PCB).pointer; | |
554 } | |
555 | |
556 #define TOKEN_NAMES detag_token_names | |
557 const char *const detag_token_names[71] = { | |
558 "input string", | |
559 "input string", | |
560 "html", | |
561 "eof", | |
562 "text", | |
563 "tag", | |
564 "", | |
565 "", | |
566 "", | |
567 "", | |
568 "'<'", | |
569 "tag innards", | |
570 "'>'", | |
571 "\"HR\"", | |
572 "\"hr\"", | |
573 "", | |
574 "other stuff", | |
575 "\"P\"", | |
576 "\"p\"", | |
577 "", | |
578 "\"BR\"", | |
579 "\"br\"", | |
580 "", | |
581 "\"H\"", | |
582 "\"h\"", | |
583 "", | |
584 "header type", | |
585 "\"HEAD\"", | |
586 "\"head\"", | |
587 "\"HTML\"", | |
588 "\"html\"", | |
589 "", | |
590 "\"UL\"", | |
591 "\"ul\"", | |
592 "", | |
593 "\"OL\"", | |
594 "\"ol\"", | |
595 "", | |
596 "\"DL\"", | |
597 "\"dl\"", | |
598 "", | |
599 "\"LI\"", | |
600 "\"li\"", | |
601 "", | |
602 "\"TABLE\"", | |
603 "\"table\"", | |
604 "", | |
605 "\"TR\"", | |
606 "\"tr\"", | |
607 "", | |
608 "\"TD\"", | |
609 "\"td\"", | |
610 "", | |
611 "\"PRE\"", | |
612 "\"pre\"", | |
613 "", | |
614 "\"!--\"", | |
615 "comment stuff", | |
616 "tag innard char", | |
617 "", | |
618 "", | |
619 "comment char", | |
620 "", | |
621 "", | |
622 "text char", | |
623 "ordinary text char", | |
624 "entity text char", | |
625 "\"<\"", | |
626 "\">\"", | |
627 "\"&\"", | |
628 "", | |
629 | |
630 }; | |
631 | |
632 #ifndef MISSING_FORMAT | |
633 #define MISSING_FORMAT "Missing %s" | |
634 #endif | |
635 #ifndef UNEXPECTED_FORMAT | |
636 #define UNEXPECTED_FORMAT "Unexpected %s" | |
637 #endif | |
638 #ifndef UNNAMED_TOKEN | |
639 #define UNNAMED_TOKEN "input" | |
640 #endif | |
641 | |
642 | |
643 static void ag_diagnose(void) { | |
644 int ag_snd = (PCB).sn; | |
645 int ag_k = ag_sbt[ag_snd]; | |
646 | |
647 if (*TOKEN_NAMES[ag_tstt[ag_k]] && ag_astt[ag_k + 1] == ag_action_8) { | |
648 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]); | |
649 } | |
650 else if (ag_astt[ag_sbe[(PCB).sn]] == ag_action_8 | |
651 && (ag_k = (int) ag_sbe[(PCB).sn] + 1) == (int) ag_sbt[(PCB).sn+1] - 1 | |
652 && *TOKEN_NAMES[ag_tstt[ag_k]]) { | |
653 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]); | |
654 } | |
655 else if ((PCB).token_number && *TOKEN_NAMES[(PCB).token_number]) { | |
656 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, TOKEN_NAMES[(PCB).token_number]); | |
657 } | |
658 else if (isprint(INPUT_CODE((*(PCB).pointer))) && INPUT_CODE((*(PCB).pointer)) != '\\') { | |
659 char buf[20]; | |
660 sprintf(buf, "\'%c\'", (char) INPUT_CODE((*(PCB).pointer))); | |
661 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, buf); | |
662 } | |
663 else sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, UNNAMED_TOKEN); | |
664 (PCB).error_message = (PCB).ag_msg; | |
665 | |
666 | |
667 } | |
668 static int ag_action_1_r_proc(void); | |
669 static int ag_action_2_r_proc(void); | |
670 static int ag_action_3_r_proc(void); | |
671 static int ag_action_4_r_proc(void); | |
672 static int ag_action_1_s_proc(void); | |
673 static int ag_action_3_s_proc(void); | |
674 static int ag_action_1_proc(void); | |
675 static int ag_action_2_proc(void); | |
676 static int ag_action_3_proc(void); | |
677 static int ag_action_4_proc(void); | |
678 static int ag_action_5_proc(void); | |
679 static int ag_action_6_proc(void); | |
680 static int ag_action_7_proc(void); | |
681 static int ag_action_8_proc(void); | |
682 static int ag_action_9_proc(void); | |
683 static int ag_action_10_proc(void); | |
684 static int ag_action_11_proc(void); | |
685 static int ag_action_8_proc(void); | |
686 | |
687 | |
688 static int (*const ag_r_procs_scan[])(void) = { | |
689 ag_action_1_r_proc, | |
690 ag_action_2_r_proc, | |
691 ag_action_3_r_proc, | |
692 ag_action_4_r_proc | |
693 }; | |
694 | |
695 static int (*const ag_s_procs_scan[])(void) = { | |
696 ag_action_1_s_proc, | |
697 ag_action_2_r_proc, | |
698 ag_action_3_s_proc, | |
699 ag_action_4_r_proc | |
700 }; | |
701 | |
702 static int (*const ag_gt_procs_scan[])(void) = { | |
703 ag_action_1_proc, | |
704 ag_action_2_proc, | |
705 ag_action_3_proc, | |
706 ag_action_4_proc, | |
707 ag_action_5_proc, | |
708 ag_action_6_proc, | |
709 ag_action_7_proc, | |
710 ag_action_8_proc, | |
711 ag_action_9_proc, | |
712 ag_action_10_proc, | |
713 ag_action_11_proc, | |
714 ag_action_8_proc | |
715 }; | |
716 | |
717 | |
718 static int ag_action_10_proc(void) { | |
719 int ag_t = (PCB).token_number; | |
720 (PCB).btsx = 0, (PCB).drt = -1; | |
721 do { | |
722 ag_track(); | |
723 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
724 (PCB).la_ptr++; | |
725 if (ag_key_index[(PCB).sn]) { | |
726 unsigned ag_k = ag_key_index[(PCB).sn]; | |
727 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
728 if (ag_ch <= 255) { | |
729 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
730 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
731 } | |
732 } | |
733 } while ((PCB).token_number == (detag_token_type) ag_t); | |
734 (PCB).la_ptr = (PCB).pointer; | |
735 return 1; | |
736 } | |
737 | |
738 static int ag_action_11_proc(void) { | |
739 int ag_t = (PCB).token_number; | |
740 | |
741 (PCB).btsx = 0, (PCB).drt = -1; | |
742 do { | |
743 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
744 (PCB).ssx--; | |
745 ag_track(); | |
746 ag_ra(); | |
747 if ((PCB).exit_flag != AG_RUNNING_CODE) return 0; | |
748 (PCB).ssx++; | |
749 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
750 (PCB).la_ptr++; | |
751 if (ag_key_index[(PCB).sn]) { | |
752 unsigned ag_k = ag_key_index[(PCB).sn]; | |
753 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
754 if (ag_ch <= 255) { | |
755 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
756 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
757 } | |
758 } | |
759 } | |
760 while ((PCB).token_number == (detag_token_type) ag_t); | |
761 (PCB).la_ptr = (PCB).pointer; | |
762 return 1; | |
763 } | |
764 | |
765 static int ag_action_3_r_proc(void) { | |
766 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
767 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
768 (PCB).btsx = 0, (PCB).drt = -1; | |
769 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
770 ag_ra(); | |
771 return (PCB).exit_flag == AG_RUNNING_CODE; | |
772 } | |
773 | |
774 static int ag_action_3_s_proc(void) { | |
775 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
776 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
777 (PCB).btsx = 0, (PCB).drt = -1; | |
778 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
779 ag_ra(); | |
780 return (PCB).exit_flag == AG_RUNNING_CODE; | |
781 } | |
782 | |
783 static int ag_action_4_r_proc(void) { | |
784 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
785 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
786 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
787 return 1; | |
788 } | |
789 | |
790 static int ag_action_2_proc(void) { | |
791 (PCB).btsx = 0, (PCB).drt = -1; | |
792 if ((PCB).ssx >= 128) { | |
793 (PCB).exit_flag = AG_STACK_ERROR_CODE; | |
794 PARSER_STACK_OVERFLOW; | |
795 } | |
796 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
797 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
798 (PCB).ssx++; | |
799 (PCB).sn = (PCB).ag_ap; | |
800 ag_track(); | |
801 return 0; | |
802 } | |
803 | |
804 static int ag_action_9_proc(void) { | |
805 if ((PCB).drt == -1) { | |
806 (PCB).drt=(PCB).token_number; | |
807 (PCB).dssx=(PCB).ssx; | |
808 (PCB).dsn=(PCB).sn; | |
809 } | |
810 ag_prot(); | |
811 (PCB).vs[(PCB).ssx] = ag_null_value; | |
812 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
813 (PCB).ssx++; | |
814 (PCB).sn = (PCB).ag_ap; | |
815 (PCB).la_ptr = (PCB).pointer; | |
816 return (PCB).exit_flag == AG_RUNNING_CODE; | |
817 } | |
818 | |
819 static int ag_action_2_r_proc(void) { | |
820 (PCB).ssx++; | |
821 (PCB).sn = (PCB).ag_ap; | |
822 return 0; | |
823 } | |
824 | |
825 static int ag_action_7_proc(void) { | |
826 --(PCB).ssx; | |
827 (PCB).la_ptr = (PCB).pointer; | |
828 (PCB).exit_flag = AG_SUCCESS_CODE; | |
829 return 0; | |
830 } | |
831 | |
832 static int ag_action_1_proc(void) { | |
833 ag_track(); | |
834 (PCB).exit_flag = AG_SUCCESS_CODE; | |
835 return 0; | |
836 } | |
837 | |
838 static int ag_action_1_r_proc(void) { | |
839 (PCB).exit_flag = AG_SUCCESS_CODE; | |
840 return 0; | |
841 } | |
842 | |
843 static int ag_action_1_s_proc(void) { | |
844 (PCB).exit_flag = AG_SUCCESS_CODE; | |
845 return 0; | |
846 } | |
847 | |
848 static int ag_action_4_proc(void) { | |
849 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
850 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
851 (PCB).btsx = 0, (PCB).drt = -1; | |
852 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
853 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
854 else (PCB).ss[(PCB).ssx] = (PCB).sn; | |
855 ag_track(); | |
856 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
857 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
858 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
859 do { | |
860 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
861 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
862 else ag_t2 = ag_tx; | |
863 } while (ag_t1 < ag_t2); | |
864 (PCB).ag_ap = ag_pstt[ag_t1]; | |
865 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
866 } | |
867 return 0; | |
868 } | |
869 | |
870 static int ag_action_3_proc(void) { | |
871 int ag_sd = ag_fl[(PCB).ag_ap] - 1; | |
872 (PCB).btsx = 0, (PCB).drt = -1; | |
873 (PCB).vs[(PCB).ssx] = *(PCB).pointer; | |
874 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
875 else (PCB).ss[(PCB).ssx] = (PCB).sn; | |
876 ag_track(); | |
877 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
878 ag_ra(); | |
879 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
880 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
881 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
882 do { | |
883 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
884 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
885 else ag_t2 = ag_tx; | |
886 } while (ag_t1 < ag_t2); | |
887 (PCB).ag_ap = ag_pstt[ag_t1]; | |
888 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
889 } | |
890 return 0; | |
891 } | |
892 | |
893 static int ag_action_8_proc(void) { | |
894 ag_undo(); | |
895 (PCB).la_ptr = (PCB).pointer; | |
896 (PCB).exit_flag = AG_SYNTAX_ERROR_CODE; | |
897 ag_diagnose(); | |
898 SYNTAX_ERROR; | |
899 {(PCB).la_ptr = (PCB).pointer + 1; ag_track();} | |
900 return (PCB).exit_flag == AG_RUNNING_CODE; | |
901 } | |
902 | |
903 static int ag_action_5_proc(void) { | |
904 int ag_sd = ag_fl[(PCB).ag_ap]; | |
905 (PCB).btsx = 0, (PCB).drt = -1; | |
906 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
907 else { | |
908 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
909 } | |
910 (PCB).la_ptr = (PCB).pointer; | |
911 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
912 ag_ra(); | |
913 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
914 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
915 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
916 do { | |
917 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
918 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
919 else ag_t2 = ag_tx; | |
920 } while (ag_t1 < ag_t2); | |
921 (PCB).ag_ap = ag_pstt[ag_t1]; | |
922 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
923 } | |
924 return (PCB).exit_flag == AG_RUNNING_CODE; | |
925 } | |
926 | |
927 static int ag_action_6_proc(void) { | |
928 int ag_sd = ag_fl[(PCB).ag_ap]; | |
929 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap]; | |
930 if ((PCB).drt == -1) { | |
931 (PCB).drt=(PCB).token_number; | |
932 (PCB).dssx=(PCB).ssx; | |
933 (PCB).dsn=(PCB).sn; | |
934 } | |
935 if (ag_sd) { | |
936 (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd]; | |
937 } | |
938 else { | |
939 ag_prot(); | |
940 (PCB).vs[(PCB).ssx] = ag_null_value; | |
941 (PCB).ss[(PCB).ssx] = (PCB).sn; | |
942 } | |
943 (PCB).la_ptr = (PCB).pointer; | |
944 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
945 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1; | |
946 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1; | |
947 do { | |
948 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
949 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1; | |
950 else ag_t2 = ag_tx; | |
951 } while (ag_t1 < ag_t2); | |
952 (PCB).ag_ap = ag_pstt[ag_t1]; | |
953 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break; | |
954 } | |
955 return (PCB).exit_flag == AG_RUNNING_CODE; | |
956 } | |
957 | |
958 | |
959 void init_detag(void) { | |
960 (PCB).la_ptr = (PCB).pointer; | |
961 (PCB).ss[0] = (PCB).sn = (PCB).ssx = 0; | |
962 (PCB).exit_flag = AG_RUNNING_CODE; | |
963 (PCB).line = FIRST_LINE; | |
964 (PCB).column = FIRST_COLUMN; | |
965 (PCB).btsx = 0, (PCB).drt = -1; | |
966 } | |
967 | |
968 void detag(void) { | |
969 init_detag(); | |
970 (PCB).exit_flag = AG_RUNNING_CODE; | |
971 while ((PCB).exit_flag == AG_RUNNING_CODE) { | |
972 unsigned ag_t1 = ag_sbt[(PCB).sn]; | |
973 if (ag_tstt[ag_t1]) { | |
974 unsigned ag_t2 = ag_sbe[(PCB).sn] - 1; | |
975 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr)); | |
976 (PCB).la_ptr++; | |
977 if (ag_key_index[(PCB).sn]) { | |
978 unsigned ag_k = ag_key_index[(PCB).sn]; | |
979 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer)); | |
980 if (ag_ch <= 255) { | |
981 while (ag_key_ch[ag_k] < ag_ch) ag_k++; | |
982 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k); | |
983 } | |
984 } | |
985 do { | |
986 unsigned ag_tx = (ag_t1 + ag_t2)/2; | |
987 if (ag_tstt[ag_tx] > (unsigned char)(PCB).token_number) | |
988 ag_t1 = ag_tx + 1; | |
989 else ag_t2 = ag_tx; | |
990 } while (ag_t1 < ag_t2); | |
991 if (ag_tstt[ag_t1] != (unsigned char)(PCB).token_number) | |
992 ag_t1 = ag_sbe[(PCB).sn]; | |
993 } | |
994 (PCB).ag_ap = ag_pstt[ag_t1]; | |
995 (ag_gt_procs_scan[ag_astt[ag_t1]])(); | |
996 } | |
997 } | |
998 | |
999 |