comparison tests/agcl/parsifal/good/detag2.c @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 detag.syn
3
4 Program to strip HTML tags from HTML files.
5 Copyright (c) 1996 - 1999 Parsifal Software, All
6 Rights Reserved.
7 See the file COPYING for license and usage terms.
8
9 For information about AnaGram, visit http://www.parsifalsoft.com.
10 */
11
12 #include <stdio.h>
13
14
15 /*
16 * AnaGram, A System for Syntax Directed Programming
17 * File generated by: ...
18 *
19 * AnaGram Parsing Engine
20 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
21 *
22 * This software is provided 'as-is', without any express or implied
23 * warranty. In no event will the authors be held liable for any damages
24 * arising from the use of this software.
25 *
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
29 *
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software
32 * in a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
37 */
38
39 #ifndef DETAG2_H
40 #include "detag2.h"
41 #endif
42
43 #ifndef DETAG2_H
44 #error Mismatched header file
45 #endif
46
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
49
/*
 * Context-stack accessors.  All three index the `cs` array of the parser
 * control block: RULE_CONTEXT yields the address of the entry at the current
 * stack index (ssx), CONTEXT yields that entry itself, and ERROR_CONTEXT
 * yields the entry at error_frame_ssx.
 */
50 #define RULE_CONTEXT (&((PCB).cs[(PCB).ssx]))
51 #define ERROR_CONTEXT ((PCB).cs[(PCB).error_frame_ssx])
52 #define CONTEXT ((PCB).cs[(PCB).ssx])
53
54
55
/*
 * The single parser control block used by this file; every (PCB).field
 * reference below expands to a member of this object.  Its type is not
 * declared in this file (presumably it comes from detag2.h -- confirm).
 */
56 detag2_pcb_type detag2_pcb;
57 #define PCB detag2_pcb
58
59 /* Line -, detag2.syn */
60 // ----- Embedded C ---------------------------
61
/* Stream the reduction procedures (ag_rp_* macros) write to; opened in main(). */
62 FILE *output;
63
64 int main(int argc, char *argv[]) {
65
66 FILE *input;
67 size_t fileLength;
68 size_t stringLength;
69 int errorFlag = 0;
70 char *inString;
71
72
73 /* Check for enough arguments */
74 if (argc != 3) {
75 printf("Program to strip HTML tags from a file\n"
76 "Usage: %s <input filename> <output filename>\n", argv[0]);
77 return 1;
78 }
79
80 /* Open input file for reading only */
81 input = fopen(argv[1],"r");
82 if (input == NULL) {
83 printf("Cannot open %s\n", argv[1]);
84 return 2;
85 }
86
87 /* find out how big the file is */
88 if (fseek(input, SEEK_SET, SEEK_END)) {
89 printf("Strange problems with %s\n", argv[1]);
90 return 3;
91 }
92 fileLength = ftell(input);
93 if (fileLength < 0 ) { // -1L is error return
94 printf("Error getting file length (%d) of %s\n", fileLength, argv[1]);
95 return 4;
96 }
97
98 /* fseek to beginning of file */
99 if (fseek(input, 0, SEEK_SET)) {
100 printf("Strange problems with %s\n", argv[1]);
101 return 5;
102 }
103
104 /* Allocate storage for input string */
105 inString = (char*)malloc(fileLength + 1);
106 if (inString == NULL) {
107 printf("Insufficient memory\n");
108 return 6;
109 }
110
111 /* Read file */
112 stringLength = fread(inString, 1, fileLength, input);
113 if (stringLength == 0) {
114 printf("Unable to read %s\n", argv[1]);
115 return 7;
116 }
117 inString[stringLength] = 0;
118
119
120 /* Open output file for writing only */
121 output = fopen(argv[2],"w");
122 if (output == NULL) {
123 printf("Cannot open %s\n", argv[2]);
124 free(inString);
125 fclose(input);
126 return 8;
127 }
128
129
130 /* Invoke parser */
131 PCB.pointer = (unsigned char *)inString; // using pointer input
132 detag();
133 if (PCB.exit_flag != 1) {
134 printf( "Unsuccessful termination of parse, PCB.exit_flag = %d\n",
135 PCB.exit_flag);
136 }
137
138
139 /* Done */
140 free(inString);
141 fclose(input);
142 fclose(output);
143 printf( " End detag ");
144 return 0;
145 }
146
147
/*
 * Overridable defaults: CONVERT_CASE is applied to input characters before
 * they are compared against the keyword tables (identity by default);
 * TAB_SPACING is the tab-stop width used by ag_track() for column counting.
 */
148 #ifndef CONVERT_CASE
149 #define CONVERT_CASE(c) (c)
150 #endif
151 #ifndef TAB_SPACING
152 #define TAB_SPACING 8
153 #endif
154
/*
 * Reduction procedures invoked from ag_ra().  Each writes one character to
 * `output` with putc(); the putc() result becomes the value of the macro.
 */
155 #define ag_rp_1() (putc('\n', output))
156
157 #define ag_rp_5(c) (putc(c, output))
158
159 #define ag_rp_6() (putc('<', output))
160
161 #define ag_rp_7() (putc('>', output))
162
163 #define ag_rp_8() (putc('&', output))
164
165
166 #define READ_COUNTS
167 #define WRITE_COUNTS
/*
 * Value-stack accessors relative to the current stack index:
 * VS(i) is the entry vs[ssx + i]; V(i,t) dereferences the address of that
 * entry through the cast/prefix expression `t`.
 */
168 #undef V
169 #define V(i,t) (*t (&(PCB).vs[(PCB).ssx + i]))
170 #undef VS
171 #define VS(i) (PCB).vs[(PCB).ssx + i]
172
/* Copies the control block's input_context into the current context entry. */
173 #ifndef GET_CONTEXT
174 #define GET_CONTEXT CONTEXT = (PCB).input_context
175 #endif
176
/*
 * Action codes with values 0..11.  ag_diagnose() compares entries of
 * ag_astt[] against ag_action_8; the same ag_astt[] entries are used as
 * indexes into the ag_*_procs_scan dispatch tables.
 */
177 typedef enum {
178 ag_action_1,
179 ag_action_2,
180 ag_action_3,
181 ag_action_4,
182 ag_action_5,
183 ag_action_6,
184 ag_action_7,
185 ag_action_8,
186 ag_action_9,
187 ag_action_10,
188 ag_action_11,
189 ag_action_12
190 } ag_parser_action;
191
192
/*
 * Value stored on the value stack by ag_action_9_proc() and
 * ag_action_6_proc() when they push an entry that has no input character.
 */
193 #ifndef NULL_VALUE_INITIALIZER
194 #define NULL_VALUE_INITIALIZER = 0
195 #endif
196
197 static int const ag_null_value NULL_VALUE_INITIALIZER;
198
/*
 * Indexed by (PCB).ag_ap in ag_ra(): selects which reduction procedure
 * (case 1..5) to run; 0 means no procedure.
 */
199 static const unsigned char ag_rpx[] = {
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
201 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 4,
204 5
205 };
206
/*
 * Keyword-recognition tables, all consumed by ag_get_key_word().
 * ag_key_itt / ag_key_pt are only consulted by the ag_cf_* actions.
 */
207 static const unsigned char ag_key_itt[] = {
208 0
209 };
210
211 static const unsigned short ag_key_pt[] = {
212 0
213 };
214
/*
 * Characters to match; each group is in ascending order and closed by 255,
 * which stops the linear scans `while (ag_key_ch[k] < ch) k++`.
 */
215 static const unsigned char ag_key_ch[] = {
216 0, 97,103,108,255, 38,255, 69, 82, 84,255, 82,255, 65, 68, 82,255,101,
217 114,116,255,114,255, 97,100,114,255, 33, 66, 68, 72, 76, 79, 80, 84, 85,
218 98,100,104,108,111,112,116,117,255
219 };
220
/* Action (a key_words value) for the ag_key_ch entry with the same index. */
221 static const unsigned char ag_key_act[] = {
222 0,3,3,3,4,2,4,3,0,3,4,3,4,3,0,0,4,3,0,3,4,3,4,3,0,0,4,3,3,3,1,3,3,1,2,
223 3,3,3,1,3,3,1,2,3,4
224 };
225
/* Action parameter; for the accept/set/end actions it is the token number. */
226 static const unsigned char ag_key_parm[] = {
227 0, 69, 68, 67, 0, 0, 0, 27, 13, 29, 0, 53, 0, 44, 50, 47, 0, 28,
228 14, 30, 0, 54, 0, 45, 51, 48, 0, 56, 20, 38, 23, 41, 35, 17, 0, 32,
229 21, 39, 24, 42, 36, 18, 0, 33, 0
230 };
231
/*
 * Jump target: an index into ag_key_ch for the set/jmp actions, or an
 * offset into ag_key_ends for the end actions.
 */
232 static const unsigned char ag_key_jmp[] = {
233 0, 0, 4, 7, 0, 1, 0, 17, 0, 20, 0, 27, 0, 29, 0, 0, 0, 39,
234 0, 42, 0, 49, 0, 51, 0, 0, 0, 10, 13, 15, 7, 23, 25, 11, 13, 33,
235 35, 37, 17, 45, 47, 21, 23, 55, 0
236 };
237
/*
 * Indexed by (PCB).sn: starting index into ag_key_ch for keyword lookup,
 * or 0 when no keyword lookup is attempted for that value of sn.
 */
238 static const unsigned char ag_key_index[] = {
239 5, 27, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240 0, 0, 0, 0, 0, 5, 5, 0, 0
241 };
242
/* Zero-terminated keyword tails compared character by character. */
243 static const unsigned char ag_key_ends[] = {
244 109,112,59,0, 116,59,0, 116,59,0, 45,45,0, 82,0, 76,0,
245 65,68,0, 77,76,0, 73,0, 76,0, 69,0, 66,76,69,0, 76,0, 114,0,
246 108,0, 97,100,0, 109,108,0, 105,0, 108,0, 101,0, 98,108,101,0,
247 108,0,
248 };
249
/* Maps an input character code (0..255) to its token number. */
250 #define AG_TCV(x) ag_tcv[(x)]
251
252 static const unsigned char ag_tcv[] = {
253 3, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
254 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
255 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 26, 26, 26, 26, 26,
256 26, 70, 70, 70, 70, 70, 10, 70, 12, 70, 70, 70, 70, 70, 70, 70, 70, 70,
257 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
258 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
259 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
260 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
261 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
262 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
263 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
264 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
265 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
266 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
267 70, 70, 70, 70
268 };
269
/*
 * Overridable diagnostics.  SYNTAX_ERROR prints the message prepared by
 * ag_diagnose() together with the current line and column to stderr.
 */
270 #ifndef SYNTAX_ERROR
271 #define SYNTAX_ERROR fprintf(stderr,"%s, line %d, column %d\n", \
272 (PCB).error_message, (PCB).line, (PCB).column)
273 #endif
274
/* Initial values given to (PCB).line and (PCB).column by init_detag2(). */
275 #ifndef FIRST_LINE
276 #define FIRST_LINE 1
277 #endif
278
279 #ifndef FIRST_COLUMN
280 #define FIRST_COLUMN 1
281 #endif
282
/* Reported by ag_prot() and ag_action_2_proc() when the stacks are full. */
283 #ifndef PARSER_STACK_OVERFLOW
284 #define PARSER_STACK_OVERFLOW {fprintf(stderr, \
285 "\nParser stack overflow, line %d, column %d\n",\
286 (PCB).line, (PCB).column);}
287 #endif
288
/* Not referenced anywhere in the visible part of this file. */
289 #ifndef REDUCTION_TOKEN_ERROR
290 #define REDUCTION_TOKEN_ERROR {fprintf(stderr, \
291 "\nReduction token error, line %d, column %d\n", \
292 (PCB).line, (PCB).column);}
293 #endif
294
295
/* Hook applied to each input character before table lookup (identity). */
296 #ifndef INPUT_CODE
297 #define INPUT_CODE(T) (T)
298 #endif
299
/*
 * Action codes stored in ag_key_act[] and dispatched on by
 * ag_get_key_word(); the enumerators take the values 0..7 in order.
 */
300 typedef enum
301 {ag_accept_key, ag_set_key, ag_jmp_key, ag_end_key, ag_no_match_key,
302 ag_cf_accept_key, ag_cf_set_key, ag_cf_end_key} key_words;
303
/*
 * Try to recognise a keyword starting at the current input position.
 *
 * ag_k is an index into the parallel tables ag_key_ch / ag_key_act /
 * ag_key_parm / ag_key_jmp; callers pass the index of the entry whose
 * character matched the character at (PCB).pointer.
 *
 * On a match (PCB).token_number is set to the keyword's token and
 * (PCB).la_ptr is left just past the keyword.  On failure (PCB).la_ptr is
 * restored to (PCB).pointer + ag_save, where ag_save is the look-ahead
 * offset on entry or the offset recorded by the most recent "set" action;
 * (PCB).token_number then keeps whatever value it last received.
 */
304 static void ag_get_key_word(int ag_k) {
305 int ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
306 const unsigned char *ag_p;
307 int ag_ch;
308 while (1) {
309 switch (ag_key_act[ag_k]) {
/* Match the rest of a keyword, then consult ag_key_itt for the next char. */
310 case ag_cf_end_key: {
311 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
312 do {
313 if ((ag_ch = *sp++) == 0) {
314 int ag_k1 = ag_key_parm[ag_k];
315 int ag_k2 = ag_key_pt[ag_k1];
316 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) goto ag_fail;
317 (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1 + 1];
318 return;
319 }
320 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
321 goto ag_fail;
322 }
/* Match the zero-terminated tail in ag_key_ends; success sets the token. */
323 case ag_end_key: {
324 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
325 do {
326 if ((ag_ch = *sp++) == 0) {
327 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k];
328 return;
329 }
330 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
/* A mismatch falls through into the no-match handling below. */
331 }
332 case ag_no_match_key:
333 ag_fail:
/* Rewind the look-ahead pointer to the last saved position. */
334 (PCB).la_ptr = (PCB).pointer + ag_save;
335 return;
336 case ag_cf_set_key: {
337 int ag_k1 = ag_key_parm[ag_k];
338 int ag_k2 = ag_key_pt[ag_k1];
339 ag_k = ag_key_jmp[ag_k];
340 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) break;
341 ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
342 (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1+1];
343 break;
344 }
/*
 * Record the keyword matched so far (token and input position), then fall
 * through to the jump so a longer keyword can still be tried.
 */
345 case ag_set_key:
346 ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
347 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k];
348 case ag_jmp_key:
349 ag_k = ag_key_jmp[ag_k];
350 break;
351 case ag_accept_key:
352 (PCB).token_number = (detag2_token_type) ag_key_parm[ag_k];
353 return;
354 case ag_cf_accept_key: {
355 int ag_k1 = ag_key_parm[ag_k];
356 int ag_k2 = ag_key_pt[ag_k1];
357 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)])
358 (PCB).la_ptr = (PCB).pointer + ag_save;
359 else (PCB).token_number = (detag2_token_type) ag_key_pt[ag_k1+1];
360 return;
361 }
362 }
/*
 * Reached via `break` from the set/jmp actions: read the next input
 * character and scan forward in ag_key_ch from the new ag_k for the first
 * entry that is not smaller.  No exact match means the keyword fails.
 */
363 ag_ch = CONVERT_CASE(*(PCB).la_ptr++);
364 ag_p = &ag_key_ch[ag_k];
365 if (ag_ch <= 255) while (*ag_p < ag_ch) ag_p++;
366 if (ag_ch > 255 || *ag_p != ag_ch) {
367 (PCB).la_ptr = (PCB).pointer + ag_save;
368 return;
369 }
370 ag_k = (int) (ag_p - ag_key_ch);
371 }
372 }
373
374
/*
 * Character codes given special treatment by ag_track() when it updates the
 * line/column counters.  Each may be overridden before this point.
 */
375 #ifndef AG_NEWLINE
376 #define AG_NEWLINE 10
377 #endif
378
379 #ifndef AG_RETURN
380 #define AG_RETURN 13
381 #endif
382
383 #ifndef AG_FORMFEED
384 #define AG_FORMFEED 12
385 #endif
386
387 #ifndef AG_TABCHAR
388 #define AG_TABCHAR 9
389 #endif
390
391 static void ag_track(void) {
392 int ag_k = (int) ((PCB).la_ptr - (PCB).pointer);
393 while (ag_k--) {
394 switch (*(PCB).pointer++) {
395 case AG_NEWLINE:
396 (PCB).column = 1, (PCB).line++;
397 case AG_RETURN:
398 case AG_FORMFEED:
399 break;
400 case AG_TABCHAR:
401 (PCB).column += (TAB_SPACING) - ((PCB).column - 1) % (TAB_SPACING);
402 break;
403 default:
404 (PCB).column++;
405 }
406 }
407 }
408
409
410 static void ag_prot(void) {
411 int ag_k;
412 ag_k = 128 - ++(PCB).btsx;
413 if (ag_k <= (PCB).ssx) {
414 (PCB).exit_flag = AG_STACK_ERROR_CODE;
415 PARSER_STACK_OVERFLOW;
416 return;
417 }
418 (PCB).bts[(PCB).btsx] = (PCB).sn;
419 (PCB).bts[ag_k] = (PCB).ssx;
420 (PCB).vs[ag_k] = (PCB).vs[(PCB).ssx];
421 (PCB).ss[ag_k] = (PCB).ss[(PCB).ssx];
422 }
423
424 static void ag_undo(void) {
425 if ((PCB).drt == -1) return;
426 while ((PCB).btsx) {
427 int ag_k = 128 - (PCB).btsx;
428 (PCB).sn = (PCB).bts[(PCB).btsx--];
429 (PCB).ssx = (PCB).bts[ag_k];
430 (PCB).vs[(PCB).ssx] = (PCB).vs[ag_k];
431 (PCB).ss[(PCB).ssx] = (PCB).ss[ag_k];
432 }
433 (PCB).token_number = (detag2_token_type) (PCB).drt;
434 (PCB).ssx = (PCB).dssx;
435 (PCB).sn = (PCB).dsn;
436 (PCB).drt = -1;
437 }
438
439
/*
 * Parse tables.  ag_tstt, ag_astt and ag_pstt are parallel arrays; the
 * slice belonging to a given (PCB).sn is delimited by ag_sbt[] and ag_sbe[].
 *
 * ag_tstt: token numbers, matched against (PCB).token_number in detag2()
 * and against (PCB).reduction_token in the reduction loops.
 */
440 static const unsigned char ag_tstt[] = {
441 70,69,68,67,26,12,10,3,0,1,2,4,5,6,7,8,9,64,66,
442 70,56,54,53,51,50,48,47,45,44,42,41,39,38,36,35,33,32,30,29,28,27,26,24,23,
443 21,20,18,17,14,13,12,0,11,15,16,19,22,25,31,34,37,40,43,46,49,52,55,59,
444 60,
445 70,69,68,67,26,12,10,3,0,5,
446 70,69,68,67,26,12,10,3,0,64,66,
447 70,69,68,67,26,12,3,0,4,64,66,
448 10,3,0,5,6,
449 3,0,
450 70,26,12,0,
451 70,26,12,10,0,57,62,63,
452 70,26,12,0,16,59,60,
453 70,26,12,0,16,59,60,
454 70,26,12,0,16,59,60,
455 70,26,12,0,16,59,60,
456 70,26,12,0,16,59,60,
457 70,26,12,0,16,59,60,
458 70,26,12,0,16,59,60,
459 70,26,12,0,16,59,60,
460 70,26,12,0,16,59,60,
461 26,0,
462 70,26,12,0,16,59,60,
463 70,26,12,0,16,59,60,
464 70,26,12,0,16,59,60,
465 12,0,
466 70,69,68,67,26,12,10,3,0,64,66,
467 70,69,68,67,26,12,10,3,0,5,
468 70,26,12,10,0,
469 70,26,12,0,16,59,60,
470
471 };
472
473
/*
 * ag_astt: action code for the entry with the same index; used as the index
 * into the ag_*_procs_scan dispatch tables.
 */
474 static unsigned const char ag_astt[241] = {
475 2,2,2,2,2,2,1,5,7,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
476 1,1,1,1,1,1,1,1,1,1,1,1,1,5,7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,5,5,5,5,
477 5,1,5,7,3,2,2,2,2,2,2,5,5,7,3,3,2,2,2,2,2,2,5,7,1,1,1,1,5,7,1,1,3,7,9,9,5,
478 7,1,1,5,1,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,2,1,2,
479 1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,7,
480 1,1,5,7,3,1,3,1,1,5,7,2,1,2,1,1,5,7,2,1,2,3,7,2,2,2,2,2,2,5,5,7,3,3,5,5,5,
481 5,5,5,1,5,7,3,9,9,5,9,7,1,1,5,7,2,1,2
482 };
483
484
/* ag_pstt: action parameter, copied into (PCB).ag_ap before dispatching. */
485 static const unsigned char ag_pstt[] = {
486 68,72,71,70,68,68,1,8,0,0,6,3,2,2,5,4,6,3,3,
487 7,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,17,17,7,18,18,19,19,
488 20,20,21,21,58,1,22,21,22,20,19,18,17,16,15,14,13,12,11,10,9,7,22,
489 7,7,7,7,7,7,1,7,2,3,
490 68,72,71,70,68,68,4,4,3,67,67,
491 68,72,71,70,68,68,10,4,23,23,23,
492 1,9,5,24,24,
493 1,6,
494 57,57,59,7,
495 25,25,63,25,8,54,25,54,
496 7,7,58,9,53,7,53,
497 7,7,58,10,50,7,50,
498 7,7,58,11,47,7,47,
499 7,7,58,12,44,7,44,
500 7,7,58,13,41,7,41,
501 7,7,58,14,38,7,38,
502 7,7,58,15,35,7,35,
503 7,7,58,16,32,7,32,
504 7,7,58,17,29,7,29,
505 26,18,
506 7,7,58,19,21,7,21,
507 7,7,58,20,18,7,18,
508 7,7,58,21,15,7,15,
509 12,22,
510 68,72,71,70,68,68,5,5,23,67,67,
511 6,6,6,6,6,6,1,6,24,3,
512 62,62,64,62,25,
513 7,7,58,26,24,7,24,
514
515 };
516
517
/*
 * ag_sbt[sn]: index of the first entry for sn; ag_sbt[sn+1] - 1 is used as
 * the upper bound of the reduction-token searches.
 */
518 static const unsigned char ag_sbt[] = {
519 0, 19, 69, 79, 90, 101, 106, 108, 112, 120, 127, 134, 141, 148,
520 155, 162, 169, 176, 183, 185, 192, 199, 206, 208, 219, 229, 234, 241
521 };
522
523
/*
 * ag_sbe[sn]: index of the entry used when no input token matches (see
 * detag2()); the entries after it are searched by reduction token.
 */
524 static const unsigned char ag_sbe[] = {
525 8, 51, 77, 87, 97, 103, 107, 111, 116, 123, 130, 137, 144, 151,
526 158, 165, 172, 179, 184, 188, 195, 202, 207, 216, 227, 233, 237, 241
527 };
528
529
/*
 * ag_fl[ag_ap]: number of stack entries covered by the reduction selected
 * by ag_ap; the action procedures pop this many (or one fewer) entries.
 */
530 static const unsigned char ag_fl[] = {
531 1,2,1,2,1,2,2,1,0,1,1,1,3,1,1,2,1,1,2,1,1,2,1,1,3,1,1,1,1,2,1,1,2,1,1,
532 2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,2,1,1,2,0,1,1,1,2,0,1,1,1,2,1,1,
533 1,1,1
534 };
535
/* ag_ptt[ag_ap]: value assigned to (PCB).reduction_token for that ag_ap. */
536 static const unsigned char ag_ptt[] = {
537 0, 1, 6, 6, 7, 7, 8, 8, 9, 9, 9, 2, 5, 15, 15, 11, 19, 19,
538 11, 22, 22, 11, 25, 25, 11, 31, 31, 31, 31, 11, 34, 34, 11, 37, 37, 11,
539 40, 40, 11, 43, 43, 11, 46, 46, 11, 49, 49, 11, 52, 52, 11, 55, 55, 11,
540 11, 11, 59, 59, 60, 60, 16, 62, 62, 63, 63, 57, 4, 4, 64, 64, 66, 66,
541 66
542 };
543
544
545 static void ag_ra(void)
546 {
547 switch(ag_rpx[(PCB).ag_ap]) {
548 case 1: VS(0) = ag_rp_1(); break;
549 case 2: VS(0) = ag_rp_5(VS(0)); break;
550 case 3: VS(0) = ag_rp_6(); break;
551 case 4: VS(0) = ag_rp_7(); break;
552 case 5: VS(0) = ag_rp_8(); break;
553 }
554 (PCB).la_ptr = (PCB).pointer;
555 }
556
/*
 * Printable name for each token number (0..70), used by ag_diagnose() to
 * build error messages.  An empty string marks a token without a name;
 * ag_diagnose() tests the first character to detect that.
 */
557 #define TOKEN_NAMES detag2_token_names
558 const char *const detag2_token_names[71] = {
559 "input string",
560 "input string",
561 "html",
562 "eof",
563 "text",
564 "tag",
565 "",
566 "",
567 "",
568 "",
569 "'<'",
570 "tag innards",
571 "'>'",
572 "\"HR\"",
573 "\"hr\"",
574 "",
575 "other stuff",
576 "\"P\"",
577 "\"p\"",
578 "",
579 "\"BR\"",
580 "\"br\"",
581 "",
582 "\"H\"",
583 "\"h\"",
584 "",
585 "header type",
586 "\"HEAD\"",
587 "\"head\"",
588 "\"HTML\"",
589 "\"html\"",
590 "",
591 "\"UL\"",
592 "\"ul\"",
593 "",
594 "\"OL\"",
595 "\"ol\"",
596 "",
597 "\"DL\"",
598 "\"dl\"",
599 "",
600 "\"LI\"",
601 "\"li\"",
602 "",
603 "\"TABLE\"",
604 "\"table\"",
605 "",
606 "\"TR\"",
607 "\"tr\"",
608 "",
609 "\"TD\"",
610 "\"td\"",
611 "",
612 "\"PRE\"",
613 "\"pre\"",
614 "",
615 "\"!--\"",
616 "comment stuff",
617 "tag innard char",
618 "",
619 "",
620 "comment char",
621 "",
622 "",
623 "text char",
624 "ordinary text char",
625 "entity text char",
626 "\"&lt;\"",
627 "\"&gt;\"",
628 "\"&amp;\"",
629 "",
630
631 };
632
/*
 * Overridable sprintf() formats and fallback text for the messages built by
 * ag_diagnose(); each format takes a single string argument.
 */
633 #ifndef MISSING_FORMAT
634 #define MISSING_FORMAT "Missing %s"
635 #endif
636 #ifndef UNEXPECTED_FORMAT
637 #define UNEXPECTED_FORMAT "Unexpected %s"
638 #endif
639 #ifndef UNNAMED_TOKEN
640 #define UNNAMED_TOKEN "input"
641 #endif
642
643
644 static void ag_diagnose(void) {
645 int ag_snd = (PCB).sn;
646 int ag_k = ag_sbt[ag_snd];
647
648 if (*TOKEN_NAMES[ag_tstt[ag_k]] && ag_astt[ag_k + 1] == ag_action_8) {
649 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]);
650 }
651 else if (ag_astt[ag_sbe[(PCB).sn]] == ag_action_8
652 && (ag_k = (int) ag_sbe[(PCB).sn] + 1) == (int) ag_sbt[(PCB).sn+1] - 1
653 && *TOKEN_NAMES[ag_tstt[ag_k]]) {
654 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]);
655 }
656 else if ((PCB).token_number && *TOKEN_NAMES[(PCB).token_number]) {
657 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, TOKEN_NAMES[(PCB).token_number]);
658 }
659 else if (isprint(INPUT_CODE((*(PCB).pointer))) && INPUT_CODE((*(PCB).pointer)) != '\\') {
660 char buf[20];
661 sprintf(buf, "\'%c\'", (char) INPUT_CODE((*(PCB).pointer)));
662 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, buf);
663 }
664 else sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, UNNAMED_TOKEN);
665 (PCB).error_message = (PCB).ag_msg;
666
667
668 }
/*
 * Forward declarations of the action procedures referenced by the dispatch
 * tables below.  ag_action_8_proc is declared twice; the repetition is
 * harmless.
 */
669 static int ag_action_1_r_proc(void);
670 static int ag_action_2_r_proc(void);
671 static int ag_action_3_r_proc(void);
672 static int ag_action_4_r_proc(void);
673 static int ag_action_1_s_proc(void);
674 static int ag_action_3_s_proc(void);
675 static int ag_action_1_proc(void);
676 static int ag_action_2_proc(void);
677 static int ag_action_3_proc(void);
678 static int ag_action_4_proc(void);
679 static int ag_action_5_proc(void);
680 static int ag_action_6_proc(void);
681 static int ag_action_7_proc(void);
682 static int ag_action_8_proc(void);
683 static int ag_action_9_proc(void);
684 static int ag_action_10_proc(void);
685 static int ag_action_11_proc(void);
686 static int ag_action_8_proc(void);
687
688
/*
 * Dispatch table indexed by ag_astt[] in the reduction loops of
 * ag_action_5_proc() and ag_action_6_proc().
 */
689 static int (*const ag_r_procs_scan[])(void) = {
690 ag_action_1_r_proc,
691 ag_action_2_r_proc,
692 ag_action_3_r_proc,
693 ag_action_4_r_proc
694 };
695
/*
 * Dispatch table indexed by ag_astt[] in the reduction loops of
 * ag_action_3_proc() and ag_action_4_proc(); entries 2 and 4 reuse the
 * "_r" procedures.
 */
696 static int (*const ag_s_procs_scan[])(void) = {
697 ag_action_1_s_proc,
698 ag_action_2_r_proc,
699 ag_action_3_s_proc,
700 ag_action_4_r_proc
701 };
702
/*
 * Dispatch table indexed by ag_astt[] in the main loop of detag2().  The
 * last slot (ag_action_12) is handled by ag_action_8_proc as well.
 */
703 static int (*const ag_gt_procs_scan[])(void) = {
704 ag_action_1_proc,
705 ag_action_2_proc,
706 ag_action_3_proc,
707 ag_action_4_proc,
708 ag_action_5_proc,
709 ag_action_6_proc,
710 ag_action_7_proc,
711 ag_action_8_proc,
712 ag_action_9_proc,
713 ag_action_10_proc,
714 ag_action_11_proc,
715 ag_action_8_proc
716 };
717
718
719 static int ag_action_10_proc(void) {
720 int ag_t = (PCB).token_number;
721 (PCB).btsx = 0, (PCB).drt = -1;
722 do {
723 ag_track();
724 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
725 (PCB).la_ptr++;
726 if (ag_key_index[(PCB).sn]) {
727 unsigned ag_k = ag_key_index[(PCB).sn];
728 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
729 if (ag_ch <= 255) {
730 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
731 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
732 }
733 }
734 } while ((PCB).token_number == (detag2_token_type) ag_t);
735 (PCB).la_ptr = (PCB).pointer;
736 return 1;
737 }
738
739 static int ag_action_11_proc(void) {
740 int ag_t = (PCB).token_number;
741
742 (PCB).btsx = 0, (PCB).drt = -1;
743 do {
744 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
745 (PCB).ssx--;
746 ag_track();
747 ag_ra();
748 if ((PCB).exit_flag != AG_RUNNING_CODE) return 0;
749 (PCB).ssx++;
750 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
751 (PCB).la_ptr++;
752 if (ag_key_index[(PCB).sn]) {
753 unsigned ag_k = ag_key_index[(PCB).sn];
754 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
755 if (ag_ch <= 255) {
756 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
757 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
758 }
759 }
760 }
761 while ((PCB).token_number == (detag2_token_type) ag_t);
762 (PCB).la_ptr = (PCB).pointer;
763 return 1;
764 }
765
766 static int ag_action_3_r_proc(void) {
767 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
768 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
769 (PCB).btsx = 0, (PCB).drt = -1;
770 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
771 ag_ra();
772 return (PCB).exit_flag == AG_RUNNING_CODE;
773 }
774
775 static int ag_action_3_s_proc(void) {
776 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
777 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
778 (PCB).btsx = 0, (PCB).drt = -1;
779 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
780 ag_ra();
781 return (PCB).exit_flag == AG_RUNNING_CODE;
782 }
783
784 static int ag_action_4_r_proc(void) {
785 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
786 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
787 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
788 return 1;
789 }
790
791 static int ag_action_2_proc(void) {
792 (PCB).btsx = 0, (PCB).drt = -1;
793 if ((PCB).ssx >= 128) {
794 (PCB).exit_flag = AG_STACK_ERROR_CODE;
795 PARSER_STACK_OVERFLOW;
796 }
797 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
798 (PCB).ss[(PCB).ssx] = (PCB).sn;
799 (PCB).ssx++;
800 (PCB).sn = (PCB).ag_ap;
801 ag_track();
802 return 0;
803 }
804
805 static int ag_action_9_proc(void) {
806 if ((PCB).drt == -1) {
807 (PCB).drt=(PCB).token_number;
808 (PCB).dssx=(PCB).ssx;
809 (PCB).dsn=(PCB).sn;
810 }
811 ag_prot();
812 (PCB).vs[(PCB).ssx] = ag_null_value;
813 (PCB).ss[(PCB).ssx] = (PCB).sn;
814 (PCB).ssx++;
815 (PCB).sn = (PCB).ag_ap;
816 (PCB).la_ptr = (PCB).pointer;
817 return (PCB).exit_flag == AG_RUNNING_CODE;
818 }
819
820 static int ag_action_2_r_proc(void) {
821 (PCB).ssx++;
822 (PCB).sn = (PCB).ag_ap;
823 return 0;
824 }
825
826 static int ag_action_7_proc(void) {
827 --(PCB).ssx;
828 (PCB).la_ptr = (PCB).pointer;
829 (PCB).exit_flag = AG_SUCCESS_CODE;
830 return 0;
831 }
832
833 static int ag_action_1_proc(void) {
834 ag_track();
835 (PCB).exit_flag = AG_SUCCESS_CODE;
836 return 0;
837 }
838
839 static int ag_action_1_r_proc(void) {
840 (PCB).exit_flag = AG_SUCCESS_CODE;
841 return 0;
842 }
843
844 static int ag_action_1_s_proc(void) {
845 (PCB).exit_flag = AG_SUCCESS_CODE;
846 return 0;
847 }
848
849 static int ag_action_4_proc(void) {
850 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
851 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
852 (PCB).btsx = 0, (PCB).drt = -1;
853 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
854 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
855 else (PCB).ss[(PCB).ssx] = (PCB).sn;
856 ag_track();
857 while ((PCB).exit_flag == AG_RUNNING_CODE) {
858 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
859 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
860 do {
861 unsigned ag_tx = (ag_t1 + ag_t2)/2;
862 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
863 else ag_t2 = ag_tx;
864 } while (ag_t1 < ag_t2);
865 (PCB).ag_ap = ag_pstt[ag_t1];
866 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break;
867 }
868 return 0;
869 }
870
871 static int ag_action_3_proc(void) {
872 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
873 (PCB).btsx = 0, (PCB).drt = -1;
874 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
875 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
876 else (PCB).ss[(PCB).ssx] = (PCB).sn;
877 ag_track();
878 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
879 ag_ra();
880 while ((PCB).exit_flag == AG_RUNNING_CODE) {
881 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
882 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
883 do {
884 unsigned ag_tx = (ag_t1 + ag_t2)/2;
885 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
886 else ag_t2 = ag_tx;
887 } while (ag_t1 < ag_t2);
888 (PCB).ag_ap = ag_pstt[ag_t1];
889 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break;
890 }
891 return 0;
892 }
893
894 static int ag_action_8_proc(void) {
895 ag_undo();
896 (PCB).la_ptr = (PCB).pointer;
897 (PCB).exit_flag = AG_SYNTAX_ERROR_CODE;
898 ag_diagnose();
899 SYNTAX_ERROR;
900 {(PCB).la_ptr = (PCB).pointer + 1; ag_track();}
901 return (PCB).exit_flag == AG_RUNNING_CODE;
902 }
903
904 static int ag_action_5_proc(void) {
905 int ag_sd = ag_fl[(PCB).ag_ap];
906 (PCB).btsx = 0, (PCB).drt = -1;
907 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
908 else {
909 (PCB).ss[(PCB).ssx] = (PCB).sn;
910 }
911 (PCB).la_ptr = (PCB).pointer;
912 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
913 ag_ra();
914 while ((PCB).exit_flag == AG_RUNNING_CODE) {
915 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
916 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
917 do {
918 unsigned ag_tx = (ag_t1 + ag_t2)/2;
919 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
920 else ag_t2 = ag_tx;
921 } while (ag_t1 < ag_t2);
922 (PCB).ag_ap = ag_pstt[ag_t1];
923 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break;
924 }
925 return (PCB).exit_flag == AG_RUNNING_CODE;
926 }
927
928 static int ag_action_6_proc(void) {
929 int ag_sd = ag_fl[(PCB).ag_ap];
930 (PCB).reduction_token = (detag2_token_type) ag_ptt[(PCB).ag_ap];
931 if ((PCB).drt == -1) {
932 (PCB).drt=(PCB).token_number;
933 (PCB).dssx=(PCB).ssx;
934 (PCB).dsn=(PCB).sn;
935 }
936 if (ag_sd) {
937 (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
938 }
939 else {
940 ag_prot();
941 (PCB).vs[(PCB).ssx] = ag_null_value;
942 (PCB).ss[(PCB).ssx] = (PCB).sn;
943 }
944 (PCB).la_ptr = (PCB).pointer;
945 while ((PCB).exit_flag == AG_RUNNING_CODE) {
946 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
947 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
948 do {
949 unsigned ag_tx = (ag_t1 + ag_t2)/2;
950 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
951 else ag_t2 = ag_tx;
952 } while (ag_t1 < ag_t2);
953 (PCB).ag_ap = ag_pstt[ag_t1];
954 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break;
955 }
956 return (PCB).exit_flag == AG_RUNNING_CODE;
957 }
958
959
960 void init_detag2(void) {
961 (PCB).la_ptr = (PCB).pointer;
962 (PCB).ss[0] = (PCB).sn = (PCB).ssx = 0;
963 (PCB).exit_flag = AG_RUNNING_CODE;
964 (PCB).line = FIRST_LINE;
965 (PCB).column = FIRST_COLUMN;
966 (PCB).btsx = 0, (PCB).drt = -1;
967 }
968
969 void detag2(void) {
970 init_detag2();
971 (PCB).exit_flag = AG_RUNNING_CODE;
972 while ((PCB).exit_flag == AG_RUNNING_CODE) {
973 unsigned ag_t1 = ag_sbt[(PCB).sn];
974 if (ag_tstt[ag_t1]) {
975 unsigned ag_t2 = ag_sbe[(PCB).sn] - 1;
976 (PCB).token_number = (detag2_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
977 (PCB).la_ptr++;
978 if (ag_key_index[(PCB).sn]) {
979 unsigned ag_k = ag_key_index[(PCB).sn];
980 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
981 if (ag_ch <= 255) {
982 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
983 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
984 }
985 }
986 do {
987 unsigned ag_tx = (ag_t1 + ag_t2)/2;
988 if (ag_tstt[ag_tx] > (unsigned char)(PCB).token_number)
989 ag_t1 = ag_tx + 1;
990 else ag_t2 = ag_tx;
991 } while (ag_t1 < ag_t2);
992 if (ag_tstt[ag_t1] != (unsigned char)(PCB).token_number)
993 ag_t1 = ag_sbe[(PCB).sn];
994 }
995 (PCB).ag_ap = ag_pstt[ag_t1];
996 (ag_gt_procs_scan[ag_astt[ag_t1]])();
997 }
998 }
999
1000