comparison tests/agcl/parsifal/good/detag.c @ 0:13d2b8934445

Import AnaGram (near-)release tree into Mercurial.
author David A. Holland
date Sat, 22 Dec 2007 17:52:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:13d2b8934445
1 /*
2 detag.syn
3
4 Program to strip HTML tags from HTML files.
5 Copyright (c) 1996 - 1999 Parsifal Software, All
6 Rights Reserved.
7 See the file COPYING for license and usage terms.
8
9 For information about AnaGram, visit http://www.parsifalsoft.com.
10 */
11
12 #include <stdio.h>
13
14
15 /*
16 * AnaGram, A System for Syntax Directed Programming
17 * File generated by: ...
18 *
19 * AnaGram Parsing Engine
20 * Copyright 1993-2002 Parsifal Software. All Rights Reserved.
21 *
22 * This software is provided 'as-is', without any express or implied
23 * warranty. In no event will the authors be held liable for any damages
24 * arising from the use of this software.
25 *
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to the following restrictions:
29 *
30 * 1. The origin of this software must not be misrepresented; you must not
31 * claim that you wrote the original software. If you use this software
32 * in a product, an acknowledgment in the product documentation would be
33 * appreciated but is not required.
34 * 2. Altered source versions must be plainly marked as such, and must not be
35 * misrepresented as being the original software.
36 * 3. This notice may not be removed or altered from any source distribution.
37 */
38
39 #ifndef DETAG_H
40 #include "detag.h"
41 #endif
42
43 #ifndef DETAG_H
44 #error Mismatched header file
45 #endif
46
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
49
/* Context-stack accessors: all three index (PCB).cs, either at the current
   stack index (PCB).ssx or at (PCB).error_frame_ssx. RULE_CONTEXT yields a
   pointer to the entry; the other two yield the entry itself. */
50 #define RULE_CONTEXT (&((PCB).cs[(PCB).ssx]))
51 #define ERROR_CONTEXT ((PCB).cs[(PCB).error_frame_ssx])
52 #define CONTEXT ((PCB).cs[(PCB).ssx])
53
54
55
/* The single parser control block used by every routine in this file.
   detag_pcb_type is not declared here (presumably it comes from detag.h). */
56 detag_pcb_type detag_pcb;
/* Shorthand used throughout the file for the control block above. */
57 #define PCB detag_pcb
58
59 /* Line -, detag.syn */
60 // ----- Embedded C ---------------------------
61
/* Destination stream for the stripped text: opened by main() and written
   to by the ag_rp_* reduction macros. */
62 FILE *output;
63
64 int main(int argc, char *argv[]) {
65
66 FILE *input;
67 size_t fileLength;
68 size_t stringLength;
69 int errorFlag = 0;
70 char *inString;
71
72
73 /* Check for enough arguments */
74 if (argc != 3) {
75 printf("Program to strip HTML tags from a file\n"
76 "Usage: %s <input filename> <output filename>\n", argv[0]);
77 return 1;
78 }
79
80 /* Open input file for reading only */
81 input = fopen(argv[1],"r");
82 if (input == NULL) {
83 printf("Cannot open %s\n", argv[1]);
84 return 2;
85 }
86
87 /* find out how big the file is */
88 if (fseek(input, SEEK_SET, SEEK_END)) {
89 printf("Strange problems with %s\n", argv[1]);
90 return 3;
91 }
92 fileLength = ftell(input);
93 if (fileLength < 0 ) { // -1L is error return
94 printf("Error getting file length (%d) of %s\n", fileLength, argv[1]);
95 return 4;
96 }
97
98 /* fseek to beginning of file */
99 if (fseek(input, 0, SEEK_SET)) {
100 printf("Strange problems with %s\n", argv[1]);
101 return 5;
102 }
103
104 /* Allocate storage for input string */
105 inString = (char*)malloc(fileLength + 1);
106 if (inString == NULL) {
107 printf("Insufficient memory\n");
108 return 6;
109 }
110
111 /* Read file */
112 stringLength = fread(inString, 1, fileLength, input);
113 if (stringLength == 0) {
114 printf("Unable to read %s\n", argv[1]);
115 return 7;
116 }
117 inString[stringLength] = 0;
118
119
120 /* Open output file for writing only */
121 output = fopen(argv[2],"w");
122 if (output == NULL) {
123 printf("Cannot open %s\n", argv[2]);
124 free(inString);
125 fclose(input);
126 return 8;
127 }
128
129
130 /* Invoke parser */
131 PCB.pointer = (unsigned char *)inString; // using pointer input
132 detag();
133 if (PCB.exit_flag != 1) {
134 printf( "Unsuccessful termination of parse, PCB.exit_flag = %d\n",
135 PCB.exit_flag);
136 }
137
138
139 /* Done */
140 free(inString);
141 fclose(input);
142 fclose(output);
143 printf( " End detag ");
144 return 0;
145 }
146
147
/* CONVERT_CASE is applied to input characters during keyword matching;
   the default leaves the character unchanged. */
148 #ifndef CONVERT_CASE
149 #define CONVERT_CASE(c) (c)
150 #endif
/* Tab stop width used by ag_track() when advancing the column counter. */
151 #ifndef TAB_SPACING
152 #define TAB_SPACING 8
153 #endif
154
/* Reduction procedures, selected in ag_ra(). Each writes one character to
   the global stream `output` and evaluates to putc()'s result:
   ag_rp_1 a newline, ag_rp_5 the character passed in, ag_rp_6 '<',
   ag_rp_7 '>' and ag_rp_8 '&'. */
155 #define ag_rp_1() (putc('\n', output))
156
157 #define ag_rp_5(c) (putc(c, output))
158
159 #define ag_rp_6() (putc('<', output))
160
161 #define ag_rp_7() (putc('>', output))
162
163 #define ag_rp_8() (putc('&', output))
164
165
/* Defined empty; nothing in the visible part of this file tests them. */
166 #define READ_COUNTS
167 #define WRITE_COUNTS
/* Value-stack access relative to the current stack index (PCB).ssx:
   VS(i) is the slot itself, V(i,t) applies `*t` to the slot's address. */
168 #undef V
169 #define V(i,t) (*t (&(PCB).vs[(PCB).ssx + i]))
170 #undef VS
171 #define VS(i) (PCB).vs[(PCB).ssx + i]
172
/* Copies (PCB).input_context into the current context-stack entry. */
173 #ifndef GET_CONTEXT
174 #define GET_CONTEXT CONTEXT = (PCB).input_context
175 #endif
176
/* Parser action codes. The enumerators are zero-based, so ag_action_N has
   the value N-1; ag_diagnose() compares ag_astt entries against them. */
177 typedef enum {
178 ag_action_1,
179 ag_action_2,
180 ag_action_3,
181 ag_action_4,
182 ag_action_5,
183 ag_action_6,
184 ag_action_7,
185 ag_action_8,
186 ag_action_9,
187 ag_action_10,
188 ag_action_11,
189 ag_action_12
190 } ag_parser_action;
191
192
/* Initializer text for ag_null_value; may be overridden before this point. */
193 #ifndef NULL_VALUE_INITIALIZER
194 #define NULL_VALUE_INITIALIZER = 0
195 #endif
196
/* Value stored in a value-stack slot by ag_action_9_proc() and by the
   empty-rule path of ag_action_6_proc(). */
197 static int const ag_null_value NULL_VALUE_INITIALIZER;
198
/* Indexed by (PCB).ag_ap in ag_ra(): the entry selects which reduction
   procedure case (1..5) to run; 0 matches no case, so nothing is run. */
199 static const unsigned char ag_rpx[] = {
200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
201 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 4,
204 5
205 };
206
/*
 * Keyword recognition tables, all consumed by ag_get_key_word() and by the
 * scanning code that calls it. ag_key_ch, ag_key_act, ag_key_parm and
 * ag_key_jmp are parallel arrays indexed by the same position ag_k.
 */

/* Read only by the "cf" keyword actions, as ag_key_itt[base + character];
   a non-zero entry makes the match fail. */
207 static const unsigned char ag_key_itt[] = {
208 0
209 };
210
/* Read only by the "cf" keyword actions: [k1] is a base offset into
   ag_key_itt and [k1 + 1] the token number to report. */
211 static const unsigned short ag_key_pt[] = {
212 0
213 };
214
/* Candidate characters. Callers scan forward while the entry is smaller
   than the input character; the 255 entries appear to terminate each
   group so those scans stop. */
215 static const unsigned char ag_key_ch[] = {
216 0, 97,103,108,255, 38,255, 69, 82, 84,255, 82,255, 65, 68, 82,255,101,
217 114,116,255,114,255, 97,100,114,255, 33, 66, 68, 72, 76, 79, 80, 84, 85,
218 98,100,104,108,111,112,116,117,255
219 };
220
/* Action for each position; the values are key_words enumerators. */
221 static const unsigned char ag_key_act[] = {
222 0,3,3,3,4,2,4,3,0,3,4,3,4,3,0,0,4,3,0,3,4,3,4,3,0,0,4,3,3,3,1,3,3,1,2,
223 3,3,3,1,3,3,1,2,3,4
224 };
225
/* Parameter for each position: the token number assigned by the accept,
   set and end actions (or an ag_key_pt index for the "cf" actions). */
226 static const unsigned char ag_key_parm[] = {
227 0, 69, 68, 67, 0, 0, 0, 27, 13, 29, 0, 53, 0, 44, 50, 47, 0, 28,
228 14, 30, 0, 54, 0, 45, 51, 48, 0, 56, 20, 38, 23, 41, 35, 17, 0, 32,
229 21, 39, 24, 42, 36, 18, 0, 33, 0
230 };
231
/* Jump target for each position: the next ag_key_ch position for the set
   and jump actions, or an offset into ag_key_ends for the end actions. */
232 static const unsigned char ag_key_jmp[] = {
233 0, 0, 4, 7, 0, 1, 0, 17, 0, 20, 0, 27, 0, 29, 0, 0, 0, 39,
234 0, 42, 0, 49, 0, 51, 0, 0, 0, 10, 13, 15, 7, 23, 25, 11, 13, 33,
235 35, 37, 17, 45, 47, 21, 23, 55, 0
236 };
237
/* Indexed by parser state (PCB).sn: starting position in ag_key_ch for
   keyword scanning in that state; 0 means no keyword scan is attempted. */
238 static const unsigned char ag_key_index[] = {
239 5, 27, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240 0, 0, 0, 0, 0, 5, 5, 0, 0
241 };
242
/* Zero-terminated character sequences: the remaining characters a keyword
   must match once an end action has been reached. */
243 static const unsigned char ag_key_ends[] = {
244 109,112,59,0, 116,59,0, 116,59,0, 45,45,0, 82,0, 76,0,
245 65,68,0, 77,76,0, 73,0, 76,0, 69,0, 66,76,69,0, 76,0, 114,0,
246 108,0, 97,100,0, 109,108,0, 105,0, 108,0, 101,0, 98,108,101,0,
247 108,0,
248 };
249
/* Token lookup used when scanning: maps an input character code to a
   token number through the table below. */
250 #define AG_TCV(x) ag_tcv[(x)]
251
/* 256 entries, one per character code. Code 0 maps to 3, codes 49..54 to
   26, 60 to 10, 62 to 12, and every other code to 70. */
252 static const unsigned char ag_tcv[] = {
253 3, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
254 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
255 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 26, 26, 26, 26, 26,
256 26, 70, 70, 70, 70, 70, 10, 70, 12, 70, 70, 70, 70, 70, 70, 70, 70, 70,
257 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
258 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
259 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
260 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
261 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
262 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
263 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
264 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
265 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
266 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
267 70, 70, 70, 70
268 };
269
/* Default syntax-error report: prints the message prepared by
   ag_diagnose() together with the current line and column to stderr. */
270 #ifndef SYNTAX_ERROR
271 #define SYNTAX_ERROR fprintf(stderr,"%s, line %d, column %d\n", \
272 (PCB).error_message, (PCB).line, (PCB).column)
273 #endif
274
/* Initial line and column numbers, applied by init_detag(). */
275 #ifndef FIRST_LINE
276 #define FIRST_LINE 1
277 #endif
278
279 #ifndef FIRST_COLUMN
280 #define FIRST_COLUMN 1
281 #endif
282
/* Default report for parser stack overflow; used by ag_prot() and
   ag_action_2_proc(). */
283 #ifndef PARSER_STACK_OVERFLOW
284 #define PARSER_STACK_OVERFLOW {fprintf(stderr, \
285 "\nParser stack overflow, line %d, column %d\n",\
286 (PCB).line, (PCB).column);}
287 #endif
288
/* Default report for a reduction token error; not referenced in the
   visible part of this file. */
289 #ifndef REDUCTION_TOKEN_ERROR
290 #define REDUCTION_TOKEN_ERROR {fprintf(stderr, \
291 "\nReduction token error, line %d, column %d\n", \
292 (PCB).line, (PCB).column);}
293 #endif
294
295
/* Maps a raw input unit to the character code used for table lookups;
   the default is the identity. */
296 #ifndef INPUT_CODE
297 #define INPUT_CODE(T) (T)
298 #endif
299
/* Keyword-table actions (values 0..7) stored in ag_key_act and dispatched
   on by ag_get_key_word(). */
300 typedef enum
301 {ag_accept_key, ag_set_key, ag_jmp_key, ag_end_key, ag_no_match_key,
302 ag_cf_accept_key, ag_cf_set_key, ag_cf_end_key} key_words;
303
304 static void ag_get_key_word(int ag_k) {
305 int ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
306 const unsigned char *ag_p;
307 int ag_ch;
308 while (1) {
309 switch (ag_key_act[ag_k]) {
310 case ag_cf_end_key: {
311 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
312 do {
313 if ((ag_ch = *sp++) == 0) {
314 int ag_k1 = ag_key_parm[ag_k];
315 int ag_k2 = ag_key_pt[ag_k1];
316 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) goto ag_fail;
317 (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1 + 1];
318 return;
319 }
320 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
321 goto ag_fail;
322 }
323 case ag_end_key: {
324 const unsigned char *sp = ag_key_ends + ag_key_jmp[ag_k];
325 do {
326 if ((ag_ch = *sp++) == 0) {
327 (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
328 return;
329 }
330 } while (CONVERT_CASE(*(PCB).la_ptr++) == ag_ch);
331 }
332 case ag_no_match_key:
333 ag_fail:
334 (PCB).la_ptr = (PCB).pointer + ag_save;
335 return;
336 case ag_cf_set_key: {
337 int ag_k1 = ag_key_parm[ag_k];
338 int ag_k2 = ag_key_pt[ag_k1];
339 ag_k = ag_key_jmp[ag_k];
340 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)]) break;
341 ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
342 (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1+1];
343 break;
344 }
345 case ag_set_key:
346 ag_save = (int) ((PCB).la_ptr - (PCB).pointer);
347 (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
348 case ag_jmp_key:
349 ag_k = ag_key_jmp[ag_k];
350 break;
351 case ag_accept_key:
352 (PCB).token_number = (detag_token_type) ag_key_parm[ag_k];
353 return;
354 case ag_cf_accept_key: {
355 int ag_k1 = ag_key_parm[ag_k];
356 int ag_k2 = ag_key_pt[ag_k1];
357 if (ag_key_itt[ag_k2 + CONVERT_CASE(*(PCB).la_ptr)])
358 (PCB).la_ptr = (PCB).pointer + ag_save;
359 else (PCB).token_number = (detag_token_type) ag_key_pt[ag_k1+1];
360 return;
361 }
362 }
363 ag_ch = CONVERT_CASE(*(PCB).la_ptr++);
364 ag_p = &ag_key_ch[ag_k];
365 if (ag_ch <= 255) while (*ag_p < ag_ch) ag_p++;
366 if (ag_ch > 255 || *ag_p != ag_ch) {
367 (PCB).la_ptr = (PCB).pointer + ag_save;
368 return;
369 }
370 ag_k = (int) (ag_p - ag_key_ch);
371 }
372 }
373
374
/* Character codes that ag_track() treats specially when it maintains the
   line and column counters; each may be overridden before this point. */
375 #ifndef AG_NEWLINE
376 #define AG_NEWLINE 10
377 #endif
378
379 #ifndef AG_RETURN
380 #define AG_RETURN 13
381 #endif
382
383 #ifndef AG_FORMFEED
384 #define AG_FORMFEED 12
385 #endif
386
387 #ifndef AG_TABCHAR
388 #define AG_TABCHAR 9
389 #endif
390
391 static void ag_track(void) {
392 int ag_k = (int) ((PCB).la_ptr - (PCB).pointer);
393 while (ag_k--) {
394 switch (*(PCB).pointer++) {
395 case AG_NEWLINE:
396 (PCB).column = 1, (PCB).line++;
397 case AG_RETURN:
398 case AG_FORMFEED:
399 break;
400 case AG_TABCHAR:
401 (PCB).column += (TAB_SPACING) - ((PCB).column - 1) % (TAB_SPACING);
402 break;
403 default:
404 (PCB).column++;
405 }
406 }
407 }
408
409
410 static void ag_prot(void) {
411 int ag_k;
412 ag_k = 128 - ++(PCB).btsx;
413 if (ag_k <= (PCB).ssx) {
414 (PCB).exit_flag = AG_STACK_ERROR_CODE;
415 PARSER_STACK_OVERFLOW;
416 return;
417 }
418 (PCB).bts[(PCB).btsx] = (PCB).sn;
419 (PCB).bts[ag_k] = (PCB).ssx;
420 (PCB).vs[ag_k] = (PCB).vs[(PCB).ssx];
421 (PCB).ss[ag_k] = (PCB).ss[(PCB).ssx];
422 }
423
424 static void ag_undo(void) {
425 if ((PCB).drt == -1) return;
426 while ((PCB).btsx) {
427 int ag_k = 128 - (PCB).btsx;
428 (PCB).sn = (PCB).bts[(PCB).btsx--];
429 (PCB).ssx = (PCB).bts[ag_k];
430 (PCB).vs[(PCB).ssx] = (PCB).vs[ag_k];
431 (PCB).ss[(PCB).ssx] = (PCB).ss[ag_k];
432 }
433 (PCB).token_number = (detag_token_type) (PCB).drt;
434 (PCB).ssx = (PCB).dssx;
435 (PCB).sn = (PCB).dsn;
436 (PCB).drt = -1;
437 }
438
439
/* Token numbers for the parser's lookup tables, parallel to ag_astt and
   ag_pstt. For state s, detag() searches ag_sbt[s] .. ag_sbe[s]-1 for the
   input token, and the reduction loops search ag_sbe[s]+1 .. ag_sbt[s+1]-1
   for the reduction token. A 0 as a state's first entry makes detag()
   skip token scanning for that state. */
440 static const unsigned char ag_tstt[] = {
441 70,69,68,67,26,12,10,3,0,1,2,4,5,6,7,8,9,64,66,
442 70,56,54,53,51,50,48,47,45,44,42,41,39,38,36,35,33,32,30,29,28,27,26,24,23,
443 21,20,18,17,14,13,12,0,11,15,16,19,22,25,31,34,37,40,43,46,49,52,55,59,
444 60,
445 10,0,5,
446 70,69,68,67,26,12,0,64,66,
447 70,69,68,67,26,12,0,4,64,66,
448 10,0,5,6,
449 3,0,
450 70,26,0,
451 70,26,12,10,0,57,62,63,
452 70,26,12,0,16,59,60,
453 70,26,12,0,16,59,60,
454 70,26,12,0,16,59,60,
455 70,26,12,0,16,59,60,
456 70,26,12,0,16,59,60,
457 70,26,12,0,16,59,60,
458 70,26,12,0,16,59,60,
459 70,26,12,0,16,59,60,
460 70,26,12,0,16,59,60,
461 26,0,
462 70,26,12,0,16,59,60,
463 70,26,12,0,16,59,60,
464 70,26,12,0,16,59,60,
465 12,0,
466 70,69,68,67,26,12,0,64,66,
467 10,0,5,
468 70,26,10,0,
469 70,26,12,0,16,59,60,
470
471 };
472
473
/* Action codes parallel to ag_tstt: the entry at a matched position is
   used as an index into one of the ag_*_procs_scan dispatch tables. */
474 static unsigned const char ag_astt[219] = {
475 2,2,2,2,2,2,1,5,7,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
476 1,1,1,1,1,1,1,1,1,1,1,1,1,5,7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,5,3,2,2,
477 2,2,2,2,5,3,3,2,2,2,2,2,2,5,1,1,1,1,5,1,1,3,7,9,9,5,1,1,5,1,7,3,1,3,1,1,5,
478 7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,2,1,2,1,1,5,7,3,1,3,1,1,5,7,3,
479 1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,1,5,7,3,1,3,1,7,1,1,5,7,3,1,3,1,1,5,7,2,
480 1,2,1,1,5,7,2,1,2,3,7,2,2,2,2,2,2,5,3,3,1,5,3,9,9,9,5,1,1,5,7,2,1,2
481 };
482
483
/* Action parameters parallel to ag_tstt/ag_astt: the entry at a matched
   position is loaded into (PCB).ag_ap before the action procedure runs.
   The procedures use it as a new state number or as a rule index into
   ag_fl, ag_ptt and ag_rpx. */
484 static const unsigned char ag_pstt[] = {
485 68,72,71,70,68,68,1,8,0,0,6,3,2,2,5,4,6,3,3,
486 7,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,17,17,7,18,18,19,19,
487 20,20,21,21,58,1,22,21,22,20,19,18,17,16,15,14,13,12,11,10,9,7,22,
488 1,7,3,
489 68,72,71,70,68,68,4,67,67,
490 68,72,71,70,68,68,10,23,23,23,
491 1,9,24,24,
492 1,6,
493 57,57,59,
494 25,25,63,25,8,54,25,54,
495 7,7,58,9,53,7,53,
496 7,7,58,10,50,7,50,
497 7,7,58,11,47,7,47,
498 7,7,58,12,44,7,44,
499 7,7,58,13,41,7,41,
500 7,7,58,14,38,7,38,
501 7,7,58,15,35,7,35,
502 7,7,58,16,32,7,32,
503 7,7,58,17,29,7,29,
504 26,18,
505 7,7,58,19,21,7,21,
506 7,7,58,20,18,7,18,
507 7,7,58,21,15,7,15,
508 12,22,
509 68,72,71,70,68,68,5,67,67,
510 1,6,3,
511 62,62,62,64,
512 7,7,58,26,24,7,24,
513
514 };
515
516
/* Indexed by state number: position in ag_tstt/ag_astt/ag_pstt where the
   state's entries begin. Entry [s+1] also bounds state s's reduction-token
   range, which is why the table has one more entry than ag_key_index. */
517 static const unsigned char ag_sbt[] = {
518 0, 19, 69, 72, 81, 91, 95, 97, 100, 108, 115, 122, 129, 136,
519 143, 150, 157, 164, 171, 173, 180, 187, 194, 196, 205, 208, 212, 219
520 };
521
522
/* Indexed by state number: position of the state's default entry, used by
   detag() when the input token is not found. Input tokens are searched
   before this position, reduction tokens after it. */
523 static const unsigned char ag_sbe[] = {
524 8, 51, 70, 78, 87, 92, 96, 99, 104, 111, 118, 125, 132, 139,
525 146, 153, 160, 167, 172, 176, 183, 190, 195, 202, 206, 211, 215, 219
526 };
527
528
/* Indexed by rule number (PCB).ag_ap: the count the action procedures use
   to pop the state stack when the rule is reduced (some subtract 1 first). */
529 static const unsigned char ag_fl[] = {
530 1,2,1,2,1,2,2,1,0,1,1,1,3,1,1,2,1,1,2,1,1,2,1,1,3,1,1,1,1,2,1,1,2,1,1,
531 2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2,2,1,1,2,0,1,1,1,2,0,1,1,1,2,1,1,
532 1,1,1
533 };
534
/* Indexed by rule number (PCB).ag_ap: the token stored in
   (PCB).reduction_token when the rule is reduced. */
535 static const unsigned char ag_ptt[] = {
536 0, 1, 6, 6, 7, 7, 8, 8, 9, 9, 9, 2, 5, 15, 15, 11, 19, 19,
537 11, 22, 22, 11, 25, 25, 11, 31, 31, 31, 31, 11, 34, 34, 11, 37, 37, 11,
538 40, 40, 11, 43, 43, 11, 46, 46, 11, 49, 49, 11, 52, 52, 11, 55, 55, 11,
539 11, 11, 59, 59, 60, 60, 16, 62, 62, 63, 63, 57, 4, 4, 64, 64, 66, 66,
540 66
541 };
542
543
544 static void ag_ra(void)
545 {
546 switch(ag_rpx[(PCB).ag_ap]) {
547 case 1: VS(0) = ag_rp_1(); break;
548 case 2: VS(0) = ag_rp_5(VS(0)); break;
549 case 3: VS(0) = ag_rp_6(); break;
550 case 4: VS(0) = ag_rp_7(); break;
551 case 5: VS(0) = ag_rp_8(); break;
552 }
553 (PCB).la_ptr = (PCB).pointer;
554 }
555
/* Name table used by ag_diagnose() when it builds error messages. */
556 #define TOKEN_NAMES detag_token_names
/* Printable name for each token number (0..70). An empty string marks a
   token without a name; ag_diagnose() skips those. */
557 const char *const detag_token_names[71] = {
558 "input string",
559 "input string",
560 "html",
561 "eof",
562 "text",
563 "tag",
564 "",
565 "",
566 "",
567 "",
568 "'<'",
569 "tag innards",
570 "'>'",
571 "\"HR\"",
572 "\"hr\"",
573 "",
574 "other stuff",
575 "\"P\"",
576 "\"p\"",
577 "",
578 "\"BR\"",
579 "\"br\"",
580 "",
581 "\"H\"",
582 "\"h\"",
583 "",
584 "header type",
585 "\"HEAD\"",
586 "\"head\"",
587 "\"HTML\"",
588 "\"html\"",
589 "",
590 "\"UL\"",
591 "\"ul\"",
592 "",
593 "\"OL\"",
594 "\"ol\"",
595 "",
596 "\"DL\"",
597 "\"dl\"",
598 "",
599 "\"LI\"",
600 "\"li\"",
601 "",
602 "\"TABLE\"",
603 "\"table\"",
604 "",
605 "\"TR\"",
606 "\"tr\"",
607 "",
608 "\"TD\"",
609 "\"td\"",
610 "",
611 "\"PRE\"",
612 "\"pre\"",
613 "",
614 "\"!--\"",
615 "comment stuff",
616 "tag innard char",
617 "",
618 "",
619 "comment char",
620 "",
621 "",
622 "text char",
623 "ordinary text char",
624 "entity text char",
625 "\"&lt;\"",
626 "\"&gt;\"",
627 "\"&amp;\"",
628 "",
629
630 };
631
/* printf-style formats for the messages built by ag_diagnose(); each takes
   one %s argument. UNNAMED_TOKEN is the text used when nothing more
   specific can be named. */
632 #ifndef MISSING_FORMAT
633 #define MISSING_FORMAT "Missing %s"
634 #endif
635 #ifndef UNEXPECTED_FORMAT
636 #define UNEXPECTED_FORMAT "Unexpected %s"
637 #endif
638 #ifndef UNNAMED_TOKEN
639 #define UNNAMED_TOKEN "input"
640 #endif
641
642
643 static void ag_diagnose(void) {
644 int ag_snd = (PCB).sn;
645 int ag_k = ag_sbt[ag_snd];
646
647 if (*TOKEN_NAMES[ag_tstt[ag_k]] && ag_astt[ag_k + 1] == ag_action_8) {
648 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]);
649 }
650 else if (ag_astt[ag_sbe[(PCB).sn]] == ag_action_8
651 && (ag_k = (int) ag_sbe[(PCB).sn] + 1) == (int) ag_sbt[(PCB).sn+1] - 1
652 && *TOKEN_NAMES[ag_tstt[ag_k]]) {
653 sprintf((PCB).ag_msg, MISSING_FORMAT, TOKEN_NAMES[ag_tstt[ag_k]]);
654 }
655 else if ((PCB).token_number && *TOKEN_NAMES[(PCB).token_number]) {
656 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, TOKEN_NAMES[(PCB).token_number]);
657 }
658 else if (isprint(INPUT_CODE((*(PCB).pointer))) && INPUT_CODE((*(PCB).pointer)) != '\\') {
659 char buf[20];
660 sprintf(buf, "\'%c\'", (char) INPUT_CODE((*(PCB).pointer)));
661 sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, buf);
662 }
663 else sprintf((PCB).ag_msg, UNEXPECTED_FORMAT, UNNAMED_TOKEN);
664 (PCB).error_message = (PCB).ag_msg;
665
666
667 }
/* Forward declarations of the action procedures referenced by the dispatch
   tables below. All take no arguments and return int; the reduction loops
   stop when a procedure returns 0. ag_action_8_proc is declared twice,
   which is harmless. */
668 static int ag_action_1_r_proc(void);
669 static int ag_action_2_r_proc(void);
670 static int ag_action_3_r_proc(void);
671 static int ag_action_4_r_proc(void);
672 static int ag_action_1_s_proc(void);
673 static int ag_action_3_s_proc(void);
674 static int ag_action_1_proc(void);
675 static int ag_action_2_proc(void);
676 static int ag_action_3_proc(void);
677 static int ag_action_4_proc(void);
678 static int ag_action_5_proc(void);
679 static int ag_action_6_proc(void);
680 static int ag_action_7_proc(void);
681 static int ag_action_8_proc(void);
682 static int ag_action_9_proc(void);
683 static int ag_action_10_proc(void);
684 static int ag_action_11_proc(void);
685 static int ag_action_8_proc(void);
686
687
/* Dispatch table indexed by an ag_astt action code; used by the reduction
   loops in ag_action_5_proc() and ag_action_6_proc(). */
688 static int (*const ag_r_procs_scan[])(void) = {
689 ag_action_1_r_proc,
690 ag_action_2_r_proc,
691 ag_action_3_r_proc,
692 ag_action_4_r_proc
693 };
694
/* Dispatch table indexed by an ag_astt action code; used by the reduction
   loops in ag_action_3_proc() and ag_action_4_proc(). Entries 1 and 3
   reuse the _r_ procedures. */
695 static int (*const ag_s_procs_scan[])(void) = {
696 ag_action_1_s_proc,
697 ag_action_2_r_proc,
698 ag_action_3_s_proc,
699 ag_action_4_r_proc
700 };
701
/* Dispatch table indexed by an ag_astt action code; used by detag() for
   the action chosen from the input token. The last entry (the slot for
   ag_action_12) maps to ag_action_8_proc. */
702 static int (*const ag_gt_procs_scan[])(void) = {
703 ag_action_1_proc,
704 ag_action_2_proc,
705 ag_action_3_proc,
706 ag_action_4_proc,
707 ag_action_5_proc,
708 ag_action_6_proc,
709 ag_action_7_proc,
710 ag_action_8_proc,
711 ag_action_9_proc,
712 ag_action_10_proc,
713 ag_action_11_proc,
714 ag_action_8_proc
715 };
716
717
718 static int ag_action_10_proc(void) {
719 int ag_t = (PCB).token_number;
720 (PCB).btsx = 0, (PCB).drt = -1;
721 do {
722 ag_track();
723 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
724 (PCB).la_ptr++;
725 if (ag_key_index[(PCB).sn]) {
726 unsigned ag_k = ag_key_index[(PCB).sn];
727 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
728 if (ag_ch <= 255) {
729 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
730 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
731 }
732 }
733 } while ((PCB).token_number == (detag_token_type) ag_t);
734 (PCB).la_ptr = (PCB).pointer;
735 return 1;
736 }
737
738 static int ag_action_11_proc(void) {
739 int ag_t = (PCB).token_number;
740
741 (PCB).btsx = 0, (PCB).drt = -1;
742 do {
743 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
744 (PCB).ssx--;
745 ag_track();
746 ag_ra();
747 if ((PCB).exit_flag != AG_RUNNING_CODE) return 0;
748 (PCB).ssx++;
749 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
750 (PCB).la_ptr++;
751 if (ag_key_index[(PCB).sn]) {
752 unsigned ag_k = ag_key_index[(PCB).sn];
753 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
754 if (ag_ch <= 255) {
755 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
756 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
757 }
758 }
759 }
760 while ((PCB).token_number == (detag_token_type) ag_t);
761 (PCB).la_ptr = (PCB).pointer;
762 return 1;
763 }
764
765 static int ag_action_3_r_proc(void) {
766 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
767 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
768 (PCB).btsx = 0, (PCB).drt = -1;
769 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
770 ag_ra();
771 return (PCB).exit_flag == AG_RUNNING_CODE;
772 }
773
774 static int ag_action_3_s_proc(void) {
775 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
776 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
777 (PCB).btsx = 0, (PCB).drt = -1;
778 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
779 ag_ra();
780 return (PCB).exit_flag == AG_RUNNING_CODE;
781 }
782
783 static int ag_action_4_r_proc(void) {
784 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
785 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
786 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
787 return 1;
788 }
789
790 static int ag_action_2_proc(void) {
791 (PCB).btsx = 0, (PCB).drt = -1;
792 if ((PCB).ssx >= 128) {
793 (PCB).exit_flag = AG_STACK_ERROR_CODE;
794 PARSER_STACK_OVERFLOW;
795 }
796 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
797 (PCB).ss[(PCB).ssx] = (PCB).sn;
798 (PCB).ssx++;
799 (PCB).sn = (PCB).ag_ap;
800 ag_track();
801 return 0;
802 }
803
804 static int ag_action_9_proc(void) {
805 if ((PCB).drt == -1) {
806 (PCB).drt=(PCB).token_number;
807 (PCB).dssx=(PCB).ssx;
808 (PCB).dsn=(PCB).sn;
809 }
810 ag_prot();
811 (PCB).vs[(PCB).ssx] = ag_null_value;
812 (PCB).ss[(PCB).ssx] = (PCB).sn;
813 (PCB).ssx++;
814 (PCB).sn = (PCB).ag_ap;
815 (PCB).la_ptr = (PCB).pointer;
816 return (PCB).exit_flag == AG_RUNNING_CODE;
817 }
818
819 static int ag_action_2_r_proc(void) {
820 (PCB).ssx++;
821 (PCB).sn = (PCB).ag_ap;
822 return 0;
823 }
824
825 static int ag_action_7_proc(void) {
826 --(PCB).ssx;
827 (PCB).la_ptr = (PCB).pointer;
828 (PCB).exit_flag = AG_SUCCESS_CODE;
829 return 0;
830 }
831
832 static int ag_action_1_proc(void) {
833 ag_track();
834 (PCB).exit_flag = AG_SUCCESS_CODE;
835 return 0;
836 }
837
838 static int ag_action_1_r_proc(void) {
839 (PCB).exit_flag = AG_SUCCESS_CODE;
840 return 0;
841 }
842
843 static int ag_action_1_s_proc(void) {
844 (PCB).exit_flag = AG_SUCCESS_CODE;
845 return 0;
846 }
847
848 static int ag_action_4_proc(void) {
849 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
850 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
851 (PCB).btsx = 0, (PCB).drt = -1;
852 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
853 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
854 else (PCB).ss[(PCB).ssx] = (PCB).sn;
855 ag_track();
856 while ((PCB).exit_flag == AG_RUNNING_CODE) {
857 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
858 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
859 do {
860 unsigned ag_tx = (ag_t1 + ag_t2)/2;
861 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
862 else ag_t2 = ag_tx;
863 } while (ag_t1 < ag_t2);
864 (PCB).ag_ap = ag_pstt[ag_t1];
865 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break;
866 }
867 return 0;
868 }
869
870 static int ag_action_3_proc(void) {
871 int ag_sd = ag_fl[(PCB).ag_ap] - 1;
872 (PCB).btsx = 0, (PCB).drt = -1;
873 (PCB).vs[(PCB).ssx] = *(PCB).pointer;
874 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
875 else (PCB).ss[(PCB).ssx] = (PCB).sn;
876 ag_track();
877 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
878 ag_ra();
879 while ((PCB).exit_flag == AG_RUNNING_CODE) {
880 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
881 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
882 do {
883 unsigned ag_tx = (ag_t1 + ag_t2)/2;
884 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
885 else ag_t2 = ag_tx;
886 } while (ag_t1 < ag_t2);
887 (PCB).ag_ap = ag_pstt[ag_t1];
888 if ((ag_s_procs_scan[ag_astt[ag_t1]])() == 0) break;
889 }
890 return 0;
891 }
892
893 static int ag_action_8_proc(void) {
894 ag_undo();
895 (PCB).la_ptr = (PCB).pointer;
896 (PCB).exit_flag = AG_SYNTAX_ERROR_CODE;
897 ag_diagnose();
898 SYNTAX_ERROR;
899 {(PCB).la_ptr = (PCB).pointer + 1; ag_track();}
900 return (PCB).exit_flag == AG_RUNNING_CODE;
901 }
902
903 static int ag_action_5_proc(void) {
904 int ag_sd = ag_fl[(PCB).ag_ap];
905 (PCB).btsx = 0, (PCB).drt = -1;
906 if (ag_sd) (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
907 else {
908 (PCB).ss[(PCB).ssx] = (PCB).sn;
909 }
910 (PCB).la_ptr = (PCB).pointer;
911 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
912 ag_ra();
913 while ((PCB).exit_flag == AG_RUNNING_CODE) {
914 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
915 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
916 do {
917 unsigned ag_tx = (ag_t1 + ag_t2)/2;
918 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
919 else ag_t2 = ag_tx;
920 } while (ag_t1 < ag_t2);
921 (PCB).ag_ap = ag_pstt[ag_t1];
922 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break;
923 }
924 return (PCB).exit_flag == AG_RUNNING_CODE;
925 }
926
927 static int ag_action_6_proc(void) {
928 int ag_sd = ag_fl[(PCB).ag_ap];
929 (PCB).reduction_token = (detag_token_type) ag_ptt[(PCB).ag_ap];
930 if ((PCB).drt == -1) {
931 (PCB).drt=(PCB).token_number;
932 (PCB).dssx=(PCB).ssx;
933 (PCB).dsn=(PCB).sn;
934 }
935 if (ag_sd) {
936 (PCB).sn = (PCB).ss[(PCB).ssx -= ag_sd];
937 }
938 else {
939 ag_prot();
940 (PCB).vs[(PCB).ssx] = ag_null_value;
941 (PCB).ss[(PCB).ssx] = (PCB).sn;
942 }
943 (PCB).la_ptr = (PCB).pointer;
944 while ((PCB).exit_flag == AG_RUNNING_CODE) {
945 unsigned ag_t1 = ag_sbe[(PCB).sn] + 1;
946 unsigned ag_t2 = ag_sbt[(PCB).sn+1] - 1;
947 do {
948 unsigned ag_tx = (ag_t1 + ag_t2)/2;
949 if (ag_tstt[ag_tx] < (unsigned char)(PCB).reduction_token) ag_t1 = ag_tx + 1;
950 else ag_t2 = ag_tx;
951 } while (ag_t1 < ag_t2);
952 (PCB).ag_ap = ag_pstt[ag_t1];
953 if ((ag_r_procs_scan[ag_astt[ag_t1]])() == 0) break;
954 }
955 return (PCB).exit_flag == AG_RUNNING_CODE;
956 }
957
958
959 void init_detag(void) {
960 (PCB).la_ptr = (PCB).pointer;
961 (PCB).ss[0] = (PCB).sn = (PCB).ssx = 0;
962 (PCB).exit_flag = AG_RUNNING_CODE;
963 (PCB).line = FIRST_LINE;
964 (PCB).column = FIRST_COLUMN;
965 (PCB).btsx = 0, (PCB).drt = -1;
966 }
967
968 void detag(void) {
969 init_detag();
970 (PCB).exit_flag = AG_RUNNING_CODE;
971 while ((PCB).exit_flag == AG_RUNNING_CODE) {
972 unsigned ag_t1 = ag_sbt[(PCB).sn];
973 if (ag_tstt[ag_t1]) {
974 unsigned ag_t2 = ag_sbe[(PCB).sn] - 1;
975 (PCB).token_number = (detag_token_type) AG_TCV(INPUT_CODE(*(PCB).la_ptr));
976 (PCB).la_ptr++;
977 if (ag_key_index[(PCB).sn]) {
978 unsigned ag_k = ag_key_index[(PCB).sn];
979 int ag_ch = CONVERT_CASE(INPUT_CODE(*(PCB).pointer));
980 if (ag_ch <= 255) {
981 while (ag_key_ch[ag_k] < ag_ch) ag_k++;
982 if (ag_key_ch[ag_k] == ag_ch) ag_get_key_word(ag_k);
983 }
984 }
985 do {
986 unsigned ag_tx = (ag_t1 + ag_t2)/2;
987 if (ag_tstt[ag_tx] > (unsigned char)(PCB).token_number)
988 ag_t1 = ag_tx + 1;
989 else ag_t2 = ag_tx;
990 } while (ag_t1 < ag_t2);
991 if (ag_tstt[ag_t1] != (unsigned char)(PCB).token_number)
992 ag_t1 = ag_sbe[(PCB).sn];
993 }
994 (PCB).ag_ap = ag_pstt[ag_t1];
995 (ag_gt_procs_scan[ag_astt[ag_t1]])();
996 }
997 }
998
999