Mercurial > ~dholland > hg > ag > index.cgi
diff tests/agcl/parsifal/detag1.syn @ 0:13d2b8934445
Import AnaGram (near-)release tree into Mercurial.
author | David A. Holland |
---|---|
date | Sat, 22 Dec 2007 17:52:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/agcl/parsifal/detag1.syn Sat Dec 22 17:52:45 2007 -0500 @@ -0,0 +1,180 @@ +{ +/* + detag.syn + + Program to strip HTML tags from HTML files. + Copyright (c) 1996 - 1999 Parsifal Software, All + Rights Reserved. + See the file COPYING for license and usage terms. + + For information about AnaGram, visit http://www.parsifalsoft.com. +*/ + +#include <stdio.h> + +} + + +// -- CONFIGURATION SECTION ---------------------------- +[ + default token type = int + pointer input + nest comments + test file mask = "*.htm*" +] +//------------------------------------------------------ + +eof = 0 // In this case, eof is the null char ending a string +tag innard char = ~(eof + '<' + '>') +comment char = ~(eof + '>') +ordinary text char = ~(eof + '<') +header type = '1-6' // for H1, H2,... H6 + +(void) input string $ // specify grammar token + -> html, eof + +html + -> [text | tag...]/... // virtual production w. alternating seq. + +tag + -> '<', tag innards, '>' + + + /* Here we pick up some of the HTML start tags so + we can insert a blank line in the output file for + readability. Note that because keywords take + precedence in AnaGram, the keyword is chosen + by the parser in preference to other input + beginning with the same characters. Turned out + that we didn't really want the blank line in + some cases, so the reduction procedure was + simply removed for those start tags - so, + nothing happens when they get picked up. */ +tag innards + -> {"HR" | "hr"}, other stuff = putc('\n', output); + -> {"P" | "p"}, other stuff = putc('\n', output); + -> {"BR" | "br"}, other stuff + -> {"H" | "h"}, header type, other stuff = putc('\n', output); + -> {"HEAD" | "head" | "HTML" | "html"}, other stuff // avoid getting Hn + -> {"UL" | "ul"}, other stuff + -> {"OL" | "ol"}, other stuff + -> {"DL" | "dl"}, other stuff + -> {"LI" | "li"}, other stuff + -> {"TABLE" | "table"}, other stuff = putc('\n', output); + -> {"TR" | "tr"}, other stuff + -> {"TD" | "td"}, other stuff + -> {"PRE" | "pre"}, other stuff + -> "!--", comment stuff + -> other stuff + +other stuff + -> tag innard char?... + +comment stuff // treatment of comments is a bit simple-minded here + -> comment char?... + +text // text is what we want to copy to output file + ->text char + ->text, text char + + +text char + -> ordinary text char:c = putc(c, output); + -> entity text char + +entity text char // we only bother with 3 entities here + -> "<" = putc('<', output); + -> ">" = putc('>', output); + -> "&" = putc('&', output); + + +{ // ----- Embedded C --------------------------- + +FILE *output; + +int main(int argc, char *argv[]) { + + FILE *input; + size_t fileLength; + size_t stringLength; + int errorFlag = 0; + char *inString; + + + /* Check for enough arguments */ + if (argc != 3) { + printf("Program to strip HTML tags from a file\n" + "Usage: %s <input filename> <output filename>\n", argv[0]); + return 1; + } + + /* Open input file for reading only */ + input = fopen(argv[1],"r"); + if (input == NULL) { + printf("Cannot open %s\n", argv[1]); + return 2; + } + + /* find out how big the file is */ + if (fseek(input, SEEK_SET, SEEK_END)) { + printf("Strange problems with %s\n", argv[1]); + return 3; + } + fileLength = ftell(input); + if (fileLength < 0 ) { // -1L is error return + printf("Error getting file length (%d) of %s\n", fileLength, argv[1]); + return 4; + } + + /* fseek to beginning of file */ + if (fseek(input, 0, SEEK_SET)) { + printf("Strange problems with %s\n", argv[1]); + return 5; + } + + /* Allocate storage for input string */ + inString = (char*)malloc(fileLength + 1); + if (inString == NULL) { + printf("Insufficient memory\n"); + return 6; + } + + /* Read file */ + stringLength = fread(inString, 1, fileLength, input); + if (stringLength == 0) { + printf("Unable to read %s\n", argv[1]); + return 7; + } + inString[stringLength] = 0; + + + /* Open output file for writing only */ + output = fopen(argv[2],"w"); + if (output == NULL) { + printf("Cannot open %s\n", argv[2]); + free(inString); + fclose(input); + return 8; + } + + + /* Invoke parser */ + PCB.pointer = (unsigned char *)inString; // using pointer input + detag(); + if (PCB.exit_flag != 1) { + printf( "Unsuccessful termination of parse, PCB.exit_flag = %d\n", + PCB.exit_flag); + } + + + /* Done */ + free(inString); + fclose(input); + fclose(output); + printf( " End detag "); + return 0; +} + +} // --------- End of Embedded C -------------------- + +