/** * List the name of each HTML file that contains one or more HTML comments in its body text. List any contained comments under the file path+name. * @author Robert John Morton YE572246C * @version 07 August 2019 */ /* COMMANDS: cd /home/rob/Private/website/webtools/comments-extractor/ java commentsExtractor */ import java.io.*; // for file input/output handling class commentsExtractor { private static Writer td, // for 'To Do List' comments cn, // for comments that contain people's names si, // for 'supplementary information' comments pn, // for 'personal:' comments od, // for 'Old discarded text' comments dh, // for 'Document History' comments kr, // for all other comments gw; // general writer variable private static File fo; // for the current HTML file being examined private static int dir_level = 0, // current directory level used in scan() LenDir = 0, // length of parent directory path + a terminating '/' nf = 0; // total number of files processed private static String /* The names of all the top-level directories in the website that must be indexed. There are other top-level directories which are excluded from the indexing process for one reason or another. */ FOLDERS[] = { "book","chaos","computers","home","internet","landshare", "navigation","poems","radio","science","software" }, EX[] = { // Standard comments to be excluded from consideration. "Start of the left-hand navigation frame", "End of left nav frame. Start of top-right title frame", "End of top-right title frame. Start of bottom-right text frame", "End of the bottom-right scrolling text frame", "Start of text frame", "Start of right-hand text frame", "Set the height of the Title Frame (top right of browser window)." }, Blank = "\n\n" +"----------------------------------------------------------------" +"\n", BaseDir = "/home/rob/Private/website", // default base directory SubDir = "", // sub-directory FullPath = "", // absolute path to directory currently being indexed RelFileSpec = "", // relative filespec of current HTML file fp = ""; // file path of current HTML file being examine /* Determine what type of comment has been captured and write it to the appropriate report file. */ private static void commentType(String S) { boolean /* Array of 'this is a long comment' and 'file path+name already written' flags, one for each report file. */ B[] = {false,false,false,false,false,false,false}, F[] = {false,false,false,false,false,false,false}, I = true; // comment to be included in report int fn = 0; // number of the file to which the comment must be written for(int i = 0; i < EX.length; i++) if(S.indexOf(EX[i]) != -1) { I = false; break; } if(I == false) return; // exit if comment not to be included /* Write it to the appropriate file accord- ing to the type of comment encountered. */ if(S.indexOf("ToDoNote:") != -1) { gw = td; fn = 0; } else if(S.indexOf("SuppInfo:") != -1) { gw = si; fn = 1; } else if(S.indexOf("Personal:") != -1) { gw = pn; fn = 2; } else if(S.indexOf("contains names") != -1) { gw = cn; fn = 3; } else if(S.indexOf("Old discarded text:") != -1) { gw = od; fn = 4; } else if(S.indexOf("DocHist:") != -1) { gw = dh; fn = 5; } else { gw = kr; fn = 6; } /* If this is the first comment for this HTML file, write the path+name of the report file first. Then form the printable comment string. */ String C = fp.substring(BaseDir.length() + 1) + "\n"; try { if(fn != 3) C = Blank + C; if(!F[fn]) { gw.write(C); F[fn] = true; } C = S + "\n"; /* If this be a long comment, then if the previous one were short, skip an extra line before writing this long one. Then set the B flag to ind- icate to the next pass that this one was long. Then write this comment with an extra line after it. Else write this comment, which is short. */ if(fn != 3) { if((S.indexOf("\n") != -1) || (S.length() > 80)) { if(!B[fn]) { gw.write("\n"); B[fn] = true; } gw.write(C + "\n"); // long comment } else { gw.write(C); // short comment B[fn] = false; } } } catch(Exception e) { } } /* EXAMINE THE CONTENTS OF THE HTML FILE'S SECTION, EXTRACT THE BODY TEXT AND WRITE IT TO THE REPORT FILE. */ private static void examineThisHTMLfile() { boolean // THE FOLLOWING FLAGS ARE TRUE WHEN: inCom = false, // inputting the content of a comment tag inBdy = false; // inputting from the HTML file's body text int x; // for receiving java char input from file stream char a, b, c, d, f, g, h; // moving character train for /* < b o d y > // capturing the required tags < / b o d y > < ! - - - - > */ a = b = c = d = f = g = h = 0; // initialise character train String S = ""; // comment accumulator string try { FileReader fr = new FileReader(fp); // open this HTML file while((x = fr.read()) != -1) { // loop broken by End-Of-File // shift the characters along the train to make way for the new one. a = b; b = c; c = d; d = f; f = g; g = h; h = (char)x; /* If we are currently inputting a comment tag, add the new char- acter to the comment string. Then if we now have a --> comment terminator, analyse and store the comment. */ if(inCom) { S += h; if(h == '>' && g == '-' && f == '-') { commentType(S); // determine the type of comment captured /* Clear the content of the comment accumulator string and clear 'inCom' flag to show that we are no longer inside a comment. */ S = ""; inCom = false; } } /* Else we are not currently in the middle of a comment, but if we are nonetheless within the HTML file's section... */ else if(inBdy) { /* if we encounter a