/**
 * Chapter Indexer
 * @author Robert John Morton
 * @version 02 July 2009
 */

/* This program does NOT generate the java search engine index. Look in
   the folder 'spider' for this.

   For one chapter directory of the website-based book The Lost
   Inheritance this program writes three files into that directory:

     index??.htm      hyperlinked titles + description text of the articles
     noindex??.htm    the same, for files carrying a robots "noindex" meta tag
     contents??.html  short-title list for the chapter's side-bar

   The ?? stands for a chapter number in the range 01 to 13.

   Command line arguments (all optional):
     1) path prefix of the chapter directories. The chapter number is
        appended to it directly, so it normally ends in "chap"
        (default: ../../book/chap).
     2) chapter number 01 to 13; 00 (or omitted) means all 13 chapters.
     3) any third argument (conventionally E) causes entries without a
        description to be excluded.

   Sample command lines:
     create the index01.htm file for all articles in Chapter 1:
       java chapter_indexer home/rob/Private/website_css/book/chap 01 E
     create all index??.htm files for all chapters (01 to 13):
       java chapter_indexer home/rob/Private/website_css/book/chap 00
*/

import java.io.*;

class chapter_indexer {

  /** Capacity of each of the parallel per-article arrays below. */
  static final int MAX_ARTICLES = 4000;

  static int
    dir_level = 0,  // current directory depth while scan() recurses
    dl,             // length of the chapter directory path + the '/' after it
    arts = 0;       // number of articles collected so far for this chapter

  static Writer
    fslist,         // writes index??.htm    (the chapter index)
    frlist,         // writes noindex??.htm  (entries carrying a noindex tag)
    chcont;         // writes contents??.html (the side-bar contents list)

  static String
    A[]  = new String[MAX_ARTICLES],  // articles' filespecs relative to the chapter
    T[]  = new String[MAX_ARTICLES],  // articles' titles (the sort key)
    ST[] = new String[MAX_ARTICLES],  // articles' short titles
    Q[]  = new String[MAX_ARTICLES],  // articles' descriptions
    Descr = "",       // description of the file HTMLtitle() examined last
    shortTitle = "",  // short title of the file HTMLtitle() examined last
    sd = "",          // chapter number as a string, e.g. "01"
    CH = "",          // chapter number last passed to HTMLhead2()
    CHAPPY = "";      // "Chapter " + chapter number, used by Tail()

  static boolean
    ind = true,                // include entries that have no description
    GotDescription = false,    // per-file flags maintained by HTMLtitle()
    GotTheNoIndexTag = false,
    GotShortTitle = false,
    B[] = new boolean[MAX_ARTICLES];  // per-article "noindex" flags


  /* Position of 'word' in 'text' at or after 'from', ignoring letter case,
     or -1 if it does not occur. */
  private static int indexOfIgnoreCase(String text, String word, int from) {
    int last = text.length() - word.length();
    for (int i = Math.max(from, 0); i <= last; i++) {
      if (text.regionMatches(true, i, word, 0, word.length())) {
        return i;
      }
    }
    return -1;
  }

  /* True if the tag text begins with the word "meta", in any letter case. */
  private static boolean isMetaTag(String Tag) {
    return Tag.regionMatches(true, 0, "meta", 0, 4);
  }

  /* Close a stream, ignoring any failure; used only on clean-up paths
     where a more relevant exception may already be on its way out. */
  private static void closeQuietly(Closeable c) {
    if (c == null) {
      return;
    }
    try {
      c.close();
    } catch (IOException ignored) {
      // nothing useful can be done about a failed close during clean-up
    }
  }


  /* DETERMINE WHETHER OR NOT THE PRESENTED TAG IS A DESCRIPTION META TAG.
     'Tag' is the text between '<' and '>'. If it starts with "meta" and
     contains "description" and "content" (any letter case), the text
     between the first pair of double quotes after "content" is stored in
     Descr and GotDescription is set. Only the first description found in
     a file is kept. Called from only one place in HTMLtitle(). */
  static void isDescriptionTag(String Tag) {
    if (GotDescription) {
      return;
    }
    if (!isMetaTag(Tag) || indexOfIgnoreCase(Tag, "description", 0) == -1) {
      return;
    }
    int x = indexOfIgnoreCase(Tag, "content", 0);
    if (x == -1) {
      return;
    }
    x = Tag.indexOf('\"', x + 7) + 1;   // first character after the opening quote
    int y = Tag.indexOf('\"', x);       // the closing quote
    if (y > x) {                        // some actual text lies between the quotes
      Descr = Tag.substring(x, y);
      GotDescription = true;
    }
  }


  /* TEST TO SEE IF THE CAPTURED TAG CONTENT IS A "noindex" TAG, i.e. it
     starts with "meta" and contains both "robots" and "noindex" (any
     letter case). Sets GotTheNoIndexTag; once set it stays set for the
     file. Called from only one place in HTMLtitle(). */
  static void isNoIndexTag(String Tag) {
    if (GotTheNoIndexTag) {
      return;
    }
    if (isMetaTag(Tag)
        && indexOfIgnoreCase(Tag, "robots", 0) != -1
        && indexOfIgnoreCase(Tag, "noindex", 0) != -1) {
      GotTheNoIndexTag = true;
    }
  }


  /* TEST TO SEE IF THE CAPTURED TAG CONTENT IS A SHORT TITLE, i.e. an HTML
     comment: tag text that starts with "!--" and ends with "--". The first
     such comment in a file claims the short-title slot; its content is
     stored in shortTitle only if the whole tag text is under 25 characters.
     Degenerate comments too short to have separate opening and closing
     dashes are ignored. Called from only one place in HTMLtitle(). */
  static void isShortTitleTag(String Tag) {
    if (GotShortTitle) {
      return;
    }
    if (!(Tag.startsWith("!--") && Tag.endsWith("--"))) {
      return;
    }
    int l = Tag.length();
    if (l < 5) {          // "!--" or "!---": substring(3, l - 2) would be invalid
      return;
    }
    if (l < 25) {
      shortTitle = Tag.substring(3, l - 2);
    }
    GotShortTitle = true;
  }


  /* EXAMINE THE CONTENTS OF THE HTML FILE 'fp'.
     Reads the file character by character up to the closing head tag and
     returns the text found between the title tags, less one leading and
     one trailing line feed. As side effects it sets Descr (or "No
     Description" when none was found), shortTitle, GotDescription,
     GotTheNoIndexTag and GotShortTitle for this file. If the file cannot
     be read a warning is printed and "" is returned.
     Called from one place in scan(). */
  static String HTMLtitle(String fp) {
    boolean inTag = false,     // true while collecting the text of a tag
            inTitle = false;   // true while collecting the title text
    StringBuilder tagText = new StringBuilder();
    StringBuilder title = new StringBuilder();

    // start every file with a clean slate so nothing leaks from the last one
    Descr = "";
    shortTitle = "";
    GotDescription = false;
    GotTheNoIndexTag = false;
    GotShortTitle = false;

    Reader in = null;
    try {
      in = new BufferedReader(new FileReader(fp));
      int x;
      while ((x = in.read()) != -1) {
        char c = (char) x;
        if (c == '<') {
          inTag = true;
        } else if (c == '>') {
          String Tag = tagText.toString();
          if (Tag.equalsIgnoreCase("title")) {
            inTitle = true;
          } else if (Tag.equalsIgnoreCase("/title")) {
            inTitle = false;
          } else if (Tag.equalsIgnoreCase("/head")) {
            break;             // nothing of interest lies beyond the head
          } else {
            isDescriptionTag(Tag);
            isNoIndexTag(Tag);
            isShortTitleTag(Tag);
          }
          tagText.setLength(0);  // clear for the next tag to be encountered
          inTag = false;
        } else if (inTag) {
          tagText.append(c);
        } else if (inTitle) {
          title.append(c);
        }
      }
    } catch (IOException e) {
      System.err.println("Could not read " + fp + ": " + e.getMessage());
      title.setLength(0);
    } finally {
      closeQuietly(in);
    }

    // also covers files that have no closing head tag or could not be read
    if (Descr.equals("")) {
      Descr = "No Description";
    }

    // cut out a possible leading and trailing '\n'
    String Title = title.toString();
    if (Title.startsWith("\n")) {
      Title = Title.substring(1);
    }
    if (Title.endsWith("\n")) {
      Title = Title.substring(0, Title.length() - 1);
    }
    return Title;
  }


  /* HTML TAIL FOR THE GENERIC CHAPTER INDEX FILE.
     Called from 2 places in indexChapter().
     NOTE(review): in the copy of this file under review the HTML markup
     inside this literal appears to have been stripped (the literal was
     broken across physical lines). Only the surviving characters are
     reproduced, each line break written as \n and the copyright sign as
     \u00A9. Restore the markup from the original file. */
  static String tail() {
    return "\n\n\n\u00A9 June 2018 Robert John Morton\n\n\n";
  }


  /* HTML TAIL FOR THE GENERIC CHAPTER CONTENTS FILE; uses CHAPPY, which
     HTMLhead2() sets. Called from only one place in indexChapter().
     NOTE(review): in the copy of this file under review the HTML markup
     inside these literals appears to have been stripped (the first literal
     was broken across physical lines). Only the surviving characters are
     reproduced, each line break written as \n. Restore the markup from the
     original file. */
  static String Tail() {
    return "\n\n\n" + "Back to " + CHAPPY + "\n\n\n\n";
  }


  /* True if a filespec names the chapter file itself or one of the files
     this program generates for the current chapter 'sd'. */
  private static boolean isChapterOwnFile(String filespec) {
    return filespec.indexOf("chap" + sd) != -1
        || filespec.indexOf("contents" + sd) != -1
        || filespec.indexOf("index" + sd) != -1;
  }

  /* The value of the first href="..." attribute in 'link', or the whole of
     'link' if it has none. */
  private static String hrefOf(String link) {
    int start = link.indexOf("href=\"");
    if (start == -1) {
      return link;
    }
    start += 6;
    int end = link.indexOf('"', start);
    return end == -1 ? link.substring(start) : link.substring(start, end);
  }


  /* CONSTRUCT A SIDE-BAR CONTENTS ENTRY for the chapter's footnote
     articles contents list. s = article's filespec, t = short title.
     Returns "" for the chapter file itself and for the generated contents
     and index files. An empty short title becomes "NULL"; one longer than
     19 characters is cut to 19. Called from only one place in
     indexChapter().
     NOTE(review): in the copy of this file under review this return
     expression had lost its markup (it read "" + t + a literal broken
     across lines, followed by the remark "quoted href"), which suggests an
     anchor built from 's' stood here. Only the surviving characters are
     reproduced. Restore the markup from the original file. */
  static String Mhld(String s, String t) {
    if (isChapterOwnFile(s)) {
      return "";
    }
    if (t.equals("")) {
      t = "NULL";
    }
    if (t.length() > 19) {
      t = t.substring(0, 19);
    }
    return t + "\n\n";
  }


  /* CONSTRUCT AN INDEX FILE ENTRY, comprising:
       1) the hyperlinked title L between definition-term tags,
       2) a div inside definition-detail tags,
       3) the file description D,
       4) the closing tags and a line break.
     Returns "" when the link's href names the chapter file itself or a
     generated index or contents file. (The generated contents??.html ends
     in ".html", so scan() picks it up like any article.)
     Called from 2 places in indexChapter(). */
  static String mhld(String L, String D) {
    if (isChapterOwnFile(hrefOf(L))) {
      return "";
    }
    return "<dt>" + L + "</dt>\n"       // 1)
         + "<dd><div>"                  // 2)
         + D                            // 3)
         + "</div></dd>\n<br>\n";       // 4)
  }


  /* MAKE A FILESPEC INTO A HYPERLINK. s = filespec, T = link text; if T is
     empty the filespec itself is used as the link text.
     Called from only one place in indexChapter(). */
  static String mhl(String s, String T) {
    if (T.equals("")) {
      T = s;
    }
    return "<a href=\"" + s + "\">" + T + "</a>";
  }


  /* Exchange every per-article array's entries i and j so that the
     parallel arrays stay in step with the sort key T. */
  private static void swapEntries(int i, int j) {
    String x = T[i];  T[i] = T[j];   T[j] = x;
    x = ST[i];        ST[i] = ST[j]; ST[j] = x;
    x = A[i];         A[i] = A[j];   A[j] = x;
    x = Q[i];         Q[i] = Q[j];   Q[j] = x;
    boolean b = B[i]; B[i] = B[j];   B[j] = b;
  }


  /* C A R HOARE'S QUICK SORT over entries LO..HI (inclusive) of the
     per-article arrays, ordered by title T using String.compareTo.
     The method is recursive. The 'throws' clause is retained for the
     benefit of existing callers.
     Called only by itself and from one place in indexChapter(). */
  static void qs(int LO, int HI) throws IOException {
    if (HI <= LO) {               // nothing to sort in this partition
      return;
    }
    int lo = LO;
    int hi = HI;
    String mid = T[(LO + HI) >>> 1];   // pivot: the middle element's title

    while (lo <= hi) {
      while (lo < HI && T[lo].compareTo(mid) < 0) {
        lo++;                     // push lower boundary up past smaller titles
      }
      while (hi > LO && T[hi].compareTo(mid) > 0) {
        hi--;                     // pull upper boundary down past larger titles
      }
      if (lo <= hi) {
        swapEntries(lo, hi);
        lo++;
        hi--;
      }
    }
    if (LO < hi) {
      qs(LO, hi);                 // sort lower partition
    }
    if (lo < HI) {
      qs(lo, HI);                 // sort upper partition
    }
  }


  /* REMOVE "The " OR "A " FROM THE START OF EVERY COLLECTED TITLE so that
     titles sort on their first significant word.
     Called from only one place in indexChapter(). */
  static void killThe() {
    for (int i = 0; i < arts; i++) {
      String s = T[i];
      if (s.startsWith("The ")) {
        T[i] = s.substring(4);
      } else if (s.startsWith("A ")) {
        T[i] = s.substring(2);
      }
    }
  }


  /* SCAN DIRECTORY 'd' AND ALL ITS SUB-DIRECTORIES (recursively), except
     directories whose path ends in "_dev". For every file whose name ends
     in ".html", other than "index.html", the title, short title, noindex
     flag, description and filespec relative to the chapter directory are
     appended to the T, ST, B, Q and A arrays and 'arts' is advanced.
     A directory that cannot be listed is reported and skipped.
     Throws IOException if more than MAX_ARTICLES files are found.
     Called from only one place in indexChapter(). */
  private static void scan(String d) throws IOException {
    String D[] = new File(d).list();
    if (D == null) {              // not a directory, or an I/O error occurred
      System.err.println("Could not list directory: " + d);
      return;
    }
    for (int i = 0; i < D.length; i++) {
      String dd = D[i];                 // name within this directory
      String fp = d + "/" + dd;         // path including the chapter directory
      File fs = new File(fp);

      if (fs.isDirectory() && !fp.endsWith("_dev")) {
        dir_level++;
        scan(fp);
        dir_level--;
      } else if (fs.isFile() && fp.endsWith(".html")
                 && !dd.equals("index.html")) {
        if (arts >= MAX_ARTICLES) {
          throw new IOException("More than " + MAX_ARTICLES
                                + " articles found; reached limit at " + fp);
        }
        T[arts] = HTMLtitle(fp);        // also sets the per-file statics below
        ST[arts] = shortTitle;
        B[arts] = GotTheNoIndexTag;
        Q[arts] = Descr;
        A[arts++] = fp.substring(dl);   // strip the chapter directory prefix
      }
    }
  }


  /* HTML PREAMBLE FOR THE GENERIC CHAPTER CONTENTS FILE.
     ch = chapter number; NI is not used. As a side effect sets CH to ch
     and CHAPPY to "Chapter " + ch.
     NOTE(review): the original computed a local "Epilogue" label for
     chapter 13 only after CHAPPY had been assigned, so it never had any
     effect; confirm whether CHAPPY was meant to read "Epilogue" there.
     Called from one place only in indexChapter(). */
  static String HTMLhead2(String ch, String NI) {
    CH = ch;
    CHAPPY = "Chapter " + ch;
    return "<html lang=\"en\">\n<head>\n\n"
      + "<meta http-equiv=\"Content-Type\""
      + " content=\"text/html; charset=UTF-8\">\n"
      + "<meta name=\"viewport\" "
      + "content=\"width=924, height=840, initial-scale=1\">\n"
      + "<link href=\"../../rob.css\" rel=\"stylesheet\" type=\"text/css\">"
      + "\n\n</head>\n<body>\n\n"

      // the front cover image of The Lost Inheritance
      + "<p>\n<img src=\"../tli.png\" width=\"150\" height=\"217\"\n"
      + "alt=\"Front cover of The Lost Inheritance by Robert John Morton.\">"

      // heading of the footnotes list
      + "\n<p>\n<b>Ch" + ch + ": Footnotes</b>\n<br>\n";
  }


  /* HTML PREAMBLE FOR THE GENERIC CHAPTER INDEX FILE.
     ch = chapter number; NI is not used in the surviving text.
     Called from 2 places within indexChapter().
     NOTE(review): in the copy of this file under review the HTML markup
     that followed the title text appears to have been stripped (the last
     literal was broken across physical lines). Only the surviving
     characters are reproduced, each line break written as \n. Restore the
     markup from the original file. */
  static String HTMLhead1(String ch, String NI) {
    return "<html lang=\"en\">\n<head>\n"
      + "<meta HTTP-EQUIV=\"Content-Type\""
      + " CONTENT=\"text/html; charset=UTF-8\">\n"
      + "<title>The Lost Inheritance, Chapter " + ch + ": Contents\n"
      + "\n\n"
      + "\n\n"
      + "\n\n\n";
  }


  /* DEAL WITH THE CHAPTER WHOSE NUMBER IS IN 'sd'.
     bd = path prefix; the chapter directory is bd + sd. Returns false,
     writing nothing, if that is not a directory. Otherwise writes
     index??.htm, noindex??.htm and contents??.html into it and returns
     true. Entries described as "No Description" are skipped when 'ind' is
     false. All three files are closed even if an I/O error occurs.
     Called from 2 places in main(). */
  static boolean indexChapter(String bd) throws IOException {
    bd += sd;                     // e.g. ../../book/chap01
    dl = bd.length() + 1;         // prefix length that scan() strips off
    if (!new File(bd).isDirectory()) {
      return false;
    }
    String U = sd + ".htm";       // common ending of the generated file names

    fslist = null;
    frlist = null;
    chcont = null;
    try {
      fslist = new FileWriter(bd + "/index" + U);
      fslist.write(HTMLhead1(sd, ""));

      frlist = new FileWriter(bd + "/noindex" + U);
      frlist.write(HTMLhead1(sd, "no"));

      chcont = new FileWriter(bd + "/contents" + U + "l");
      chcont.write(HTMLhead2(sd, ""));

      scan(bd);                   // fill the per-article arrays
      killThe();                  // drop leading "The " / "A " from titles
      qs(0, arts - 1);            // sort everything by title

      for (int i = 0; i < arts; i++) {
        String s = Q[i];
        if (!ind && s.equals("No Description")) {
          continue;               // caller asked to leave these out
        }
        String L = mhl(A[i], T[i]);
        if (B[i]) {               // "noindex" files go to the rejects list
          frlist.write(mhld(L, s));
        } else {
          String w = mhld(L, s);
          if (!w.equals("")) {
            fslist.write(w);
          }
          w = Mhld(A[i], ST[i]);
          if (!w.equals("")) {
            chcont.write(w);
          }
        }
      }

      // terminate and close each file; a failure here must be reported
      fslist.write(tail());
      fslist.close();
      chcont.write(Tail());
      chcont.close();
      frlist.write(tail());
      frlist.close();
    } finally {
      // no-ops after a normal close; release the files after a failure
      closeQuietly(fslist);
      closeQuietly(chcont);
      closeQuietly(frlist);
    }
    return true;
  }


  /* ENTRY POINT. See the head of this file for the arguments. A chapter
     number of 00, or none at all, indexes all 13 chapters; 01 to 13
     indexes that chapter; anything else is reported as an error. */
  public static void main(String args[]) throws IOException {
    String bd = "../../book/chap";    // default path prefix
    if (args.length > 0) {
      bd = args[0];
      if (args.length > 1) {
        sd = args[1];
      }
      if (args.length > 2) {
        ind = false;                  // exclude entries with no description
      }
    }

    int chapter;                      // 0 = all chapters, -1 = not a number
    if (sd.equals("")) {
      chapter = 0;
    } else {
      try {
        chapter = Integer.parseInt(sd);
      } catch (NumberFormatException e) {
        chapter = -1;
      }
    }

    if (chapter == 0) {
      for (int k = 1; k <= 13; k++) {
        arts = 0;                     // each chapter starts with empty arrays
        sd = (k < 10 ? "0" : "") + k; // two-digit chapter number
        if (!indexChapter(bd)) {
          System.out.println(
            "Encountered invalid chapter directory: " + bd + sd
          );
        }
      }
    } else if (chapter < 1 || chapter > 13) {
      System.out.println(
        "Second argument must be a chapter number in the range 01 to 13"
        + " (or 00 for all chapters)."
      );
    } else if (!indexChapter(bd)) {
      System.out.println(bd + sd + " must be a valid directory.");
    }
  }
}