/**
  * Web Site Link Checker.
  * @author Robert John Morton YE572246C
  * @version 03 July 2009 HTML revised version 2018-09-10 11:36:36
  */

/* This program checks all the links in all the HTML files in the website.
   It does not crawl the links themselves. Instead, it verifies that the
   filepath argument of each "href=" points to a valid file. This can be
   any file: not just an HTML file. It includes links to txt, java, c, pdf
   and other files that can be displayed by some means within or from a
   web browser.

   This program creates the following output files:
     bad_imgs.txt        bad_links.txt
     external_imgs.txt   external_links.txt
     good_imgs.txt       good_links.txt
     malformed_imgs.txt  malformed_links.txt
     hash_imgs.txt       hash_links.txt
   Each contains the full filespec of the HTML file currently being
   examined with an indented list of relevant links for that file.

   IMPORTANT: this program requires strict syntax for hyperlink and image
   links. That is: the equals sign between key and value must be
   close-coupled with no intervening spaces, e.g.
   <a href="filespec"> and <img src="filespec">

0 8 p Called from only one place in checkLnk(). */ static String extractHyperLink(String Tag) { String E = "MALFORMED: " + Tag; // form the error message if(Tag.startsWith("!--")) // return null if a comment tag return ""; if(Tag.indexOf("a href") == -1) // return null if no return ""; // a "hyperlink" tag if(Tag.indexOf("=\"") != 6) // If the string =" is not found beginning return E; // with character number 6, exit with a // "MALFORMED" eror message. /* An =" has been found at position 6; so now, starting with char- acter number 8, find the position p of the terminating quote. */ int p = Tag.indexOf("\"", 8); if(p < 8) // If the sequence is not found, return E; // the link is malformed. if(Tag.indexOf('#',8) != -1) // If the path contains a # character, hashtag = true; // set the hashtag flag. return Tag.substring(8,p); // return the enclosed filespec } /* TRUNCATION AND JOINING OF THE HTML FILE'S FILESPEC AND CURRENT LINK Example 1: Chopping the "../" off the link: ../../airnav/airnav6.htm#PreCap lp y Example 2: Chopping the same number of levels off the HTML file's filespec /home/rob/website/projects/navigation/wayptapp/geom/wpencpg.htm fp j j j j Called from one place each in checkImg and checkLnk(). */ static boolean validLink(String fp, String lp) { int x, y = 0, z = 1, j = fp.length(); /* Skip past all "../"s to first specified direcrory or file name. 'y' ends up as start position 's' of the rest of the link filespec. */ while((x = lp.indexOf("../", y)) != -1) { y = x + 3; z++; } /* In Example 2, we would go round the following FOR-loop 3 times. The WHILE loop searches backwards for the next '/' character. At the end of the FOR loop, 'j' is the position of '/' at which 'fp' must be truncated. */ for(int i = 0; i < z; i++) while(fp.charAt(--j) != '/'); /* Create a file object from the absolute filespec created by joining the truncated pf and the truncated link address. If pf contains a #, chop it off together with its internal anchor name. 
*/ String s = fp.substring(0, j + 1) + lp.substring(y); int hash = 0; if((hash = s.indexOf('#')) != -1) s = s.substring(0,hash); File f = new File(s); if(f.isFile()) return true; // the link is a valid one return false; // this is a bad link } /* EXTRACT THE URL OR FILESPEC FROM A CAPTURED IMAGE TAG

0 9 p Called from only one place in checkImg(). */ static String extractImgURL(String Tag) { String E = "MALFORMED: " + Tag; // form the error message if(Tag.startsWith("!--")) // return null if a comment tag return ""; if(Tag.indexOf("img src") == -1) // return null if not an "image return ""; // source" tag if(Tag.indexOf("=\"") != 7) // If the string =" is not found begin- return E; // ning with character number 7, exit // with a "MALFORMED" eror message. /* An =" has been found at position 7. We must now find the posi- tion p of the terminating quote, starting with character number 9. If the sequence is not found, the link is malformed. */ int p = Tag.indexOf("\"",9); if(p < 9) return E; Tag = Tag.substring(9,p); // extract the text of the URL or filespec boolean goodURL = false; // assume initially that it is a bad link for(int i = 0; i < FE.length; i++) // Check if the URL ends with one of if(Tag.endsWith(FE[i])) { // the 3 valid file extensions for goodURL = true; // images: png jpg gif. If so, set break; // that it is a good URL } if(goodURL) // if it is a good URL return Tag; // return its text else // otherwise, if it is bad, return E; // return the error message "MALFORMED" } /* CHECK TO SEE IF THE CAPTURED TAG IS A HYPERLINK TO AN IMAGE FILE