/** 
  * HTML Tag Checker
  * @author Robert J Morton <robmorton@clara.net>
  * @version 10 July 2018
  * @copyright 28 Nov 2019 Robert J Morton (all rights reserved) */

/* This program checks the integrity of the viewport tag in each HTML file. */

import java.io.*;

/**
 * Walks a directory tree and lists every HTML file that does not contain
 * the exact viewport meta tag given by {@link #TAG_TEXT}.
 *
 * <p>The tree root is fixed at {@code BASE_DIR + "/" + SUB_DIR}; command
 * line arguments are not consulted. The paths of offending files are
 * written, one per line, to {@link #REPORT_FILE} in the current working
 * directory.
 */
class viewport_check {

  /** Name of the report file listing the files without a valid tag. */
  private static final String REPORT_FILE = "bad_viewport.txt";

  /** Parent directory of the tree to be scanned. */
  private static final String BASE_DIR = "/home/rob/Private";

  /** Sub-directory of {@link #BASE_DIR} at which the scan starts. */
  private static final String SUB_DIR = "website";

  /**
   * The text that must appear, character for character, between a '<'
   * and the following '>' for a file to be accepted.
   */
  private static final String TAG_TEXT =
      "meta name=\"viewport\" "
    + "content=\"width=924, height=840, initial-scale=1\"";

  /**
   * A directory whose full path ends with one of these strings is not
   * descended into. This is a suffix match on the path, as in the
   * original code, so e.g. "old_images" is skipped as well as "images".
   */
  private static final String[] SKIPPED_DIR_SUFFIXES = {
    "webtools", "images", "applets", "java_progs", "C-programs"
  };

  /**
   * Reads characters from {@code in} until end of input, accumulating the
   * text found between each '<' and the next '>', and reports whether any
   * such text is exactly equal to {@link #TAG_TEXT}.
   *
   * <p>A '<' always starts a fresh accumulation, even when it occurs
   * inside an unterminated tag. All scanning state is local to the call,
   * so an unterminated tag at the end of one input cannot affect the
   * result for the next input.
   *
   * @param in source of the HTML text; it is read but not closed here
   * @return {@code true} as soon as a matching tag is found, {@code false}
   *         if end of input is reached without one
   * @throws IOException if reading from {@code in} fails
   */
  static boolean containsViewportTag(Reader in) throws IOException {
    boolean inTag = false;                    // between a '<' and a '>'
    StringBuilder tag = new StringBuilder();  // text of the current tag
    int c;                                    // input character or -1

    while ((c = in.read()) != -1) {
      if (c == '<') {
        inTag = true;
        tag.setLength(0);
      } else if (inTag) {
        if (c == '>') {
          inTag = false;
          if (tag.toString().equals(TAG_TEXT)) {
            return true;
          }
        } else {
          tag.append((char) c);
        }
      }
    }
    return false;
  }

  /**
   * Opens the file at {@code path} and checks it for the viewport tag.
   *
   * <p>A file that cannot be opened or read is reported on standard error
   * and counted as lacking the tag, so that it still appears in the report
   * (the original code silently treated such files the same way).
   *
   * @param path full path to the HTML file to examine
   * @return {@code true} if the file has no valid viewport tag or could
   *         not be read, {@code false} if the tag was found
   */
  private static boolean lacksViewportTag(String path) {
    try (Reader in = new BufferedReader(new FileReader(path))) {
      return !containsViewportTag(in);
    } catch (IOException e) {
      System.err.println("Cannot read " + path + ": " + e.getMessage());
      return true;
    }
  }

  /**
   * Tells whether the directory at {@code path} is excluded from the scan.
   *
   * @param path full path of a directory
   * @return {@code true} if {@code path} ends with one of
   *         {@link #SKIPPED_DIR_SUFFIXES}
   */
  private static boolean isSkippedDirectory(String path) {
    for (String suffix : SKIPPED_DIR_SUFFIXES) {
      if (path.endsWith(suffix)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Examines every entry of directory {@code d}. Sub-directories that are
   * not excluded are scanned recursively. Regular files whose path ends
   * with ".html" but not with "index.html" are checked, and the path of
   * each one lacking the viewport tag is written to {@code report}
   * followed by a newline.
   *
   * <p>If the directory cannot be listed, a message is printed on standard
   * error and the directory is skipped rather than aborting the run.
   *
   * @param d      path of the directory to scan
   * @param report destination for the paths of offending files
   * @throws IOException if writing to {@code report} fails
   */
  private static void scan(String d, Writer report) throws IOException {
    String[] entries = new File(d).list();

    // File.list() returns null when d is not a directory or an I/O error
    // occurs (for example, insufficient permission to read it).
    if (entries == null) {
      System.err.println("Cannot list directory " + d);
      return;
    }

    for (String entry : entries) {
      String fp = d + "/" + entry;  // full path to the entry
      File fs = new File(fp);

      if (fs.isDirectory()) {
        if (!isSkippedDirectory(fp)) {
          scan(fp, report);
        }
      } else if (fs.isFile()
          && fp.endsWith(".html")
          && !fp.endsWith("index.html")) {
        if (lacksViewportTag(fp)) {
          report.write(fp + '\n');
        }
      }
    }
  }

  /**
   * Scans the fixed directory tree and writes the report file. If the
   * root is not an existing directory, a message saying so is printed on
   * standard output and no report file is created.
   *
   * @param args ignored
   * @throws Exception if the report file cannot be created or written
   */
  public static void main(String args[]) throws Exception {
    String d = BASE_DIR + "/" + SUB_DIR;

    if (new File(d).isDirectory()) {
      // try-with-resources closes the report even if the scan fails
      try (Writer report = new FileWriter(REPORT_FILE)) {
        scan(d, report);
      }
    } else {
      System.out.println(d + " is not a directory.");
    }
  }
}