(*
    CasperScannerValidationWithLinenumbers.mll
    Michael Makris, mm3443
    PLT Fall 2018

    Purpose: to validate CasperScanner.mll. Removed Parser dependency and added printf statements to identify current token read.

    Use:
        ocamllex CasperScannerValidationWithLinenumbers.mll
        ocaml CasperScannerValidationWithLinenumbers.ml < CasperScannerValidationInput.txt
*)

{
open Printf
}

(* definitions *)
(* A run of one or more blanks or tabs; it never contains a line terminator. *)
let whitespace = (' ' | '\t')+
(* Exactly one line terminator: CR LF taken as a pair, or a lone LF or CR. *)
let newline = "\r\n" | '\n' | '\r'

(* A single decimal digit. *)
let digit = ['0'-'9']
(* Unsigned run of digits. The signed form '-'?digit+ is deliberately not
   used here. *)
let integer = digit+
(* Exponent suffix: e or E, an optional sign, then at least one digit. *)
let exponent = ['e' 'E'] ['+' '-']? digit+
(* Floating-point literal. Accepted shapes:
     - digits, a dot, optional digits, optional exponent   (1.  1.5  1.e3  1.5e3)
     - a dot, digits, optional exponent                    (.5  .5e3)
     - digits followed directly by an exponent             (1e3)
   A bare digit run without dot or exponent is not a float. *)
let float = (digit+ '.' digit* exponent?)
          | ('.' digit+ exponent?)
          | (digit+ exponent)

(* Characters allowed at the start of a name: ASCII letters and underscore. *)
let letter = ['A'-'Z' 'a'-'z' '_']
(* A name is a letter followed by any mix of letters and digits. *)
let id = letter (letter | digit)*

(* Optional filler (blanks, tabs, line terminators) between two delimiters. *)
let gap = (whitespace | newline)*

(* Two opening or two closing braces separated only by filler and more braces
   of the same kind, or a closing brace followed by an opening brace. *)
let badbraces = ('{' (whitespace | newline | '{')* '{')
              | ('}' (whitespace | newline | '}')* '}')
              | ('}' gap '{')
(* Two opening or two closing square brackets separated only by filler and
   more brackets of the same kind. *)
let badbrackets = ('[' (whitespace | newline | '[')* '[')
                | (']' (whitespace | newline | ']')* ']')
(* A semicolon directly before an opening brace, or directly after a closing
   brace. *)
let badend = (';' gap '{') | ('}' gap ';')
(* Two commas, or two semicolons, with nothing but filler in between. *)
let badcommas = ',' gap ','
let badsemis = ';' gap ';'
(* A comment terminator seen outside of a comment, or a doubled star. *)
let badcomments = "*/" | "**"
(* Union of every character combination listed above. *)
let badcombs = badbraces | badbrackets | badend | badcommas | badsemis | badcomments

(* rules *)
(* [token linecount lexbuf] is the main scanning rule. It reads tokens from
   [lexbuf] one after another and prints a description of each, tagged with
   the current line number [linecount]. Line terminators increase the counter;
   comments and string literals are delegated to the dedicated rules below.

   Returns unit once end of input is reached.
   Raises [Failure] on a rejected character combination, on a character that
   starts no token, and on an integer literal that does not fit in a native
   OCaml int. *)
rule token linecount = parse
  (* whitespace: skipped; only a line terminator advances the line counter *)
    whitespace { token linecount lexbuf }
  | newline    { token (linecount+1) lexbuf }

  (* comments: the block comment rule receives the nesting level, the running
     line number, and the line on which the outermost comment opened *)
  | "//" { printf "Line comment starts at line %d\n" linecount; linecomment linecount lexbuf  }
  | "/*" {  printf "Comments level 0 start at line %d\n" linecount; blockcomment 0 linecount linecount lexbuf }

  (* operators: single-character delimiters bind as a char, the mixed
     one- and two-character operators bind as a string *)
  | '(' | ')'   | '[' | ']'   | '{' | '}' | ';' | ',' as op  { printf "delimiter at line %d: %c\n" linecount op; token linecount lexbuf }
  | '_' | '?' | '+' | '-' | '*' | '/' | '%' | '^' | '>' | ">=" | '<' | "<=" | "==" | "!=" | "++" | "--" | '=' | "_=" | "+=" | "-=" | "&&" | "||" | "!" as op { printf "operator at line %d: %s\n" linecount op; token linecount lexbuf }

  (* keywords: listed before identifiers so that an exact keyword wins the tie *)
  | "true" | "false" | "null" | "if" | "else" | "for" | "while" | "break" | "continue" | "return" | "int" | "float" | "str" | "bool" | "void" as word {printf "keyword at line %d: %s\n" linecount word; token linecount lexbuf}

  (* literals *)
  (* A digit run can exceed the native int range, in which case int_of_string
     raises Failure. The conversion is matched with an exception case so the
     error carries the literal and its line, and so the recursive call stays
     in tail position. *)
  | integer as inum {
      (match int_of_string inum with
       | value ->
           printf "integer at line %d: %s (%d)\n" linecount inum value;
           token linecount lexbuf
       | exception Failure _ ->
           raise (Failure("Integer literal out of range: " ^ inum ^ " at line " ^ (string_of_int linecount))))
    }
  | float  as fnum  { printf "float at line %d: %s (%f)\n" linecount fnum (float_of_string fnum); token linecount lexbuf }
  (* string literals: a fresh buffer collects the text; the line number is
     passed twice, once as the running counter and once as the start line *)
  | '''             { stringSQ (Buffer.create 1000) linecount linecount lexbuf }
  | '"'             { stringDQ (Buffer.create 1000) linecount linecount lexbuf }

  (* identifiers *)
  | id as lexeme    { printf "identifier at line %d: %s\n" linecount lexeme; token linecount lexbuf }

  (* end of file *)
  | eof             { printf "EOF at line %d\n" linecount }

  (* errors: the rejected combinations are at least two characters long, so
     the longest-match rule prefers them over the single delimiters above *)
  | badcombs as lexeme { raise (Failure("Bad syntax: " ^ lexeme ^ " at line " ^ (string_of_int linecount))) }
  | _ as character     { raise (Failure("Bad character: " ^ Char.escaped character ^ " at line " ^ (string_of_int linecount))) }

  
(* [stringSQ tempbuffer linecount origline lexbuf] reads the body of a
   single-quoted string after its opening quote has been consumed.
   Every character up to the closing quote, line terminators included, is
   appended to [tempbuffer]. [linecount] is advanced on each line terminator,
   while [origline] keeps the line used in the printed report and in the
   error message. On the closing quote the collected text is printed and
   scanning continues in [token]. Raises [Failure] if input ends first.
   No escape sequences are recognised. *)
and stringSQ tempbuffer linecount origline = parse
    [^ '''  '\n' '\r']+ as chunk
      { Buffer.add_string tempbuffer chunk;
        stringSQ tempbuffer linecount origline lexbuf }
  | newline as eol
      { Buffer.add_string tempbuffer eol;
        stringSQ tempbuffer (linecount+1) origline lexbuf }
  | '''
      { let text = Buffer.contents tempbuffer in
        printf "String at line %d:\n%s\n\n" origline text;
        token linecount lexbuf }
  | eof
      { raise (Failure("Unterminated single quotes at line " ^ (string_of_int origline))) }

(* [stringDQ tempbuffer linecount origline lexbuf] reads the body of a
   double-quoted string after its opening quote has been consumed.
   Every character up to the closing quote, line terminators included, is
   appended to [tempbuffer]. [linecount] is advanced on each line terminator,
   while [origline] keeps the line used in the printed report and in the
   error message. On the closing quote the collected text is printed and
   scanning continues in [token]. Raises [Failure] if input ends first.
   No escape sequences are recognised. *)
and stringDQ tempbuffer linecount origline = parse
    [^ '"'  '\n' '\r']+ as chunk
      { Buffer.add_string tempbuffer chunk;
        stringDQ tempbuffer linecount origline lexbuf }
  | newline as eol
      { Buffer.add_string tempbuffer eol;
        stringDQ tempbuffer (linecount+1) origline lexbuf }
  | '"'
      { let text = Buffer.contents tempbuffer in
        printf "String at line %d:\n%s\n\n" origline text;
        token linecount lexbuf }
  | eof
      { raise (Failure("Unterminated double quotes at line " ^ (string_of_int origline))) }
  
  
(* [linecomment linecount lexbuf] discards the remainder of a line comment.
   The comment ends at the first line terminator, after which scanning resumes
   in [token] on the following line. If input ends inside the comment, the
   end-of-file report is printed and scanning stops. *)
and linecomment linecount = parse
    [^ '\n' '\r']+
      { linecomment linecount lexbuf }
  | newline
      { printf "End of line comment at line %d\n" linecount;
        token (linecount+1) lexbuf }
  | eof
      { printf "EOF at line %d\n" linecount }

(* [blockcomment level linecount origline lexbuf] skips a block comment.
   [level] is the current nesting depth, starting at 0 for the outermost
   comment; each nested opener raises it and each terminator lowers it. When
   the terminator of level 0 is seen, scanning resumes in [token].
   [linecount] follows line terminators inside the comment; [origline] is
   carried along unchanged and used only in the error raised when input ends
   before the comment is closed. *)
and blockcomment level linecount origline = parse
    "/*"
      { let deeper = level + 1 in
        printf "Comments level %d start at line %d\n" deeper linecount;
        blockcomment deeper linecount origline lexbuf }
  | "*/"
      { printf "Comments level %d end at line %d\n" level linecount;
        match level with
        | 0 -> token linecount lexbuf
        | _ -> blockcomment (level-1) linecount origline lexbuf }
  | newline
      { blockcomment level (linecount+1) origline lexbuf }
  | eof
      { raise (Failure("Unterminated comments at line " ^ (string_of_int origline))) }
  | _
      { blockcomment level linecount origline lexbuf }

(* trailer *)
{
(* [main ()] runs the scanner from line 1 over the file named by the first
   command-line argument, or over standard input when no argument is given.
   A file opened here is closed again whether scanning finishes normally or
   stops with an exception; standard input is left open. Any exception from
   the scanner is re-raised after the cleanup. *)
let main () =
  let from_file = Array.length Sys.argv > 1 in
  let cin = if from_file then open_in Sys.argv.(1) else stdin in
  let release () = if from_file then close_in_noerr cin in
  match token 1 (Lexing.from_channel cin) with
  | () -> release ()
  | exception e -> release (); raise e

(* Entry point. Printexc.print reports an escaping exception on stderr and
   raises it again. Binding the result to () checks that main returns unit. *)
let () = Printexc.print main ()
}