(*
    CasperScannerValidation.mll
    Michael Makris, mm3443
    PLT Fall 2018

    Purpose: to validate CasperScanner.mll. Removed Parser dependency and added printf statements to identify current token read.

    Use:
        ocamllex CasperScannerValidation.mll
        ocaml CasperScannerValidation.ml < CasperScannerValidationInput.txt
*)

{
open Printf
}

(* definitions *)

(* Blank handling: a run of spaces or tabs, and a single line break written
   as LF, CR or CR LF. *)
let whitespace = [' ' '\t']+
let newline = "\r\n" | '\n' | '\r'

(* Numeric literals. An integer is an unsigned run of digits. A float has
   either a decimal point (digits required on at least one side, exponent
   optional) or no decimal point and a mandatory exponent. *)
let digit = ['0'-'9']
let integer = digit+ (* '-'?digit+ *)
let exponent = ['e' 'E'] ['+' '-']? digit+
let float = (digit+ '.' digit* | '.' digit+) exponent?
          | digit+ exponent

(* Identifiers start with a letter or underscore and continue with letters,
   underscores or digits. *)
let letter = ['A'-'Z' 'a'-'z' '_']
let id = letter (letter | digit)*

(* Punctuation sequences that the token rule rejects as bad syntax. Each one
   is a pair of delimiters separated only by blanks (spaces, tabs, line
   breaks); the brace and bracket forms also allow repeats of the opening
   delimiter in between. *)
let blank = [' ' '\t' '\n' '\r']
let badbraces = '{' (blank | '{')* '{'
              | '}' (blank | '}')* '}'
              | '}' blank* '{'
let badbrackets = '[' (blank | '[')* '['
                | ']' (blank | ']')* ']'
let badend = ';' blank* '{'
           | '}' blank* ';'
let badcommas = ',' blank* ','
let badsemis = ';' blank* ';'
let badcomments = "*/" | "**"
let badcombs = badbraces
             | badbrackets
             | badend
             | badcommas
             | badsemis
             | badcomments


(* rules *)
(* Main scanner rule. Instead of returning tokens to a parser, every lexeme
   that is recognised is reported on standard output and scanning resumes,
   until end of input is reached and EOF is printed.
   ocamllex selects the longest match and, on a tie, the earliest case: this
   is why keywords win over identifiers, a lone underscore is reported as an
   operator, and the multi-character bad combinations win over the
   single-character operators they start with.
   Raises Failure on a forbidden punctuation combination, on a character
   that starts no token, and on an integer literal too large for an int. *)
rule token = parse
  (* whitespace and line breaks are skipped *)
    whitespace { token lexbuf } | newline { token lexbuf }

  (* comments: handed over to the dedicated comment rules *)
  | "//" { printf "Line comment Start\n"; linecomment lexbuf } 
  | "/*" { printf "Comments level 0 start\n"; blockcomment 0 lexbuf }

  (* operators: the as-binding covers the whole alternation *)
  | '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' 
  | '_' | '?' | '+' | '-' | '*' | '/' | '%' | '^' 
  | '>'  | ">=" | '<'  | "<=" | "==" | "!=" | "++" | "--" 
  | '=' | "_=" | "+=" | "-=" 
  | "&&" | "||" | "!" 
  as op { printf "operator    %s\n" op; token lexbuf }
  
  (* keywords *)
  | "if" | "else" | "for" | "while" | "break" | "continue" | "return"
  | "int" | "float" | "str" | "bool" | "void" 
  | "true" | "false" | "null"   
  as word {printf "keyword     %s\n" word; token lexbuf}

  (* literals *)
  | integer as lexeme
      { (* The lexeme is all digits, so the only way the conversion can fail
           is overflow; report that with the offending literal rather than
           the bare int_of_string failure. *)
        let value =
          (try int_of_string lexeme
           with Failure _ ->
             raise (Failure ("Integer literal out of range: " ^ lexeme)))
        in
        printf "integer     %s (%d)\n" lexeme value; token lexbuf }
  | float as lexeme   { printf "float       %s (%f)\n" lexeme (float_of_string lexeme); token lexbuf }
  | '''               { printf "string      %s\n" (stringSQ (Buffer.create 100) lexbuf); token lexbuf }
  | '"'               { printf "string      %s\n" (stringDQ (Buffer.create 100) lexbuf); token lexbuf }  

  (* identifier *)
  |  id as lexeme     { printf "identifier  %s\n" lexeme; token lexbuf }

  (* end of file: the only case that does not resume scanning *)
  | eof               { printf "EOF\n" }

  (* errors *)
  | badcombs as lexeme { raise (Failure("Bad syntax: " ^ lexeme)) }
  | _ as character     { raise (Failure("Bad character: " ^ Char.escaped character)) }

(* Body of a single-quoted string, entered once the opening quote has been
   consumed. Text is appended to tempbuffer, line breaks included, and the
   accumulated contents are returned when the closing quote is met.
   Raises Failure if the input ends before the closing quote. *)
and stringSQ tempbuffer = parse
    eof                          { failwith "Non-terminated single quotes" }
  | '''                          { Buffer.contents tempbuffer }
  | newline as eol               { Buffer.add_string tempbuffer eol;
                                   stringSQ tempbuffer lexbuf }
  | [^ '''  '\n' '\r']+ as chunk { Buffer.add_string tempbuffer chunk;
                                   stringSQ tempbuffer lexbuf }

(* Body of a double-quoted string, entered once the opening quote has been
   consumed. Text is appended to tempbuffer, line breaks included, and the
   accumulated contents are returned when the closing quote is met.
   Raises Failure if the input ends before the closing quote. *)
and stringDQ tempbuffer = parse
    eof                          { failwith "Non-terminated double quotes" }
  | '"'                          { Buffer.contents tempbuffer }
  | newline as eol               { Buffer.add_string tempbuffer eol;
                                   stringDQ tempbuffer lexbuf }
  | [^ '"'  '\n' '\r']+ as chunk { Buffer.add_string tempbuffer chunk;
                                   stringDQ tempbuffer lexbuf }

(* Body of a line comment. Characters are discarded one at a time until a
   line break hands control back to the token rule, or until the input ends,
   in which case scanning stops here. *)
and linecomment = parse
    eof     { print_string "EOF \n" }
  | newline { print_string "Line comment End\n";
              token lexbuf }
  | _       { linecomment lexbuf }

(* Body of a block comment that may contain nested block comments. The level
   argument is the current nesting depth, 0 for the outermost comment. An
   opener goes one level deeper; a closer either returns to the token rule
   (depth 0) or goes back up one level. Everything else is discarded.
   Raises Failure if the input ends inside a comment. *)
and blockcomment level = parse
    "/*"    { let deeper = level + 1 in
              printf "Comments level %d start\n" deeper;
              blockcomment deeper lexbuf }
  | "*/"    { (match level with
               | 0 ->
                 printf "Comments level %d end\n" level;
                 token lexbuf
               | _ ->
                 printf "Comments level %d end \n" level;
                 blockcomment (level - 1) lexbuf) }
  | eof     { failwith "Non-terminated comments" }
  | newline { blockcomment level lexbuf }
  | _       { blockcomment level lexbuf }

(* trailer *)

{
(* Entry point of the validation scanner.
   Input is read from the file named by the first command-line argument when
   one is given, and from standard input otherwise. The token rule is run
   once over that input; it keeps scanning until end of input or until it
   raises. A file opened here is closed on both the normal and the
   exceptional path; standard input is never closed. *)
let main () =
  let from_file = Array.length Sys.argv > 1 in
  let cin = if from_file then open_in Sys.argv.(1) else stdin in
  let release () = if from_file then close_in_noerr cin in
  let lexbuf = Lexing.from_channel cin in
  match token lexbuf with
  | () -> release ()
  | exception e -> release (); raise e

(* Printexc.print reports an escaping exception on standard error and then
   re-raises it. Binding to unit checks that main really returns unit. *)
let () = Printexc.print main ()
}