(*
    Ocamllex scanner for Casper
    File: scanner.mll
    Michael Makris, mm3443
    PLT Fall 2018
*)

(* header *)
{ 
   open Parser 
   
   (* [getChr c] interprets the OCaml escape sequences in [c] with
      [Scanf.unescaped] and returns the character at index 1 of the result,
      i.e. the character just after the first one (the opening delimiter when
      [c] is a delimited character lexeme).
      Raises [Invalid_argument] when the unescaped string has fewer than two
      characters, and whatever [Scanf.unescaped] raises on malformed escapes. *)
   let getChr c =
       let unescaped = Scanf.unescaped c in
       unescaped.[1]
}

(* definitions *)
(* Horizontal blanks: one or more spaces or tabs. *)
let whitespace = [' ' '\t']+
(* One line break: a line feed, a carriage return, or the carriage return
   followed by line feed pair. *)
let newline = '\n' | '\r' | "\r\n"

(* A single decimal digit. *)
let digit = ['0'-'9']
(* Unsigned run of decimal digits; no sign is part of the literal (the
   signed variant is left commented out). *)
let integer = digit+ (* '-'?digit+ *)
(* Floating-point literal. Accepted shapes, in order: digits on both sides of
   the point with an exponent, digits only before the point with an exponent,
   digits only after the point with an exponent, digits with an exponent and
   no point, and the three point-only shapes without an exponent. The exponent
   is e or E, an optional sign, then at least one digit. A lone point is not
   a float. *)
let float = (digit+ '.' digit+ ['e' 'E'] ['+' '-']? digit+)
            | (digit+ '.' ['e' 'E'] ['+' '-']? digit+)
            | ('.' digit+ ['e' 'E'] ['+' '-']? digit+)
            | (digit+ ['e' 'E'] ['+' '-']? digit+)
            | (digit+ '.' digit+) | (digit+ '.')
            | ('.' digit+)

(* Identifier letters: ASCII letters plus the underscore. *)
let letter = ['a'-'z' 'A'-'Z' '_']
(* Character literal: exactly one character in the range space through tilde,
   enclosed in backticks. The enclosed character is bound to mychar, which the
   action of the token rule reads. *)
let ascii = (('`')(( [' ' - '~' ] ) as mychar)('`'))
(* Identifier: a letter (or underscore) followed by any number of letters,
   underscores and digits. *)
let id = letter(letter|digit)*

(* Punctuation sequences that the token rule rejects with a Failure. Blanks
   and line breaks between the two offending symbols are part of the match. *)

(* An opening brace followed by another opening brace (further opening braces
   may sit in between), or a closing brace followed by an opening brace. *)
let badbraces = (['{'](whitespace | newline | '{')*['{']) | (['}'](whitespace | newline)*['{'])
(* Same idea for square brackets: two opening brackets, two closing brackets,
   or a closing bracket followed by an opening bracket. *)
let badbrackets = (['['](whitespace | newline | '[')*['[']) | ([']'](whitespace | newline | ']')*[']']) | ([']'](whitespace | newline)*['['])
(* A semicolon followed by an opening brace, or a closing brace followed by a
   semicolon. *)
let badend = [';'](whitespace | newline)*['{'] | ['}'](whitespace | newline)*[';']
(* Two commas with nothing but blanks or line breaks between them. *)
let badcommas = [','](whitespace | newline)*[',']
(* Two semicolons with nothing but blanks or line breaks between them. *)
let badsemis =  [';'](whitespace | newline)*[';']
(* A block-comment terminator met outside a comment, or a doubled star. *)
let badcomments = "*/" | "**"
(* Union of every rejected sequence above. *)
let badcombs = badbraces | badbrackets | badend | badcommas | badsemis | badcomments

(* rules *)
(* Main entry point of the scanner: skips blanks and line breaks, hands
   comments and string bodies to the helper rules below, and returns one
   parser token per call. ocamllex picks the longest match and, on a tie,
   the earliest case, so keywords must stay above the identifier case and
   the single underscore case above it as well.
   Raises Failure on a forbidden punctuation sequence, on an unknown
   character, and on an integer literal that does not fit in an OCaml int. *)
rule token = parse
  (* whitespace *)
    whitespace { token lexbuf } | newline { token lexbuf }

  (* comments *)
  | "//" { linecomment lexbuf } | "/*" { blockcomment 0 lexbuf }

  (* blocks, delimiters, terminators *)
  | '(' { LPAREN }    | ')' { RPAREN }
  | '[' { LBRACKET }  | ']' { RBRACKET }
  | '{' { LBRACE }    | '}' { RBRACE }
  | ';' { SEMICOLON } | ',' { COMMA }

  (* string operators *)
  | '_' { CONCAT }    | '?' { CHARAT }

  (* inc/decrement operators *)
  | "++" { INC }  | "--" { DEC }

  (* cast operators *)
  | "~" { ITOF }
  
  (* arithmetic operators *)
  | '+' { PLUS }      | '-' { MINUS }
  | '*' { TIMES }     | '/' { DIVIDE }
  | '%' { MODULUS }   | '^' { EXPONENT }

  (* relational operators *)
  | '>'  { GT }   | ">=" { GTE }
  | '<'  { LT }   | "<=" { LTE }
  | "==" { EQ }   | "!=" { NEQ }

  (* assignment operators *)
  | '='  { ASSIGN } | "_=" { CON_ASSIGN } | "+=" { ADD_ASSIGN } | "-=" { SUB_ASSIGN }

  (* logical operators *)
  | "&&" { AND } | "||" { OR } | "!" { NOT }

  (* conditional keywords *)
  | "if" { IF } | "else" { ELSE }

  (* loop keywords *)
  | "for" { FOR } | "while" { WHILE } | "do" { DO } | "until" { UNTIL }
  | "break" { BREAK } | "continue" { CONTINUE }

  (* function keywords *)
  | "return" { RETURN }

  (* type keywords *)
  | "int" { INT }     | "float" { FLOAT } | "str" { STRING } 
  | "chr" { CHAR }  | "bool"  { BOOL }  | "void" { VOID }

  (* literals *)
  | integer as lexeme {
      (* The pattern only admits digits, so the conversion can fail solely
         because the value overflows the native int; report that with the
         offending lexeme instead of the bare library message. *)
      try INTLITERAL(int_of_string lexeme)
      with Failure _ ->
        raise (Failure("Integer literal out of range: " ^ lexeme))
    }
  | float as lexeme   { FLTLITERAL(lexeme) }
  (* the opening quote is consumed here; the helper rule reads up to the
     matching closing quote and returns the text in between *)
  | '''               { STRLITERAL (stringSQ (Buffer.create 100) lexbuf) }
  | '"'               { STRLITERAL (stringDQ (Buffer.create 100) lexbuf) }
  | ascii             { CHRLITERAL(mychar) }  
  | "true"            { BOOLLITERAL (true) }
  | "false"           { BOOLLITERAL (false) }
  | "null"            { NULL }

  (* identifier *)
  |  id as lexeme     { IDENTIFIER(lexeme) }

  (* end of file *)
  | eof               { EOF }

  (* errors *)
  | badcombs as lexeme { raise (Failure("Bad syntax: " ^ lexeme)) }
  | _ as character     { raise (Failure("Bad character: " ^ Char.escaped character)) }

(* Body of a single-quoted string literal, entered once the opening quote has
   been consumed. Every character up to the closing quote, line breaks
   included, is appended to tempbuffer; the accumulated text is returned when
   the closing quote is reached. Raises Failure if the input ends first. *)
and stringSQ tempbuffer = parse
    eof   { raise (Failure("Non-terminated single quotes")) }
  | '''   { Buffer.contents tempbuffer }
  | newline | [^ '''  '\n' '\r']+
          { let chunk = Lexing.lexeme lexbuf in
            Buffer.add_string tempbuffer chunk;
            stringSQ tempbuffer lexbuf }

(* Body of a double-quoted string literal, entered once the opening quote has
   been consumed. Every character up to the closing quote, line breaks
   included, is appended to tempbuffer; the accumulated text is returned when
   the closing quote is reached. Raises Failure if the input ends first. *)
and stringDQ tempbuffer = parse
    eof   { raise (Failure("Non-terminated double quotes")) }
  | '"'   { Buffer.contents tempbuffer }
  | newline | [^ '"'  '\n' '\r']+
          { let chunk = Lexing.lexeme lexbuf in
            Buffer.add_string tempbuffer chunk;
            stringDQ tempbuffer lexbuf }

(* Rest of a line comment: discards characters one at a time until a line
   break, then resumes normal scanning. A comment that runs to the end of the
   input yields EOF directly. *)
and linecomment = parse
    eof             { EOF }
  | newline         { token lexbuf }
  | [^ '\n' '\r']   { linecomment lexbuf }

(* Inside a block comment. level counts how many nested comment openers are
   still unclosed beyond the outermost one: an opener increments it, a
   terminator either decrements it or, at level zero, ends the comment and
   resumes normal scanning. Everything else is discarded. Raises Failure if
   the input ends before the comment is closed. *)
and blockcomment level = parse
    eof     { raise (Failure("Non-terminated comments")) }
  | "/*"    { blockcomment (level+1) lexbuf }
  | "*/"    { match level with
              | 0 -> token lexbuf
              | depth -> blockcomment (depth - 1) lexbuf }
  | newline | _
            { blockcomment level lexbuf }

(* trailer *)

