(* Ocamllex scanner for CPlus*)

{ open Parser }

(* token: main entry point of the scanner. Skips whitespace and block
   comments and returns the next Parser token read from lexbuf.

   ocamllex selects the longest match and, among equally long matches, the
   clause listed first. Keywords and built-in function names must therefore
   stay above the general identifier clauses, and the two catch-all clauses
   must stay last.

   Raises Failure on an integer literal that does not fit in an OCaml int and
   on any character that no clause accepts. *)
rule token = parse
  [' ' '\t' '\r' '\n'] { token lexbuf } (* Whitespace *)
| "/*"     { comment lexbuf }           (* Block comment: skip to its end *)
(* PUNCTUATION AND OPERATORS *)
| '('      { LPAREN }
| ')'      { RPAREN }
| '{'      { LBRACE }
| '}'      { RBRACE }
| '['      { LSQUARE }
| ']'      { RSQUARE }
| ';'      { SEMI }
| ','      { COMMA }
| '+'      { PLUS }
| '-'      { MINUS }
| '*'      { TIMES }
| '/'      { DIVIDE }
| '%'      { MOD }
| '='      { ASSIGN }
(* Opening quote of a string literal: hand over to read_string with a fresh
   buffer; 17 is only the initial capacity, the buffer grows on demand. *)
| '"'      { read_string (Buffer.create 17) lexbuf }
| '&'      { AMP }
| '.'      { DOT }
| "->"     { ARROW }
| "%="     { MOD_ASSIGN }
| "++"     { INC }
| "--"     { DEC }
| "=="     { EQ }
| "!="     { NEQ }
| '<'      { LT }
| "<="     { LEQ }
| '>'      { GT }
| ">="     { GEQ }
| "&&"     { AND }
| "||"     { OR }
| '!'      { NOT }
(* KEYWORDS AND TYPE NAMES *)
| "if"     { IF }
| "else"   { ELSE }
| "for"    { FOR }
| "while"  { WHILE }
| "return" { RETURN }
| "int"    { INT }
| "char"   { CHAR }
| "size_t" { SIZE_T }
| "string" { STRING }
| "bool"   { BOOL }
| "void"   { VOID }
| "true"   { TRUE }
| "false"  { FALSE }
| "sizeof" { SIZEOF }
| "struct" { STRUCT }
| "NULL"   { NULL }
(* BUILT IN FUNCTIONS: each token carries the matched name as its payload *)
| "printf" as n { PRINTF(n) }
| "atoi" as n { ATOI(n) }
| "strdup" as n { STRDUP(n) }
| "printb" as n { PRINTB(n) }
| "print" as n  { PRINT(n) }
| "printbig" as n { PRINTBIG(n) }
| "malloc" as n { MALLOC(n) }
| "free" as n { FREE(n) }
(* LITERALS *)
(* Decimal integer literal. int_of_string fails on values outside the range
   of int; report that with the offending lexeme instead of the bare
   int_of_string message. *)
| ['0'-'9']+ as lxm
    { try LITERAL (int_of_string lxm)
      with Failure _ ->
        raise (Failure ("integer literal out of range: " ^ lxm)) }
(* Identifiers starting with a lowercase letter are ordinary names; those
   starting with an uppercase letter are returned as STRUCT_ID. *)
| ['a'-'z']['a'-'z' 'A'-'Z' '0'-'9' '_']* as lxm { ID(lxm) }
| ['A'-'Z']['a'-'z' 'A'-'Z' '0'-'9' '_']* as structLit { STRUCT_ID(structLit) }
| eof { EOF }
(* Any single ASCII character not accepted above is returned as CHARLIT.
   NOTE(review): this also covers stray punctuation such as a lone quote or
   pipe character; confirm against the grammar that this is intended. *)
| ['\x00'-'\x7F'] as chr { CHARLIT(chr) }
(* Only bytes above the ASCII range can reach this clause. *)
| _ as char { raise (Failure("illegal character " ^ Char.escaped char)) }

(* comment: entered after the opening delimiter of a block comment has been
   consumed. Discards input one character at a time until the closing
   delimiter, then resumes normal scanning with the token rule. Comments do
   not nest.

   Raises Failure when the input ends before the comment is closed; without
   the eof clause the generated lexer would fail with a generic
   empty-token error instead. *)
and comment = parse
  "*/" { token lexbuf }
| eof  { raise (Failure ("Comment is not terminated")) }
| _    { comment lexbuf }

(*
   read_string buf: scans the body of a string literal. It is entered from
   the token rule after the opening double quote has been consumed. Decoded
   characters are accumulated in buf, and STRINGLIT carrying the buffer
   contents is returned when the closing double quote is reached.

   Adapted from the read_string rule shown in Real World OCaml:
   https://realworldocaml.org/v1/en/html/parsing-with-ocamllex-and-menhir.html
   with one addition: a backslash followed by a double quote yields a literal
   double quote, so that string literals can contain that character.

   Raises Failure on an unsupported backslash escape and when the input ends
   before the closing double quote.
*)
and read_string buf =
  parse
  | '"'       { STRINGLIT (Buffer.contents buf) }
  | '\\' '"'  { Buffer.add_char buf '"'; read_string buf lexbuf }
  | '\\' '/'  { Buffer.add_char buf '/'; read_string buf lexbuf }
  | '\\' '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
  | '\\' 'b'  { Buffer.add_char buf '\b'; read_string buf lexbuf }
  | '\\' 'f'  { Buffer.add_char buf '\012'; read_string buf lexbuf }
  | '\\' 'n'  { Buffer.add_char buf '\n'; read_string buf lexbuf }
  | '\\' 'r'  { Buffer.add_char buf '\r'; read_string buf lexbuf }
  | '\\' 't'  { Buffer.add_char buf '\t'; read_string buf lexbuf }
  (* A maximal run of ordinary characters is copied in one step. *)
  | [^ '"' '\\']+
    { Buffer.add_string buf (Lexing.lexeme lexbuf);
      read_string buf lexbuf
    }
  | eof { raise (Failure ("String is not terminated")) }
  (* Only a backslash that starts no supported escape can reach this clause. *)
  | _ { raise (Failure ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
