(* Ocamllex scanner for Scolkam *)

{ 
    open Parser 

    (* [unescape s] returns a copy of [s] in which OCaml-style backslash
       escape sequences are replaced by the characters they denote.
       [s] is the raw text between the quotes of a string literal,
       without the surrounding quotes themselves.
       Raises [Scanf.Scan_failure] when [s] is not the body of a
       well-formed OCaml string literal. *)
    let unescape s = Scanf.unescaped s
}

(* Named regular expressions used by the lexer rules below. *)
(* A single ASCII letter, either case. *)
let alpha = ['a'-'z' 'A'-'Z']
(* A backslash followed by one of: backslash, single quote, double quote,
   n, r or t. *)
let escape = '\\' ['\\' ''' '"' 'n' 'r' 't']
(* An escape sequence wrapped in single quotes.
   NOTE(review): not referenced by any rule visible in this file. *)
let escape_char = ''' (escape) '''
(* Printable ASCII from space through tilde, leaving out the double quote
   and the backslash so that both must be written as escapes inside a
   string literal. *)
let ascii = ([' '-'!' '#'-'[' ']'-'~'])
(* A single decimal digit. *)
let digit = ['0'-'9']
(* An identifier: a letter followed by any number of letters, digits or
   underscores. A leading underscore or digit is not accepted. *)
let id = alpha (alpha | digit | '_')*
(* A double-quoted string literal. The text between the quotes, with its
   escapes still unprocessed, is bound to [s]. *)
let string = '"' ( (ascii | escape)* as s) '"'
(* One character from [ascii] or [digit] wrapped in single quotes.
   NOTE(review): not referenced by any rule visible in this file. *)
let char = ''' ( ascii | digit ) '''
(* A decimal literal with at least one digit on each side of the dot. *)
let float = (digit+) ['.'] digit+
(* One or more decimal digits, with no sign. *)
let int = digit+
(* Horizontal whitespace and carriage return. Newline is deliberately not
   part of this set because the token rule turns it into its own token. *)
let whitespace = [' ' '\t' '\r']
(* A single newline character.
   NOTE(review): not referenced by any rule visible in this file; the rules
   spell the newline literally instead. *)
let return = '\n'

(* Main lexer entry point: returns the next parser token read from lexbuf.

   Horizontal whitespace is skipped. A hash sign or a triple quote hands
   control to the matching comment rule, which resumes this rule once the
   comment is over. A newline is significant and yields EOL.

   ocamllex always takes the longest match and, on a tie, the case listed
   first. Keywords are therefore listed before the generic identifier case,
   and a longer word that merely starts with a keyword still lexes as ID.

   Raises Failure with a SyntaxError message on a character that starts no
   token, and on an integer literal too large for the native int type. *)
rule token = parse
  whitespace { token lexbuf }
| '#'        { single_comment lexbuf }
| "\'\'\'"   { multi_comment1 lexbuf }
| "\"\"\""   { multi_comment2 lexbuf }
| '('        { LPAREN }
| ')'        { RPAREN }
| '['        { LBRACKET }
| ']'        { RBRACKET }
| ';'        { SEMI }
| ':'        { COLON }
| '\n'       { EOL }
| ','        { COMMA }

(* Operators *)
| '+'      { PLUS }
| '-'      { MINUS }
| '*'      { TIMES }
| '/'      { DIVIDE }
| '%'      { MODULUS }
| "+="     { PLUSEQ }
| "-="     { MINUSEQ }
| "*="     { TIMESEQ }
| "/="     { DIVIDEEQ }
| "%="     { MODULUSEQ }
| '='      { ASSIGN }
| "=="     { EQ }
| "!="     { NEQ }
| '<'      { LT }
| "<="     { LEQ }
| ">"      { GT }
| ">="     { GEQ }
| "and"    { AND }
| "or"     { OR }
| "not"    { NOT }
| "in"     { IN }

(* Control flow *)
| "if"       { IF }
| "elif"     { ELIF }
| "else"     { ELSE }
| "for"      { FOR }
| "while"    { WHILE }
| "break"    { BREAK }
| "continue" { CONTINUE }
| "return"   { RETURN }
| "end"      { END }

(* Data types and atoms *)
| "int"      { INT }
| "float"    { FLOAT }
| "str"      { STRING }
| "bool"     { BOOL }
| "True"     { TRUE }
| "False"    { FALSE }
| "None"     { NONE }
| "tuple"    { TUPLE }

(* Functions, classes and object management *)
| "def"    { FUNCTION }
| "class"  { CLASS }
| "new"    { NEW }

(* Literals and identifiers *)
| int as lxm
    { (* The int regexp accepts arbitrarily many digits, so the lexeme may
         not fit in a native int. int_of_string then raises a bare
         Failure that does not mention the source text; report it as a
         syntax error naming the offending literal instead. *)
      let n =
        try int_of_string lxm
        with Failure _ ->
          raise (Failure ("SyntaxError: Integer literal out of range -> " ^ lxm))
      in
      INT_LITERAL n }
| float as lxm      { FLOAT_LITERAL (float_of_string lxm) }
| string            { STRING_LITERAL (unescape s) }
| id as lxm         { ID lxm }
| eof               { EOF }
| _ as c { raise (Failure ("SyntaxError: Invalid syntax -> " ^ Char.escaped c)) }

(* Skips the body of a block comment delimited by three single quotes.
   Every character is discarded until the closing delimiter, after which
   normal tokenising resumes and the next real token is returned.

   Raises Failure with a SyntaxError message when the input ends before the
   closing delimiter. Without the explicit eof case the catch-all below
   cannot match at end of input, and the generated lexer would fail with
   its generic empty-token message instead. *)
and multi_comment1 = parse
  "\'\'\'" { token lexbuf }
| eof      { raise (Failure "SyntaxError: Unterminated ''' comment") }
| _        { multi_comment1 lexbuf }

(* Skips the body of a block comment delimited by three double quotes.
   Every character is discarded until the closing delimiter, after which
   normal tokenising resumes and the next real token is returned.

   Raises Failure with a SyntaxError message when the input ends before the
   closing delimiter. Without the explicit eof case the catch-all below
   cannot match at end of input, and the generated lexer would fail with
   its generic empty-token message instead. *)
and multi_comment2 = parse
  "\"\"\"" { token lexbuf }
| eof      { raise (Failure "SyntaxError: Unterminated \"\"\" comment") }
| _        { multi_comment2 lexbuf }

(* Skips a line comment introduced by a hash sign. Characters are discarded
   up to and including the next newline, then normal tokenising resumes.

   A comment on the last line of a file that has no trailing newline ends
   at end of input; that case yields EOF directly. Without it the catch-all
   cannot match at end of input and the generated lexer would fail with its
   generic empty-token message.

   NOTE(review): the newline that ends the comment is consumed here, so no
   EOL token is produced for a line that ends in a comment. Confirm against
   the grammar that this is intended. *)
and single_comment = parse
  '\n' { token lexbuf }
| eof  { EOF }
| _    { single_comment lexbuf }