module Main where

import Lib
import System.Exit(die)
import System.IO
import System.Environment(getArgs, getProgName)
import qualified Data.Text.IO as TIO
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Numeric.LinearAlgebra as LA
import qualified Numeric.LinearAlgebra.Data as NL
import qualified Numeric.LinearAlgebra.Devel as NLD
import qualified Numeric.LinearAlgebra.SVD.SVDLIBC as SVD
import qualified Control.DeepSeq as DS
import Foreign.C.Types(CInt)

-- | Convert a native 'Int' into a C @int@ ('CInt') using 'fromIntegral'.
toCInt :: Int -> CInt
toCInt = fromIntegral


-- | Command-line entry point.
--
-- Expects exactly four arguments:
-- @\<input_filename\> \<depth\> \<vector_length\> \<output_filename\>@.
--
-- Reads the input file as text, builds a vocabulary and a co-occurrence
-- matrix with the helpers from "Lib", runs a sparse SVD on the result of
-- 'ppmi', and writes one line per vocabulary key to the output file: the
-- key followed by the entries of its row of @tr' u \<\> diag s@.
--
-- Exits via 'die' (message on stderr, non-zero status) when the argument
-- count is wrong or when @depth@ / @vector_length@ are not integers.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [input, depth, vector_length, output] -> do
      -- Validate the numeric arguments before doing any work, so a typo is
      -- reported immediately instead of surfacing as a lazy "no parse" error.
      depthN <- parseIntArg "depth" depth
      rankN  <- parseIntArg "vector_length" vector_length

      -- Read the input BEFORE opening the output: opening in WriteMode
      -- truncates the file, and we must not clobber an existing output when
      -- the input is missing or unreadable.
      contents <- TIO.readFile input
      let corpus     = lineTokenize contents
          vocab      = vocabulary corpus
          -- NOTE(review): the trailing 5 is passed through unchanged from the
          -- original code; its meaning is defined in Lib — confirm there.
          cm         = parCooccurenceMatrix depthN corpus vocab 5
          dim        = M.size vocab
          pmi        = ppmi depthN cm dim
          pmis       = NLD.mkCSR $ M.toList pmi
          (u, s, _)  = SVD.sparseSvd rankN pmis
          w          = DS.force (NL.tr' u) LA.<> NL.diag s
          renderRow i = T.unwords $ map (T.pack . show) $ NL.toList $ w NL.! i
          word_embs  = zip (M.keys vocab) (map renderRow [0 .. dim - 1])
          -- NOTE(review): 'T.unwords' with a " " element yields three spaces
          -- between the word and its vector. Kept as-is to preserve the
          -- existing output format — confirm whether that is intended.
          formatLine (word, emb) = T.unwords [word, T.pack " ", emb]

      -- 'withFile' closes the handle even if writing (or the lazily forced
      -- computation above) throws.
      withFile output WriteMode $ \outfile ->
        mapM_ (TIO.hPutStrLn outfile . formatLine) word_embs
    _ -> do
      pn <- getProgName
      die $ "Usage: "++pn++" <input_filename> <depth> <vector_length> <output_filename>"
  where
    -- Total replacement for 'read': accepts the same inputs (including
    -- surrounding whitespace) but reports failures through 'die'.
    parseIntArg :: String -> String -> IO Int
    parseIntArg name raw =
      case [n | (n, rest) <- reads raw, ("", "") <- lex rest] of
        [n] -> return n
        _   -> die $ "Invalid <" ++ name ++ ">: expected an integer, got " ++ show raw
