import System.Exit (die)
import System.Environment (getArgs, getProgName)
import Data.Char (isAlpha, isSpace, toLower)
import System.IO
import qualified Data.Map.Strict as Map
import Control.Parallel.Strategies (using, parList, parMap, rdeepseq)
import GHC.Conc (numCapabilities)
import Control.DeepSeq (deepseq)

-- removes non-alphabetic characters and converts to lower case
cleanAndSplit :: String -> [String]
cleanAndSplit s = words $ map toLower $ filter (\x -> isAlpha x || isSpace x) s

-- Map stage of MapReduce
doMap :: [String] -> Map.Map String Int
doMap = Map.fromListWith (+) . flip zip (repeat 1)

-- Reduce phase
doReduce :: (Ord k, Num a) => [Map.Map k a] -> Map.Map k a
doReduce = Map.unionsWith (+)

-- Chunk a list into non-overlapping lists of given size
chunk :: Int -> [a] -> [[a]]
chunk _ [] = []
chunk n xs = let (ys, zs) = splitAt n xs in ys : chunk n zs

getClosestWord :: Map.Map String Int -> String -> (String, Int)
getClosestWord wordFreq target = (closestWord, resultInt $ Map.lookup closestWord wordFreq)
    where 
    closestWord = getClosestWord' calcLevenshteinDist (Map.keys wordFreq) target


getClosestWord' :: (String -> String -> (Int, String)) -> [String] -> String -> String
getClosestWord' distFunction listWords target' = snd $ minimum $ parMap rdeepseq minimum chunks
  where
    calculatedDistances = parMap rdeepseq (distFunction target') listWords
    chunks = chunk (length calculatedDistances `div` numCapabilities) calculatedDistances


calcLevenshteinDist :: String -> String -> (Int, String)
calcLevenshteinDist w1 w2 = (last $ Prelude.foldl transform [0..length w1] w2, w2)
  where
    transform [] _ = []
    transform xs@(x:xs') c = scanl compute (x + 1) (zip3 w1 xs xs')
      where
        compute z (c', x', y) = minimum [y + 1, z + 1, x' + fromEnum (c /= c')]


resultInt :: Maybe Int -> Int
resultInt r = case r of
    Just x  -> x
    Nothing -> -1


runTests :: Map.Map String Int -> [String] -> IO ()
runTests wordFreq testWords = do
  putStrLn "Testing..."
  hFlush stdout

  let results = [getValueOrClosest wordFreq t | t <- testWords]
      printTuple (str, num) = putStrLn $ "(" ++ str ++ ", " ++ show num ++ ")"
  mapM_ printTuple results
  putStrLn "Done."


getValueOrClosest :: Map.Map String Int -> String -> (String, Int)
getValueOrClosest wordMap word = 
      let value = resultInt $ Map.lookup word wordMap in
      if value /= -1
        then (word, value)
        else getClosestWord wordMap word

main :: IO ()
main = do
  args <- getArgs
  case args of
    [filename, testfile] -> do
      content <- readFile filename
      tests <- readFile testfile
      putStrLn "Starting MapReduce word counting..."
      let wordsList = cleanAndSplit content `using` parList rdeepseq
          chunks = chunk (length wordsList `div` numCapabilities) wordsList
          mapped = map doMap chunks `using` parList rdeepseq
          wordFreq = doReduce mapped
      wordFreq `deepseq` return ()    -- force computation
      putStrLn "MapReduce completed..."
      --runTests wordFreq (cleanAndSplit tests)
    _ -> do 
      pn <- getProgName
      die $ "Usage: " ++ pn ++ " <filename>"
