{-# LANGUAGE TupleSections #-}
module Lib
    (
      wordsToCounts,
      wordsToCountPar,
      topNWords,
      showPair
    ) where
import qualified Data.List as List
import Data.List.Split(chunksOf)
import qualified Data.Map as Map
import qualified Data.Map.Strict as MapStrict

import Control.Parallel.Strategies (using, parBuffer, rdeepseq, parList, NFData, rpar, parMap)


-- | Count how many times each word occurs in the input list.
--
-- The result holds one @(word, count)@ pair per distinct word, in ascending
-- key order (the order produced by 'MapStrict.toList'). An empty input yields
-- an empty result, so no separate empty-list equation is needed.
--
-- Counts are accumulated in a value-strict map: with the lazy "Data.Map"
-- variant every count would stay an unevaluated chain of @(+)@ applications,
-- one link per occurrence of the word, until the result is finally consumed.
wordsToCounts :: [String] -> [(String, Int)]
wordsToCounts word_list = MapStrict.toList (MapStrict.fromListWith (+) tagged)
  where
    -- Every occurrence contributes 1 to its word's total.
    tagged = [(w, 1) | w <- word_list]

-- | Pair every word with an initial count of 1.
--
-- The list of pairs is evaluated with the @parBuffer 10 rdeepseq@ strategy
-- from "Control.Parallel.Strategies"; the returned list has the same elements,
-- in the same order, as the plain mapping would produce.
wordMapperPar :: [String] -> [(String, Int)]
wordMapperPar wl = tagged `using` parBuffer 10 rdeepseq
  where
    -- One (word, 1) pair per input word, in input order.
    tagged = [(w, 1) | w <- wl]

-- | Sum the values of all pairs that share a key.
--
-- Returns one @(key, total)@ pair per distinct key, in ascending key order.
-- Totals are built in a value-strict map so each addition is performed as its
-- pair is inserted, instead of piling up as a chain of @(+)@ thunks per key.
wordReducer :: (Ord k, Num a) => [(k, a)] -> [(k, a)]
wordReducer wl = MapStrict.toList (MapStrict.fromListWith (+) wl)

-- | Sum the values of all pairs that share a key, reducing chunks in parallel.
--
-- The input is cut into chunks of 500000 pairs. Each chunk is reduced to its
-- own partial map via 'parMap', and the partial maps are then merged with
-- 'Map.unionsWith'. Returns one @(key, total)@ pair per distinct key, in
-- ascending key order; an empty input yields an empty result.
wordReducerPar :: (Ord k, Num a, NFData k, NFData a) => [(k, a)] -> [(k, a)]
wordReducerPar wl = Map.toList $ Map.unionsWith (+) partialMaps
  where
    -- Number of input pairs handed to each parallel reduction.
    chunkSize :: Int
    chunkSize = 500000

    -- 'rdeepseq' (rather than 'rpar') makes each parallel evaluation reduce
    -- its map completely, including the summed values. With 'rpar' only the
    -- map's outermost constructor is demanded, so the additions would be left
    -- as thunks for the consumer to evaluate sequentially. This is also what
    -- the 'NFData' constraints in the signature are needed for.
    partialMaps = parMap rdeepseq (Map.fromListWith (+)) (chunksOf chunkSize wl)

-- | Count word occurrences using the parallel mapper and reducer.
--
-- Feeds the output of 'wordMapperPar' into 'wordReducerPar', producing one
-- @(word, count)@ pair per distinct word.
wordsToCountPar :: [String] -> [(String, Int)]
wordsToCountPar = wordReducerPar . wordMapperPar

-- | Take the @n@ pairs with the largest counts, highest count first.
--
-- The pairs are ordered with 'List.sortBy', which is stable, so pairs with
-- equal counts keep their relative input order. Fewer than @n@ pairs are
-- returned when the input is shorter than @n@.
topNWords :: Int -> [(String, Int)] -> [(String, Int)]
topNWords n l = take n ranked
  where
    ranked = List.sortBy byCountDesc l
    -- Arguments are swapped in the comparison to sort in descending order.
    byCountDesc lhs rhs = compare (snd rhs) (snd lhs)

-- | Render a @(word, count)@ pair as the count, two spaces, then the word.
showPair :: (String, Int) -> [Char]
showPair (x, y) = concat [show y, "  ", x]
