module Apriori where

import qualified Data.List as List
import qualified Data.Set as Set
import Control.Monad ( guard )
import Control.Monad.Par ( runPar, parMap)
import Control.DeepSeq ( NFData(..) )
import Control.Parallel.Strategies (parList, using, rdeepseq, parBuffer)


newtype Itemset = Itemset (Set.Set String) deriving (Eq, Ord)
instance NFData Itemset where
  rnf (Itemset i) = rnf i

data AssocRule = AssocRule (Set.Set String) (Set.Set String) Double Double deriving (Eq, Ord)
instance Show AssocRule where
    show (AssocRule a b s c) =
        "\n" ++ show a ++ " => " ++ show b ++ " (" ++ show s ++ ", " ++ show c ++ ")"

instance NFData AssocRule where
    rnf (AssocRule a b s c) = rnf a `seq` rnf b `seq` rnf s `seq` rnf c



getSupport :: [Itemset] -> Itemset -> Double
getSupport transactions (Itemset i) =
    fromIntegral (supportCount i) / fromIntegral (length transactions)
    where supportCount i = length $
            filter (Set.isSubsetOf i) $ map (\(Itemset x) -> x) transactions

getConfidence :: [Itemset] -> Itemset -> Itemset -> Double
getConfidence transactions (Itemset a) (Itemset b) =
    getSupport transactions (Itemset $ a `Set.union` b) / getSupport transactions (Itemset a)

removeDup :: Ord a => [a] -> [a]
removeDup l = Set.toList $ Set.fromList l

getAssocRules :: Double -> [Itemset] -> [Itemset] -> [AssocRule]
getAssocRules minConfidence transactions sets = do
    Itemset is <- sets
    subset <- Set.toList $ Set.powerSet is
    let s = is `Set.difference` subset
    guard $ not (Set.null s) && (s /= subset)
    let conf = getConfidence transactions (Itemset subset) (Itemset s)
    guard $ conf > minConfidence
    let supp = getSupport transactions (Itemset subset)
        rule = AssocRule subset s supp conf
    return rule


---- sequential ----
getInitFreqItemsetS :: Double -> [Itemset] -> [Itemset]
getInitFreqItemsetS minSupport transactions =
    let initCandItemset = removeDup $
            concatMap (\(Itemset t) -> map (Itemset . Set.singleton) $ Set.toList t) transactions
    --- sequential
    in filter (\cand -> getSupport transactions cand > minSupport) initCandItemset

aprioriGenS :: [Itemset] -> [Itemset]
aprioriGenS iss =
    let
        -- join step
        selfJoin = [Itemset (a `Set.union` b)
            | (Itemset a) <- iss, (Itemset b) <- iss, validateCandidate a b]
        validateCandidate a b = Set.size (a `Set.difference` b) == 1
        -- prune step
        nonFrequentSubsets (Itemset i) = all (\s -> Itemset s `elem` iss) (properSubsets i)
        powerSetList s = Set.toList $ Set.powerSet s
        properSubsets s = filter (\x -> Set.size x == Set.size s - 1) (powerSetList s)
        --- sequential
        candItemset = filter nonFrequentSubsets selfJoin
    in removeDup candItemset

getFreqItemsetsS :: Double -> [Itemset] -> [Itemset] -> Maybe ([Itemset], [Itemset])
getFreqItemsetsS _ _ [] = Nothing
getFreqItemsetsS minSupport transactions currFreqItemset =
    let nextCandItemset = aprioriGenS currFreqItemset
        --- sequential
        nextFreqItemset = filter (\cand -> getSupport transactions cand > minSupport) nextCandItemset
    in Just (currFreqItemset, nextFreqItemset)


---- parMap ----
getInitFreqItemset :: Double -> [Itemset] -> [Itemset]
getInitFreqItemset minSupport transactions =
    let initCandItemset = removeDup $
            concatMap (\(Itemset t) -> map (Itemset . Set.singleton) $ Set.toList t) transactions
    --- parMap
        initFreqItemset = runPar $ 
            parMap (\cand -> (cand, getSupport transactions cand > minSupport)) initCandItemset
    in concat $ [[cand] | (cand, isFreq) <- initFreqItemset, isFreq]

aprioriGen :: [Itemset] -> [Itemset]
aprioriGen iss =
    let
        -- join step
        selfJoin = [Itemset (a `Set.union` b)
            | (Itemset a) <- iss, (Itemset b) <- iss, validateCandidate a b]
        validateCandidate a b = Set.size (a `Set.difference` b) == 1
        -- prune step
        nonFrequentSubsets (Itemset i) = all (\s -> Itemset s `elem` iss) (properSubsets i)
        powerSetList s = Set.toList $ Set.powerSet s
        properSubsets s = filter (\x -> Set.size x == Set.size s - 1) (powerSetList s)
        --- parMap
        candItemsetLst = runPar $ parMap (\cand -> (cand, nonFrequentSubsets cand)) selfJoin
        candItemset = concat $ [[cand] | (cand, isSubSet) <- candItemsetLst, isSubSet]
    in removeDup candItemset

getFreqItemsets :: Double -> [Itemset] -> [Itemset] -> Maybe ([Itemset], [Itemset])
getFreqItemsets _ _ [] = Nothing
getFreqItemsets minSupport transactions currFreqItemset =
    let nextCandItemset = aprioriGen currFreqItemset
        --- parMap
        nextFreqItemsetLst = runPar $ 
            parMap (\cand -> (cand, getSupport transactions cand > minSupport)) nextCandItemset
        nextFreqItemset = concat $ [[cand] | (cand, isFreq) <- nextFreqItemsetLst, isFreq]
    in Just (currFreqItemset, nextFreqItemset)


--- MapReduce ----
apriori :: Double -> Double -> [Itemset] -> [Itemset] -> [AssocRule]
apriori support confidence transactions transAll = 
    let initFreqItemset = getInitFreqItemsetC support transactions transAll
        freqItemsets = concat $ List.unfoldr (getFreqItemsets support transAll) initFreqItemset
    in getAssocRules confidence transAll freqItemsets

getInitFreqItemsetC :: Double -> [Itemset] -> [Itemset] -> [Itemset]
getInitFreqItemsetC minSupport transactions transAll =
    let initCandItemset = removeDup $
            concatMap (\(Itemset t) -> map (Itemset . Set.singleton) $ Set.toList t) transactions
    --- parMap
        initFreqItemset = runPar $ 
            parMap (\cand -> (cand, getSupport transAll cand > minSupport)) initCandItemset
    in concat $ [[cand] | (cand, isFreq) <- initFreqItemset, isFreq]


aprioriByChunkSize :: Int -> [Itemset] -> Double -> Double -> [AssocRule]
aprioriByChunkSize n transactions support confidence =
    removeDup (concatMap (\c -> apriori support confidence c transactions) chunks 
                `using` parBuffer 100 rdeepseq)
        where
            chunk _ [] = []
            chunk n xs = let (as,bs) = splitAt n xs in as : chunk n bs
            chunks = chunk n transactions

aprioriChunk :: [Itemset] -> Double -> Double -> [AssocRule]
aprioriChunk transactions support confidence =
    removeDup (concatMap (\c -> apriori support confidence [c] transactions) transactions 
                `using` parBuffer 100 rdeepseq)

