Sunday, 9 November 2008

Somewhat Pointfree Haskell Huffman Tree

{-# LANGUAGE NoMonomorphismRestriction #-}

import Prelude hiding (Either(..))
import Data.List

import Data.Maybe
import Data.Ord
import Control.Monad.Reader

lookdown :: (Eq b) => [(a, b)] -> b -> Maybe a

lookdown pairs i = lookup i (map swap pairs)

onFst f (x,y) = (f x, y)

swap (x,y) = (y,x)
f & g = \x -> (f x, g x)

infix 4 &

-------

data Huff a = Leaf a | Branch (Huff a) (Huff a) deriving Show

-- -- Example tree:
-- let t =
-- (Branch (Leaf T)
-- (Branch (Leaf F)
-- (Leaf T)))


foldHuff leaf branch = φ
where φ (Leaf o) = leaf o
φ (Branch left right) = branch (φ left) (φ right)

-- foldHuff (!) (*) t =
-- (* (! T)
-- (* (! F)
-- (! T)))

instance Functor Huff where fmap f = foldHuff (Leaf . f) Branch

depthFirstTraversal = foldHuff return (++)

-- foldHuff (!) (*) t =
-- (return T) ++

-- (return F) ++
-- (return T)
-- = [T,F,T]

member e = foldHuff (== e) (||)


data Direction = Left | Right deriving Show

type Path = [Direction]

branch join left right = \φleft φright -> join (left φleft) (right φright)

label = foldHuff (Leaf . (,) []) -- you are here
(branch Branch (fmap (onFst (Left:))) -- first on your left
(fmap (onFst (Right:))))

type Freq a = (Integer, Huff a)

frequencies = map (length & Leaf . head) . sortBy (comparing length) . group . sort

joinTrees ((p1,t1) : (p2,t2) : freqs) = joinTrees (insertBy (comparing fst) (p1 + p2,Branch t1 t2) freqs)
joinTrees [(_,tree)] = tree

huffmanTree = joinTrees . frequencies

codeTable = depthFirstTraversal . label . huffmanTree

encode codeTable = catMaybes . map (lookdown codeTable)

huffmanCode = concat . join (encode . codeTable)

traverse (Leaf o) path = Just (o, path)
traverse (Branch l r) (Left:path) = traverse l path
traverse (Branch l r) (Right:path) = traverse r path
traverse _ [] = Nothing

huffmanDecode = unfoldr . traverse


-- forall text, length text > 1 ->
correct text = huffmanDecode (huffmanTree text) (huffmanCode text) == text

-- checking how many bits saved on an 8 bit encoded string
savings text = length text * 8 - length (huffmanCode text)


-- > huffmanTree "quick brown fox jumped over the lazy dog"
-- Branch (Branch (Branch (Branch (Branch (Leaf 'i') (Leaf 'j')) (Branch (Leaf 'g') (Leaf 'h'))) (Branch (Branch (Leaf 'm') (Leaf 'n')) (Branch (Leaf 'k') (Leaf 'l')))) (Branch (Branch (Leaf 'd') (Leaf 'r')) (Branch (Branch (Leaf 'c') (Leaf 'f')) (Branch (Leaf 'a') (Leaf 'b'))))) (Branch (Branch (Leaf 'o') (Branch (Leaf 'u') (Leaf 'e'))) (Branch (Leaf ' ') (Branch (Branch (Branch (Leaf 't') (Leaf 'v')) (Branch (Leaf 'p') (Leaf 'q'))) (Branch (Branch (Leaf 'y') (Leaf 'z')) (Branch (Leaf 'w') (Leaf 'x'))))))

-- > correct "quick brown fox jumped over the lazy dog"
-- True

-- > savings "quick brown fox jumped over the lazy dog"
-- 143

3 comments:

Josef said...

Nice. Just a small nit pick. It doesn't really make sense to talk about doing a depth first search in a Huffman tree because all data is stored in the leaves. Hence there is no data that is stored deeper or more shallow than the other. The function you call "depthFirstTraversal" would be more aptly named "leftRightTraversal".

Anonymous said...

Interesting code! Apologies if you already know all this:

The onFst and (&) functions are already available in the Control.Arrow module, where they're called first and (***) respectively. They're for general arrows, but the instances on (->) are pretty handy:

import Control.Arrow
first (+1) (1, 5) ===> (2,5)
((+1) *** (+2)) (8,4) ===> (9,6)

IMHO, it's always good to avoid having to invent your own names & documentation for utility functions like this.

Anonymous said...

Sorry, that should have been (&&&) instead of (***).

((+1) &&& (+2)) 3 ===> (4,5)