Skip to content

Commit

Permalink
Cuckoo Filter with LiquidHaskell bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
kubum committed Aug 26, 2019
0 parents commit 33b8d4f
Show file tree
Hide file tree
Showing 11 changed files with 535 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.stack-work/
liquid-cuckoo-filter.cabal
.liquid
*~
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# liquid-cuckoo-filter
2 changes: 2 additions & 0 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
40 changes: 40 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: liquid-cuckoo-filter
version: 0.1.0.0
github: "kubum/liquid-cuckoo-filter"
license: BSD3
author: "Andrey Fadeyev"
maintainer: "fadeyew@gmail.com"
copyright: "2019 Andrey Fadeyev"

extra-source-files:
- README.md
- ChangeLog.md

# Metadata used when publishing your package
# synopsis: Short description of your package
# category: Web

# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/kubum/liquid-cuckoo-filter#readme>

dependencies:
- base >= 4.7 && < 5
- vector >= 0.12.0.2
- containers
- cereal

library:
source-dirs: src

tests:
liquid-cuckoo-filter-test:
main: Spec.hs
source-dirs: test
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- liquid-cuckoo-filter
77 changes: 77 additions & 0 deletions src/Data/LiquidCuckooFilter/Entities.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}

module Data.LiquidCuckooFilter.Entities (
Fingerprint(..),
Bucket(..),
emptyFP,
emptyBucket,
getSize,
getSlot,
setSlot,
isEmpty,
contains,
remove,
) where

import qualified Data.ByteString as BS
import Data.ByteString.Unsafe(unsafeIndex, unsafeDrop, unsafeTake)

import Data.Hashable (Hashable, hash)
import Data.Serialize (Serialize)
import Data.Word (Word8)
import GHC.Generics (Generic)

-- | A Fingerprint is an 8 bit hash of a value
newtype Fingerprint = FP Word8
deriving (Show, Eq, Ord, Generic)
deriving newtype (Hashable)
deriving anyclass Serialize

emptyFP :: Fingerprint
emptyFP = FP 0

{-@ data Bucket = B { content :: BS.ByteString } @-}
data Bucket = B { content :: BS.ByteString } deriving (Show, Eq)

{-@ getSize :: b: Bucket -> { v: Int | bslen (content b) = v } @-}
getSize :: Bucket -> Int
getSize (B bucket) = BS.length bucket

isEmpty :: Bucket -> Bool
isEmpty (B bucket) = BS.null bucket

emptyBucket :: Int -> Bucket
emptyBucket slots = B (BS.replicate slots 0)

{-@ getSlot :: b: Bucket
-> { index: Int | index >= 0 && bslen (content b) > index }
-> Fingerprint @-}
getSlot :: Bucket -> Int -> Fingerprint
getSlot (B bucket) id = FP (unsafeIndex bucket id)

{-@ setSlot :: b: Bucket
-> { index: Int | index >= 0 && bslen (content b) > index }
-> Fingerprint
-> Bucket @-}
setSlot :: Bucket -> Int -> Fingerprint -> Bucket
setSlot (B bucket) id (FP fp) = B (BS.concat [before, addedBytes, after])
where
(before, rest) = unsafeSplitAt id bucket
after = unsafeDrop 1 rest
addedBytes = BS.pack [fp]

{-@ unsafeSplitAt
:: v: { v: Int | v >=0 }
-> b: { bs : BS.ByteString | bslen bs >= v }
-> ( { bs : BS.ByteString | bslen bs == v } , { bs : BS.ByteString | bslen bs == bslen b - v } ) @-}
unsafeSplitAt :: Int -> BS.ByteString -> (BS.ByteString, BS.ByteString)
unsafeSplitAt n bs = (unsafeTake n bs, unsafeDrop n bs)

contains :: Bucket -> Fingerprint -> Bool
contains (B bucket) (FP fp) = BS.elem fp bucket

remove :: Bucket -> Fingerprint -> Bucket
remove (B bucket) (FP fp) = B (BS.filter (\f -> f /= fp) bucket)
47 changes: 47 additions & 0 deletions src/Data/LiquidCuckooFilter/Internal.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}

module Data.LiquidCuckooFilter.Internal (
makeFingerprint,
getSize,
getSlot,
setSlot,
contains,
remove,
preferableEmptyBucket,
nextSlotBucket,
) where

import Data.LiquidCuckooFilter.Entities
import Data.LiquidCuckooFilter.Random(evictRandomFingeprint)

import qualified Data.ByteString as BS

import Data.Bits (xor)
import Data.Hashable (Hashable, hash)
import Data.Serialize (Serialize)
import Data.Word (Word8)
import GHC.Generics (Generic)

nextSlotBucket :: Int -> Bucket -> Maybe Int
nextSlotBucket slots bucket
| isEmpty bucket = Just 0
| otherwise = nextSlotInNonEmptyBucket slots bucket

nextSlotInNonEmptyBucket :: Int -> Bucket -> Maybe Int
nextSlotInNonEmptyBucket slots (B bucket) = BS.findIndex (\b -> b == 0) bucket

preferableEmptyBucket :: Int -> (Int, Bucket) -> (Int, Bucket) -> Maybe (Int, Bucket, Int)
preferableEmptyBucket slots (bucketIndexA, bucketA) (bucketIndexB, bucketB) =
case emptySlotInA of
Just slotIndex -> Just (bucketIndexA, bucketA, slotIndex)
Nothing -> (\slotIndex -> (bucketIndexB, bucketB, slotIndex)) <$> emptySlotInB
where
availability bucket = if isEmpty bucket
then Just 0
else nextSlotInNonEmptyBucket slots bucket
emptySlotInA = availability bucketA
emptySlotInB = availability bucketB

makeFingerprint :: Hashable a => a -> Fingerprint
makeFingerprint a = FP . max 1 $ fromIntegral (abs $ hash a) `mod` 255
24 changes: 24 additions & 0 deletions src/Data/LiquidCuckooFilter/Random.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module Data.LiquidCuckooFilter.Random (
randomPick,
evictRandomFingeprint,
) where

import System.Random (randomIO, randomRIO)
import System.IO.Unsafe (unsafePerformIO)

import Data.LiquidCuckooFilter.Entities

-- For now we believe it is safe to trust the IO nature of random generator, and ignore referential transparency
randomPick :: a -> a -> a
randomPick h1 h2 = unsafePerformIO $ pick
where
pick = do
bool <- randomIO :: IO Bool
pure (if bool then h1 else h2)

evictRandomFingeprint :: Bucket -> (Int, Fingerprint)
evictRandomFingeprint bucket = unsafePerformIO $ pick
where
pick = do
i <- randomRIO (0, (getSize bucket) - 1)
pure (i, getSlot bucket i) -- it could be an empty bucket
150 changes: 150 additions & 0 deletions src/Data/LiquidCuckooFilterPlain.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}

{-@ LIQUID "--ple" @-}

module Data.LiquidCuckooFilterPlain
(
Table(..)
, initialize
, insert
, member
, delete
, attemptToWriteFingerprint
) where

import Data.LiquidCuckooFilter.Entities
import Data.LiquidCuckooFilter.Internal
import Data.LiquidCuckooFilter.Random(randomPick, evictRandomFingeprint)

import qualified Data.Vector as V
import Data.Hashable (Hashable, hash)
import Data.Bits (xor)

{-@ V.update :: v1: V.Vector a
-> v2: V.Vector (Int, a)
-> { v3: V.Vector a | vlen v1 == vlen v3 } @-}
{-@ type PositiveInt = { v: Int | v > 0 } @-}
{-@ type BucketN N = { v : V.Vector Bucket | vlen v = N } @-}

-- | A Cuckoo Filter hash table
{-@
data Table m = Table {
slots :: PositiveInt
, totalBuckets :: PositiveInt
, maxNumKicks :: PositiveInt
, buckets :: BucketN totalBuckets
} @-}
data Table m = Table {
slots :: !Int,
totalBuckets :: !Int,
maxNumKicks :: !Int,
buckets :: V.Vector Bucket
} deriving (Show, Eq)

{-@ defaultBuckets :: v : PositiveInt -> PositiveInt -> BucketN v @-}
defaultBuckets :: Int -> Int -> V.Vector Bucket
defaultBuckets totalBuckets slots = V.fromList $ replicate totalBuckets (emptyBucket slots)

{-@ initialize :: PositiveInt -> PositiveInt -> PositiveInt -> Table a @-}
initialize :: Int -> Int -> Int -> Table a
initialize totalBuckets slots maxNumKicks =
Table {
slots = slots,
totalBuckets = totalBuckets,
maxNumKicks = maxNumKicks,
buckets = defaultBuckets totalBuckets slots
}

slotsCount :: Table a -> Int
slotsCount Table {slots} = slots

getMaxNumKicks :: Table a -> Int
getMaxNumKicks Table {maxNumKicks} = maxNumKicks

writeBucket :: Table a -> Bucket -> Int -> Table a
writeBucket filter @ (Table {buckets}) bucket index =
filter {buckets = V.update buckets (V.fromList [(index, bucket)])}

{-@ toIndex :: t: Table a -> Int -> { x: Int | x >=0 && x < vlen (buckets t) } @-}
toIndex :: Table a -> Int -> Int
toIndex Table {totalBuckets} hash = hash `mod` totalBuckets

{-@ readBucket :: t: Table a -> { v: Int | v >= 0 && v < vlen (buckets t) } -> Bucket @-}
readBucket :: Table a -> Int -> Bucket
readBucket Table {buckets} index = buckets V.! index

insert :: (Hashable element) => Table element -> element -> Maybe (Table element)
insert filter element =
let
slots = slotsCount filter
f = makeFingerprint element
h1 = hash element
h2 = h1 `xor` (hash f)
index1 = toIndex filter h1
index2 = toIndex filter h2
bucketA = readBucket filter index1
bucketB = readBucket filter index2
in case preferableEmptyBucket slots (index1, bucketA) (index2, bucketB) of
Just (bucketPosition, bucket, availableSlot) -> Just $ writeBucket filter (setSlot bucket availableSlot f) bucketPosition
Nothing ->
attemptToWriteFingerprint maxNumKicks (f, randomHash) filter
where
maxNumKicks = getMaxNumKicks filter
randomHash = randomPick h1 h2

member :: (Hashable element) => Table element -> element -> Bool
member filter element =
let
f = makeFingerprint element
h1 = hash element
h2 = h1 `xor` (hash f)
index1 = toIndex filter h1
index2 = toIndex filter h2
bucketA = readBucket filter index1
bucketB = readBucket filter index2
in ((contains bucketA f) || (contains bucketB f))

delete :: (Hashable element) => Table element -> element -> Table element
delete filter element =
let
f = makeFingerprint element
h1 = hash element
h2 = h1 `xor` (hash f)
index1 = toIndex filter h1
index2 = toIndex filter h2
bucketA = readBucket filter index1
bucketB = readBucket filter index2
in
case (clean bucketA f) of
Just bucket -> writeBucket filter bucket index1
Nothing ->
case (clean bucketB f) of
Just bucket -> writeBucket filter bucket index2
Nothing -> filter
where
clean bucket fp = if (contains bucket fp) then Just (remove bucket fp) else Nothing

{-@ attemptToWriteFingerprint :: n: Nat -> (Fingerprint, Int) -> Table a -> Maybe (Table a) / [n] @-}
attemptToWriteFingerprint :: Int -> (Fingerprint, Int) -> Table a -> Maybe (Table a)
attemptToWriteFingerprint 0 _ _ = Nothing
attemptToWriteFingerprint remainingKicks (fp, h1) filter =
let slots = slotsCount filter
index1 = toIndex filter h1
bucket = readBucket filter index1
(evictedIndex, evictedFingerprint) = evictRandomFingeprint bucket
bucket' = setSlot bucket evictedIndex fp
h2 = h1 `xor` (hash evictedFingerprint)
filter' = writeBucket filter bucket' index1
index2 = toIndex filter' h2
bucket'' = readBucket filter' index2
in case nextSlotBucket slots bucket'' of
Just slot -> Just $ writeBucket filter' (setSlot bucket'' slot evictedFingerprint) index2
Nothing -> attemptToWriteFingerprint (remainingKicks - 1) (evictedFingerprint, h2) filter'
Loading

0 comments on commit 33b8d4f

Please sign in to comment.