-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cuckoo Filter with LiquidHaskell bindings
- Loading branch information
0 parents
commit 33b8d4f
Showing
11 changed files
with
535 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.stack-work/ | ||
liquid-cuckoo-filter.cabal | ||
.liquid | ||
*~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# liquid-cuckoo-filter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
name: liquid-cuckoo-filter | ||
version: 0.1.0.0 | ||
github: "kubum/liquid-cuckoo-filter" | ||
license: BSD3 | ||
author: "Andrey Fadeyev" | ||
maintainer: "fadeyew@gmail.com" | ||
copyright: "2019 Andrey Fadeyev" | ||
|
||
extra-source-files: | ||
- README.md | ||
- ChangeLog.md | ||
|
||
# Metadata used when publishing your package | ||
# synopsis: Short description of your package | ||
# category: Web | ||
|
||
# To avoid duplicated efforts in documentation and dealing with the | ||
# complications of embedding Haddock markup inside cabal files, it is | ||
# common to point users to the README.md file. | ||
description: Please see the README on GitHub at <https://github.com/kubum/liquid-cuckoo-filter#readme> | ||
|
||
dependencies: | ||
- base >= 4.7 && < 5 | ||
- vector >= 0.12.0.2 | ||
- containers | ||
- cereal | ||
|
||
library: | ||
source-dirs: src | ||
|
||
tests: | ||
liquid-cuckoo-filter-test: | ||
main: Spec.hs | ||
source-dirs: test | ||
ghc-options: | ||
- -threaded | ||
- -rtsopts | ||
- -with-rtsopts=-N | ||
dependencies: | ||
- liquid-cuckoo-filter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{-# LANGUAGE DerivingStrategies #-} | ||
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
{-# LANGUAGE DeriveAnyClass #-} | ||
{-# LANGUAGE DeriveGeneric #-} | ||
|
||
module Data.LiquidCuckooFilter.Entities ( | ||
Fingerprint(..), | ||
Bucket(..), | ||
emptyFP, | ||
emptyBucket, | ||
getSize, | ||
getSlot, | ||
setSlot, | ||
isEmpty, | ||
contains, | ||
remove, | ||
) where | ||
|
||
import qualified Data.ByteString as BS | ||
import Data.ByteString.Unsafe(unsafeIndex, unsafeDrop, unsafeTake) | ||
|
||
import Data.Hashable (Hashable, hash) | ||
import Data.Serialize (Serialize) | ||
import Data.Word (Word8) | ||
import GHC.Generics (Generic) | ||
|
||
-- | A Fingerprint is an 8 bit hash of a value | ||
newtype Fingerprint = FP Word8 | ||
deriving (Show, Eq, Ord, Generic) | ||
deriving newtype (Hashable) | ||
deriving anyclass Serialize | ||
|
||
emptyFP :: Fingerprint | ||
emptyFP = FP 0 | ||
|
||
{-@ data Bucket = B { content :: BS.ByteString } @-} | ||
data Bucket = B { content :: BS.ByteString } deriving (Show, Eq) | ||
|
||
{-@ getSize :: b: Bucket -> { v: Int | bslen (content b) = v } @-} | ||
getSize :: Bucket -> Int | ||
getSize (B bucket) = BS.length bucket | ||
|
||
isEmpty :: Bucket -> Bool | ||
isEmpty (B bucket) = BS.null bucket | ||
|
||
emptyBucket :: Int -> Bucket | ||
emptyBucket slots = B (BS.replicate slots 0) | ||
|
||
{-@ getSlot :: b: Bucket | ||
-> { index: Int | index >= 0 && bslen (content b) > index } | ||
-> Fingerprint @-} | ||
getSlot :: Bucket -> Int -> Fingerprint | ||
getSlot (B bucket) id = FP (unsafeIndex bucket id) | ||
|
||
{-@ setSlot :: b: Bucket | ||
-> { index: Int | index >= 0 && bslen (content b) > index } | ||
-> Fingerprint | ||
-> Bucket @-} | ||
setSlot :: Bucket -> Int -> Fingerprint -> Bucket | ||
setSlot (B bucket) id (FP fp) = B (BS.concat [before, addedBytes, after]) | ||
where | ||
(before, rest) = unsafeSplitAt id bucket | ||
after = unsafeDrop 1 rest | ||
addedBytes = BS.pack [fp] | ||
|
||
{-@ unsafeSplitAt | ||
:: v: { v: Int | v >=0 } | ||
-> b: { bs : BS.ByteString | bslen bs >= v } | ||
-> ( { bs : BS.ByteString | bslen bs == v } , { bs : BS.ByteString | bslen bs == bslen b - v } ) @-} | ||
unsafeSplitAt :: Int -> BS.ByteString -> (BS.ByteString, BS.ByteString) | ||
unsafeSplitAt n bs = (unsafeTake n bs, unsafeDrop n bs) | ||
|
||
contains :: Bucket -> Fingerprint -> Bool | ||
contains (B bucket) (FP fp) = BS.elem fp bucket | ||
|
||
remove :: Bucket -> Fingerprint -> Bucket | ||
remove (B bucket) (FP fp) = B (BS.filter (\f -> f /= fp) bucket) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
{-# LANGUAGE DeriveAnyClass #-} | ||
|
||
module Data.LiquidCuckooFilter.Internal ( | ||
makeFingerprint, | ||
getSize, | ||
getSlot, | ||
setSlot, | ||
contains, | ||
remove, | ||
preferableEmptyBucket, | ||
nextSlotBucket, | ||
) where | ||
|
||
import Data.LiquidCuckooFilter.Entities | ||
import Data.LiquidCuckooFilter.Random(evictRandomFingeprint) | ||
|
||
import qualified Data.ByteString as BS | ||
|
||
import Data.Bits (xor) | ||
import Data.Hashable (Hashable, hash) | ||
import Data.Serialize (Serialize) | ||
import Data.Word (Word8) | ||
import GHC.Generics (Generic) | ||
|
||
nextSlotBucket :: Int -> Bucket -> Maybe Int | ||
nextSlotBucket slots bucket | ||
| isEmpty bucket = Just 0 | ||
| otherwise = nextSlotInNonEmptyBucket slots bucket | ||
|
||
nextSlotInNonEmptyBucket :: Int -> Bucket -> Maybe Int | ||
nextSlotInNonEmptyBucket slots (B bucket) = BS.findIndex (\b -> b == 0) bucket | ||
|
||
preferableEmptyBucket :: Int -> (Int, Bucket) -> (Int, Bucket) -> Maybe (Int, Bucket, Int) | ||
preferableEmptyBucket slots (bucketIndexA, bucketA) (bucketIndexB, bucketB) = | ||
case emptySlotInA of | ||
Just slotIndex -> Just (bucketIndexA, bucketA, slotIndex) | ||
Nothing -> (\slotIndex -> (bucketIndexB, bucketB, slotIndex)) <$> emptySlotInB | ||
where | ||
availability bucket = if isEmpty bucket | ||
then Just 0 | ||
else nextSlotInNonEmptyBucket slots bucket | ||
emptySlotInA = availability bucketA | ||
emptySlotInB = availability bucketB | ||
|
||
makeFingerprint :: Hashable a => a -> Fingerprint | ||
makeFingerprint a = FP . max 1 $ fromIntegral (abs $ hash a) `mod` 255 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
module Data.LiquidCuckooFilter.Random ( | ||
randomPick, | ||
evictRandomFingeprint, | ||
) where | ||
|
||
import System.Random (randomIO, randomRIO) | ||
import System.IO.Unsafe (unsafePerformIO) | ||
|
||
import Data.LiquidCuckooFilter.Entities | ||
|
||
-- For now we believe it is safe to trust the IO nature of random generator, and ignore referential transparency | ||
randomPick :: a -> a -> a | ||
randomPick h1 h2 = unsafePerformIO $ pick | ||
where | ||
pick = do | ||
bool <- randomIO :: IO Bool | ||
pure (if bool then h1 else h2) | ||
|
||
evictRandomFingeprint :: Bucket -> (Int, Fingerprint) | ||
evictRandomFingeprint bucket = unsafePerformIO $ pick | ||
where | ||
pick = do | ||
i <- randomRIO (0, (getSize bucket) - 1) | ||
pure (i, getSlot bucket i) -- it could be an empty bucket |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
{-# LANGUAGE DerivingStrategies #-} | ||
{-# LANGUAGE MultiParamTypeClasses #-} | ||
{-# LANGUAGE FlexibleInstances #-} | ||
{-# LANGUAGE NamedFieldPuns #-} | ||
{-# LANGUAGE DeriveAnyClass #-} | ||
{-# LANGUAGE DeriveGeneric #-} | ||
|
||
{-@ LIQUID "--ple" @-} | ||
|
||
module Data.LiquidCuckooFilterPlain | ||
( | ||
Table(..) | ||
, initialize | ||
, insert | ||
, member | ||
, delete | ||
, attemptToWriteFingerprint | ||
) where | ||
|
||
import Data.LiquidCuckooFilter.Entities | ||
import Data.LiquidCuckooFilter.Internal | ||
import Data.LiquidCuckooFilter.Random(randomPick, evictRandomFingeprint) | ||
|
||
import qualified Data.Vector as V | ||
import Data.Hashable (Hashable, hash) | ||
import Data.Bits (xor) | ||
|
||
{-@ V.update :: v1: V.Vector a | ||
-> v2: V.Vector (Int, a) | ||
-> { v3: V.Vector a | vlen v1 == vlen v3 } @-} | ||
{-@ type PositiveInt = { v: Int | v > 0 } @-} | ||
{-@ type BucketN N = { v : V.Vector Bucket | vlen v = N } @-} | ||
|
||
-- | A Cuckoo Filter hash table | ||
{-@ | ||
data Table m = Table { | ||
slots :: PositiveInt | ||
, totalBuckets :: PositiveInt | ||
, maxNumKicks :: PositiveInt | ||
, buckets :: BucketN totalBuckets | ||
} @-} | ||
data Table m = Table { | ||
slots :: !Int, | ||
totalBuckets :: !Int, | ||
maxNumKicks :: !Int, | ||
buckets :: V.Vector Bucket | ||
} deriving (Show, Eq) | ||
|
||
{-@ defaultBuckets :: v : PositiveInt -> PositiveInt -> BucketN v @-} | ||
defaultBuckets :: Int -> Int -> V.Vector Bucket | ||
defaultBuckets totalBuckets slots = V.fromList $ replicate totalBuckets (emptyBucket slots) | ||
|
||
{-@ initialize :: PositiveInt -> PositiveInt -> PositiveInt -> Table a @-} | ||
initialize :: Int -> Int -> Int -> Table a | ||
initialize totalBuckets slots maxNumKicks = | ||
Table { | ||
slots = slots, | ||
totalBuckets = totalBuckets, | ||
maxNumKicks = maxNumKicks, | ||
buckets = defaultBuckets totalBuckets slots | ||
} | ||
|
||
slotsCount :: Table a -> Int | ||
slotsCount Table {slots} = slots | ||
|
||
getMaxNumKicks :: Table a -> Int | ||
getMaxNumKicks Table {maxNumKicks} = maxNumKicks | ||
|
||
writeBucket :: Table a -> Bucket -> Int -> Table a | ||
writeBucket filter @ (Table {buckets}) bucket index = | ||
filter {buckets = V.update buckets (V.fromList [(index, bucket)])} | ||
|
||
{-@ toIndex :: t: Table a -> Int -> { x: Int | x >=0 && x < vlen (buckets t) } @-} | ||
toIndex :: Table a -> Int -> Int | ||
toIndex Table {totalBuckets} hash = hash `mod` totalBuckets | ||
|
||
{-@ readBucket :: t: Table a -> { v: Int | v >= 0 && v < vlen (buckets t) } -> Bucket @-} | ||
readBucket :: Table a -> Int -> Bucket | ||
readBucket Table {buckets} index = buckets V.! index | ||
|
||
insert :: (Hashable element) => Table element -> element -> Maybe (Table element) | ||
insert filter element = | ||
let | ||
slots = slotsCount filter | ||
f = makeFingerprint element | ||
h1 = hash element | ||
h2 = h1 `xor` (hash f) | ||
index1 = toIndex filter h1 | ||
index2 = toIndex filter h2 | ||
bucketA = readBucket filter index1 | ||
bucketB = readBucket filter index2 | ||
in case preferableEmptyBucket slots (index1, bucketA) (index2, bucketB) of | ||
Just (bucketPosition, bucket, availableSlot) -> Just $ writeBucket filter (setSlot bucket availableSlot f) bucketPosition | ||
Nothing -> | ||
attemptToWriteFingerprint maxNumKicks (f, randomHash) filter | ||
where | ||
maxNumKicks = getMaxNumKicks filter | ||
randomHash = randomPick h1 h2 | ||
|
||
member :: (Hashable element) => Table element -> element -> Bool | ||
member filter element = | ||
let | ||
f = makeFingerprint element | ||
h1 = hash element | ||
h2 = h1 `xor` (hash f) | ||
index1 = toIndex filter h1 | ||
index2 = toIndex filter h2 | ||
bucketA = readBucket filter index1 | ||
bucketB = readBucket filter index2 | ||
in ((contains bucketA f) || (contains bucketB f)) | ||
|
||
delete :: (Hashable element) => Table element -> element -> Table element | ||
delete filter element = | ||
let | ||
f = makeFingerprint element | ||
h1 = hash element | ||
h2 = h1 `xor` (hash f) | ||
index1 = toIndex filter h1 | ||
index2 = toIndex filter h2 | ||
bucketA = readBucket filter index1 | ||
bucketB = readBucket filter index2 | ||
in | ||
case (clean bucketA f) of | ||
Just bucket -> writeBucket filter bucket index1 | ||
Nothing -> | ||
case (clean bucketB f) of | ||
Just bucket -> writeBucket filter bucket index2 | ||
Nothing -> filter | ||
where | ||
clean bucket fp = if (contains bucket fp) then Just (remove bucket fp) else Nothing | ||
|
||
{-@ attemptToWriteFingerprint :: n: Nat -> (Fingerprint, Int) -> Table a -> Maybe (Table a) / [n] @-} | ||
attemptToWriteFingerprint :: Int -> (Fingerprint, Int) -> Table a -> Maybe (Table a) | ||
attemptToWriteFingerprint 0 _ _ = Nothing | ||
attemptToWriteFingerprint remainingKicks (fp, h1) filter = | ||
let slots = slotsCount filter | ||
index1 = toIndex filter h1 | ||
bucket = readBucket filter index1 | ||
(evictedIndex, evictedFingerprint) = evictRandomFingeprint bucket | ||
bucket' = setSlot bucket evictedIndex fp | ||
h2 = h1 `xor` (hash evictedFingerprint) | ||
filter' = writeBucket filter bucket' index1 | ||
index2 = toIndex filter' h2 | ||
bucket'' = readBucket filter' index2 | ||
in case nextSlotBucket slots bucket'' of | ||
Just slot -> Just $ writeBucket filter' (setSlot bucket'' slot evictedFingerprint) index2 | ||
Nothing -> attemptToWriteFingerprint (remainingKicks - 1) (evictedFingerprint, h2) filter' |
Oops, something went wrong.