Skip to content

Commit

Permalink
fft; hints; typos
Browse files Browse the repository at this point in the history
  • Loading branch information
sadit committed Mar 15, 2024
1 parent 305e4a3 commit 20a9899
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SimilaritySearch"
uuid = "053f045d-5466-53fd-b400-a066f88fe02a"
authors = ["Eric S. Tellez <donsadit@gmail.com>"]
version = "0.11.7"
version = "0.11.8"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
2 changes: 0 additions & 2 deletions src/SimilaritySearch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ function __init__()
DEFAULT_SEARCH_GRAPH_CONTEXT[] = SearchGraphContext()
end


using PrecompileTools


Expand Down Expand Up @@ -279,5 +278,4 @@ using PrecompileTools
end
end
end

end # end SimilaritySearch module
5 changes: 2 additions & 3 deletions src/fft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ function fft(dist::SemiMetric, X::AbstractDatabase, k::Integer; verbose=true)
dmax::Float32 = typemax(Float32)
N == 0 && return (; centers, nn, dists=nndists, dmax)

@inbounds for i in 1:N
@inbounds for i in 1:k
push!(dmaxlist, dmax)
push!(centers, imax)
verbose && println(stderr, "computing fartest point $(length(centers)), dmax: $dmax, imax: $imax, n: $(length(X))")
verbose && println(stderr, "computing farthest point $(length(centers)), dmax: $dmax, imax: $imax, n: $(length(X))")

pivot = X[imax]
@batch minbatch=getminbatch(0, N) for i in 1:N
Expand All @@ -41,7 +41,6 @@ function fft(dist::SemiMetric, X::AbstractDatabase, k::Integer; verbose=true)
end

dmax, imax = findmax(nndists)
length(dmaxlist) < k || break
end

(; centers, nn, dists=nndists, dmax)
Expand Down
2 changes: 1 addition & 1 deletion src/searchgraph/SearchGraph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export index!, push_item!
export Neighborhood, IdentityNeighborhood, DistalSatNeighborhood, SatNeighborhood
export find_neighborhood
export BeamSearch, BeamSearchSpace, Callback
export KDisjointHints, DisjointHints, RandomHints, EpsilonHints
export KDisjointHints, DisjointHints, RandomHints, EpsilonHints, KCentersHints
export RandomPruning, KeepNearestPruning, SatPruning, prune!

"""
Expand Down
3 changes: 2 additions & 1 deletion src/searchgraph/context.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ end
function SearchGraphContext(;
logger=InformativeLog(),
neighborhood=Neighborhood(SatNeighborhood(0f0)),
hints_callback=DisjointHints(),
#hints_callback=DisjointHints(),
hints_callback=KCentersHints(kfun=x->log(1.2, x)),
#hints_callback=EpsilonHints(quantile=1/64),
hyperparameters_callback=OptimizeParameters(),
parallel_block=4Threads.nthreads(),
Expand Down
25 changes: 25 additions & 0 deletions src/searchgraph/hints.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,28 @@ function execute_callback(index::SearchGraph, ctx::SearchGraphContext, opt::Epsi
index.hints .= v
end

"""
    KCentersHints(samplesize, kfun)

Indicates that hints are a small set of objects having a minimal distance between them.

When this callback is executed on an index with `n` elements, `samplesize(n)` gives the
size of the random sample taken from the index and `kfun(n)` gives the number of
centers (hints) to select from that sample with a farthest first traversal (`fft`).

# Fields
- `samplesize::Function`: maps the index size `n` to the sample size.
- `kfun::Function`: maps the index size `n` to the requested number of hints.
"""
mutable struct KCentersHints <: Callback
    samplesize::Function
    kfun::Function
end

# Keyword constructor for `KCentersHints`.
# - `samplesize`: maps the index size `n` to the size of the random sample (default `sqrt`).
# - `kfun`: maps the index size `n` to the number of hints; the default grows
#   logarithmically with `n` (base 1.15). The previous default, `n -> log(1.15)`,
#   ignored `n` and always evaluated to ~0.14, i.e., a single hint for any index size.
KCentersHints(; samplesize=sqrt, kfun=n -> log(1.15, n)) = KCentersHints(samplesize, kfun)

"""
    execute_callback(index::SearchGraph, ctx::SearchGraphContext, opt::KCentersHints)

Replace `index.hints` with centers selected by a farthest first traversal (`fft`)
over a random sample of the indexed database.

The sample size is `opt.samplesize(n)` and the requested number of centers is
`opt.kfun(n)`, where `n = length(index)`; both are clamped to valid ranges.
`ctx` is not used by this method. Returns `index.hints`.
"""
function execute_callback(index::SearchGraph, ctx::SearchGraphContext, opt::KCentersHints)
    n = length(index)
    if n == 0
        # Nothing to sample from; an empty index has no valid hints.
        empty!(index.hints)
        return index.hints
    end

    # Draw at least one and at most `n` candidates.
    m = clamp(ceil(Int, opt.samplesize(n)), 1, n)
    # Sampling is with replacement, so after `unique!` the sample may hold fewer than `m` ids.
    sample = sort!(unique!(rand(1:n, m)))

    # Bound `k` by the *actual* sample size (keeping the original "one less than the
    # sample" cap) and always request at least one center.
    maxk = max(1, length(sample) - 1)
    k = clamp(ceil(Int, opt.kfun(n)), 1, maxk)

    D = SubDatabase(database(index), sample)
    A = fft(distance(index), D, k)

    # Map sample-local center positions back to index ids; drop repeated ids that may
    # appear when the sample contains objects at distance zero from each other.
    hints = unique!(D.map[A.centers])
    resize!(index.hints, length(hints))
    index.hints .= hints
    return index.hints
end

7 changes: 5 additions & 2 deletions test/testfft.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ using Test, SimilaritySearch, LinearAlgebra

@testset "farthest first traversal" begin
dist = L2Distance()
X = rand(Float32, 4, 300)
res = fft(dist, MatrixDatabase(X), 30)
X = rand(Float32, 4, 30)
k = 10
res = fft(dist, MatrixDatabase(X), k)
@test k == length(res.centers)
@test Set(res.centers) == Set(res.nn)
@test all(res.dmax .>= res.dists)
end


2 comments on commit 20a9899

@sadit
Copy link
Owner Author

@sadit sadit commented on 20a9899 Mar 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/102980

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.11.8 -m "<description of version>" 20a9899edb4e6df85b33b9858b2dbb91e99f12a1
git push origin v0.11.8

Please sign in to comment.