Skip to content

Commit

Permalink
Fixed duplicate checks for Levenshtein distance.
Browse files: browse the repository at this point in the history.
  • Loading branch information
MrAlexSee committed Oct 23, 2018
1 parent 120ed4a commit fba5034
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions related/neighborhood.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,27 +162,36 @@ float runLevenK1(const unordered_set<string> &wordSet, const vector<string> &pat
}
}

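// Stores the candidate words that have already been looked up in wordSet.
// Different edit positions can produce the same candidate (e.g. deleting either
// 'a' from "aab" yields "ab", and inserting a character next to an identical one
// yields the same string twice), so each distinct candidate is checked only once
// to avoid counting the same match more than once in nMatches.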
unordered_set<string> checked;

// Deletions for k = 1.
for (size_t i = 0; i < pattern.size(); ++i)
{
const size_t prefixSize = i + 1;
string candidate(pattern.size() - 1, ' ');
string candidate(pattern.size() - 1, 0);

char *src = const_cast<char *>(candidate.c_str());

memcpy(src, pattern.c_str(), i);
memcpy(src + i, pattern.c_str() + prefixSize, pattern.size() - prefixSize);

if (wordSet.find(candidate) != wordSet.end())
if (checked.find(candidate) == checked.end())
{
nMatches += 1;
if (wordSet.find(candidate) != wordSet.end())
{
nMatches += 1;
}

checked.insert(candidate);
}
}

// Insertions for k = 1.
for (size_t i = 0; i < pattern.size() + 1; ++i)
{
string candidate(pattern.size() + 1, ' ');
string candidate(pattern.size() + 1, 0);
const size_t prefixSize = i + 1;

char *src = const_cast<char *>(candidate.c_str());
Expand All @@ -194,9 +203,14 @@ float runLevenK1(const unordered_set<string> &wordSet, const vector<string> &pat
{
candidate[i] = c;

if (wordSet.find(candidate) != wordSet.end())
if (checked.find(candidate) == checked.end())
{
nMatches += 1;
if (wordSet.find(candidate) != wordSet.end())
{
nMatches += 1;
}

checked.insert(candidate);
}
}
}
Expand Down

0 comments on commit fba5034

Please sign in to comment.