Skip to content

Commit 316cbd8

Browse files
authored
Merge pull request #953 from openzim/suggestions_fix
Fixed suggestions for a single-word-followed-by-space query
2 parents e67b598 + 2b5a905 commit 316cbd8

File tree

2 files changed

+115
-125
lines changed

2 files changed

+115
-125
lines changed

src/suggestion.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ Xapian::Query SuggestionDataBase::parseQuery(const std::string& query)
143143
m_queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
144144
xquery = m_queryParser.parse_query(query, flags);
145145

146-
if ( !query.empty() && xquery.get_num_subqueries() == 0 ) {
146+
if ( !query.empty() && xquery.empty() ) {
147147
// a non-empty query string produced an empty xapian query which means
148148
// that the query string is made solely of punctuation.
149149
xquery = Xapian::Query(Xapian::Query::OP_WILDCARD, query);

test/suggestion.cpp

+114-124
Original file line numberDiff line numberDiff line change
@@ -138,16 +138,13 @@ TEST(Suggestion, singleTermOrder) {
138138
TempZimArchive tza("testZim");
139139
const zim::Archive archive = tza.createZimFromTitles(titles);
140140

141-
std::vector<std::string> resultSet = getSuggestions(archive, "berlin", archive.getEntryCount());
142-
std::vector<std::string> expectedResult = {
143-
"berlin",
144-
"berlin wall",
145-
"hotel berlin, berlin",
146-
"again berlin",
147-
"not berlin"
148-
};
149-
150-
ASSERT_EQ(expectedResult , resultSet);
141+
EXPECT_SUGGESTION_RESULTS(archive, "berlin",
142+
"berlin",
143+
"berlin wall",
144+
"hotel berlin, berlin",
145+
"again berlin",
146+
"not berlin"
147+
);
151148
}
152149

153150
TEST(Suggestion, caseDiacriticsAndHomogrpaphsHandling) {
@@ -220,17 +217,13 @@ TEST(Suggestion, partialQuery) {
220217
TempZimArchive tza("testZim");
221218
const zim::Archive archive = tza.createZimFromTitles(titles);
222219

223-
// "wo"
224-
std::vector<std::string> resultSet = getSuggestions(archive, "Wo", archive.getEntryCount());
225-
std::vector<std::string> expectedResult = {
226-
"Wolf",
227-
"Hour of the wolf",
228-
"The wolf of Shingashina",
229-
"The wolf of Wall Street",
230-
"Terma termb the wolf of wall street termc"
231-
};
232-
233-
ASSERT_EQ(expectedResult, resultSet);
220+
EXPECT_SUGGESTION_RESULTS(archive, "Wo",
221+
"Wolf",
222+
"Hour of the wolf",
223+
"The wolf of Shingashina",
224+
"The wolf of Wall Street",
225+
"Terma termb the wolf of wall street termc"
226+
);
234227
}
235228

236229
TEST(Suggestion, phraseOrder) {
@@ -245,14 +238,11 @@ TEST(Suggestion, phraseOrder) {
245238
TempZimArchive tza("testZim");
246239
const zim::Archive archive = tza.createZimFromTitles(titles);
247240

248-
std::vector<std::string> resultSet = getSuggestions(archive, "winter autumn summer", archive.getEntryCount());
249-
std::vector<std::string> expectedResult = {
250-
"winter autumn summer terma",
251-
"autumn summer winter",
252-
"summer winter autumn"
253-
};
254-
255-
ASSERT_EQ(expectedResult, resultSet);
241+
EXPECT_SUGGESTION_RESULTS(archive, "winter autumn summer",
242+
"winter autumn summer terma",
243+
"autumn summer winter",
244+
"summer winter autumn"
245+
);
256246
}
257247

258248
TEST(Suggestion, incrementalSearch) {
@@ -264,76 +254,94 @@ TEST(Suggestion, incrementalSearch) {
264254
"The wolf of Wall Street Book" ,
265255
"Hour of the wolf",
266256
"Wolf",
267-
"Terma termb the wolf of wall street termc"
257+
"Terma termb the wolf of wall street termc",
258+
"Are there any beasts in this country?"
268259
};
269260

270-
std::vector<std::string> resultSet, expectedResult;
271-
272261
TempZimArchive tza("testZim");
273262
const zim::Archive archive = tza.createZimFromTitles(titles);
274263

275-
// "wolf"
276-
resultSet = getSuggestions(archive, "Wolf", archive.getEntryCount());
277-
expectedResult = {
278-
"Wolf",
279-
"Hour of the wolf",
280-
"The wolf among sheeps",
281-
"The wolf of Shingashina",
282-
"The wolf of Wall Street",
283-
"The wolf of Wall Street Book",
284-
"Terma termb the wolf of wall street termc"
285-
};
286-
287-
ASSERT_EQ(expectedResult, resultSet);
264+
EXPECT_SUGGESTION_RESULTS(archive, "Wolf",
265+
"Wolf",
266+
"Hour of the wolf",
267+
"The wolf among sheeps",
268+
"The wolf of Shingashina",
269+
"The wolf of Wall Street",
270+
"The wolf of Wall Street Book",
271+
"Terma termb the wolf of wall street termc"
272+
);
288273

289-
// "the"
290-
resultSet = getSuggestions(archive, "the", archive.getEntryCount());
291-
expectedResult = {
292-
"The chocolate factory",
293-
"The wolf among sheeps",
294-
"The wolf of Shingashina",
295-
"The wolf of Wall Street",
296-
"The wolf of Wall Street Book",
297-
"Hour of the wolf",
298-
"Terma termb the wolf of wall street termc"
299-
};
274+
EXPECT_SUGGESTION_RESULTS(archive, "Wolf ",
275+
"Wolf",
276+
"Hour of the wolf",
277+
"The wolf among sheeps",
278+
"The wolf of Shingashina",
279+
"The wolf of Wall Street",
280+
"The wolf of Wall Street Book",
281+
"Terma termb the wolf of wall street termc"
282+
);
300283

301-
ASSERT_EQ(expectedResult, resultSet);
284+
EXPECT_SUGGESTION_RESULTS(archive, "the",
285+
"The chocolate factory",
286+
"The wolf among sheeps",
287+
"The wolf of Shingashina",
288+
"The wolf of Wall Street",
289+
"The wolf of Wall Street Book",
290+
"Hour of the wolf",
291+
"Terma termb the wolf of wall street termc",
292+
"Are there any beasts in this country?"
293+
);
302294

303-
// "the wolf"
304-
resultSet = getSuggestions(archive, "the wolf", archive.getEntryCount());
305-
expectedResult = {
306-
"The wolf among sheeps",
307-
"The wolf of Shingashina",
308-
"The wolf of Wall Street",
309-
"The wolf of Wall Street Book",
310-
"Hour of the wolf",
311-
"Terma termb the wolf of wall street termc"
312-
};
295+
EXPECT_SUGGESTION_RESULTS(archive, "the ",
296+
"The chocolate factory",
297+
"The wolf among sheeps",
298+
"The wolf of Shingashina",
299+
"The wolf of Wall Street",
300+
"The wolf of Wall Street Book",
301+
"Hour of the wolf",
302+
"Terma termb the wolf of wall street termc"
303+
);
313304

314-
ASSERT_EQ(expectedResult, resultSet);
305+
EXPECT_SUGGESTION_RESULTS(archive, "the wol",
306+
"Hour of the wolf",
307+
"The wolf among sheeps",
308+
"The wolf of Shingashina",
309+
"The wolf of Wall Street",
310+
"The wolf of Wall Street Book",
311+
"Terma termb the wolf of wall street termc"
312+
);
315313

316-
// "the wolf of"
317-
resultSet = getSuggestions(archive, "the wolf of", archive.getEntryCount());
318-
expectedResult = {
319-
"The wolf of Shingashina",
320-
"The wolf of Wall Street",
321-
"The wolf of Wall Street Book",
322-
"Terma termb the wolf of wall street termc",
323-
"Hour of the wolf"
324-
};
314+
EXPECT_SUGGESTION_RESULTS(archive, "the wolf",
315+
"The wolf among sheeps",
316+
"The wolf of Shingashina",
317+
"The wolf of Wall Street",
318+
"The wolf of Wall Street Book",
319+
"Hour of the wolf",
320+
"Terma termb the wolf of wall street termc"
321+
);
325322

326-
ASSERT_EQ(expectedResult, resultSet);
323+
EXPECT_SUGGESTION_RESULTS(archive, "the wolf ",
324+
"The wolf among sheeps",
325+
"The wolf of Shingashina",
326+
"The wolf of Wall Street",
327+
"The wolf of Wall Street Book",
328+
"Hour of the wolf",
329+
"Terma termb the wolf of wall street termc"
330+
);
327331

328-
// "the wolf of wall"
329-
resultSet = getSuggestions(archive, "the wolf of wall", archive.getEntryCount());
330-
expectedResult = {
331-
"The wolf of Wall Street",
332-
"The wolf of Wall Street Book",
333-
"Terma termb the wolf of wall street termc"
334-
};
332+
EXPECT_SUGGESTION_RESULTS(archive, "the wolf of",
333+
"The wolf of Shingashina",
334+
"The wolf of Wall Street",
335+
"The wolf of Wall Street Book",
336+
"Terma termb the wolf of wall street termc",
337+
"Hour of the wolf"
338+
);
335339

336-
ASSERT_EQ(expectedResult, resultSet);
340+
EXPECT_SUGGESTION_RESULTS(archive, "the wolf of wall",
341+
"The wolf of Wall Street",
342+
"The wolf of Wall Street Book",
343+
"Terma termb the wolf of wall street termc"
344+
);
337345
}
338346

339347
TEST(Suggestion, phraseOutOfWindow) {
@@ -347,14 +355,11 @@ TEST(Suggestion, phraseOutOfWindow) {
347355
TempZimArchive tza("testZim");
348356
const zim::Archive archive = tza.createZimFromTitles(titles);
349357

350-
std::vector<std::string> resultSet = getSuggestions(archive, "the dummy query", archive.getEntryCount());
351-
std::vector<std::string> expectedResult = {
352-
"This is the dummy query phrase",
353-
"aterm the bterm dummy query cterm",
354-
"the aterm bterm dummy cterm query"
355-
};
356-
357-
ASSERT_EQ(expectedResult, resultSet);
358+
EXPECT_SUGGESTION_RESULTS(archive, "the dummy query",
359+
"This is the dummy query phrase",
360+
"aterm the bterm dummy query cterm",
361+
"the aterm bterm dummy cterm query"
362+
);
358363
}
359364

360365
TEST(Suggestion, checkStopword) {
@@ -369,11 +374,9 @@ TEST(Suggestion, checkStopword) {
369374

370375
// "she", "and", "the" are stopwords. If stopwords are properly handled, they
371376
// should be included in the result documents.
372-
std::vector<std::string> resultSet = getSuggestions(archive, "she and the apple", archive.getEntryCount());
373-
std::vector<std::string> expectedResult = {
374-
"she and the apple"
375-
};
376-
ASSERT_EQ(expectedResult, resultSet);
377+
EXPECT_SUGGESTION_RESULTS(archive, "she and the apple",
378+
"she and the apple"
379+
);
377380
}
378381

379382
TEST(Suggestion, checkRedirectionCollapse) {
@@ -467,14 +470,11 @@ TEST(Suggestion, anchorQueryToBeginning) {
467470
TempZimArchive tza("testZim");
468471
const zim::Archive archive = tza.createZimFromTitles(titles);
469472

470-
std::vector<std::string> resultSet = getSuggestions(archive, "This is a title", archive.getEntryCount());
471-
std::vector<std::string> expectedResult = {
472-
"this is a title aterm bterm cterm",
473-
"aterm bterm this is a title cterm",
474-
"aterm this is a title bterm cterm"
475-
};
476-
477-
ASSERT_EQ(expectedResult, resultSet);
473+
EXPECT_SUGGESTION_RESULTS(archive, "This is a title",
474+
"this is a title aterm bterm cterm",
475+
"aterm bterm this is a title cterm",
476+
"aterm this is a title bterm cterm"
477+
);
478478
}
479479

480480
// To secure compatibility of new zim files with older kiwixes, we need to index
@@ -682,24 +682,14 @@ TEST(Suggestion, CJK) {
682682
creator.finishZimCreation();
683683

684684
zim::Archive archive(tza.getPath());
685-
{
686-
std::vector<std::string> resultSet = getSuggestions(archive, "平方", archive.getEntryCount());
687-
688-
// We should get two results
689-
std::vector<std::string> expectedResult = {
690-
"平方",
691-
"平方根"
692-
};
693-
ASSERT_EQ(resultSet, expectedResult);
694-
}
695-
696-
{
697-
std::vector<std::string> resultSet = getSuggestions(archive, "平方根", archive.getEntryCount());
685+
EXPECT_SUGGESTION_RESULTS(archive, "平方",
686+
"平方",
687+
"平方根"
688+
);
698689

699-
// We should get only one result
700-
std::vector<std::string> expectedResult = {"平方根"};
701-
ASSERT_EQ(resultSet, expectedResult);
702-
}
690+
EXPECT_SUGGESTION_RESULTS(archive, "平方根",
691+
"平方根"
692+
);
703693
}
704694

705695
} // unnamed namespace

0 commit comments

Comments
 (0)