-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSynoAnalyzer.cpp
65 lines (53 loc) · 2.6 KB
/
SynoAnalyzer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include "SynoAnalyzer.h"
#include "lucene++/Lucene.h"
#include "lucene++/LuceneTypes.h"
#include "lucene++/StandardTokenizer.h"
#include "lucene++/StandardFilter.h"
#include "lucene++/LowerCaseFilter.h"
#include "lucene++/PorterStemFilter.h"
#include "lucene++/StopFilter.h"
#include "lucene++/Synchronize.h"
namespace Search {
// Built-in stop words used when the caller does not supply a stop set.
// Source list:
// https://raw.githubusercontent.com/apache/lucene/main/lucene/analysis/common/src/resources/org/apache/lucene/analysis/cjk/stopwords.txt
// Entries are grouped by initial letter below; the element order is unchanged.
const wchar_t* SynoAnalyzer::_ENGLISH_STOP_WORDS[] = {
    L"a", L"and", L"are", L"as", L"at",
    L"be", L"but", L"by",
    L"for",
    L"if", L"in", L"into", L"is", L"it",
    L"no", L"not",
    L"of", L"on", L"or",
    L"s", L"such",
    L"t", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to",
    L"was", L"will", L"with", L"www"
};
// Creates an analyzer that filters with the built-in default stop set.
//   matchVersion - Lucene compatibility version, stored on the instance.
SynoAnalyzer::SynoAnalyzer(Lucene::LuceneVersion::Version matchVersion) {
    // `this->` is needed here because the parameter shadows the member name.
    this->matchVersion = matchVersion;
    stopSet = getDefaultStopSet();
}
// Creates an analyzer that filters with a caller-supplied stop set.
//   matchVersion - Lucene compatibility version, stored on the instance.
//   stopwords    - set stored as this analyzer's stop set.
SynoAnalyzer::SynoAnalyzer(Lucene::LuceneVersion::Version matchVersion, Lucene::HashSet<Lucene::String> stopwords) {
    // `this->` is needed here because the parameter shadows the member name.
    this->matchVersion = matchVersion;
    stopSet = stopwords;
}
// Creates an analyzer with a caller-supplied stop set and stem-exclusion set.
//   matchVersion - Lucene compatibility version, stored on the instance.
//   stopwords    - set stored as this analyzer's stop set.
//   exclusions   - set stored as this analyzer's stem-exclusion set.
SynoAnalyzer::SynoAnalyzer(Lucene::LuceneVersion::Version matchVersion, Lucene::HashSet<Lucene::String> stopwords, Lucene::HashSet<Lucene::String> exclusions) {
    // `this->` is needed here because the parameter shadows the member name.
    this->matchVersion = matchVersion;
    exclusionSet = exclusions;
    stopSet = stopwords;
}
// Destructor. The body is empty: no explicit cleanup is performed here.
SynoAnalyzer::~SynoAnalyzer() {
}
// Returns the default stop set, populated from _ENGLISH_STOP_WORDS.
// The set lives in a function-local static and is assigned inside
// LUCENE_RUN_ONCE (macro defined outside this file; presumably it runs the
// statement a single time -- confirm against the Lucene++ headers).
const Lucene::HashSet<Lucene::String> SynoAnalyzer::getDefaultStopSet() {
    static Lucene::HashSet<Lucene::String> defaultStopWords;
    LUCENE_RUN_ONCE(
        defaultStopWords = Lucene::HashSet<Lucene::String>::newInstance(
            &_ENGLISH_STOP_WORDS[0],
            &_ENGLISH_STOP_WORDS[0] + SIZEOF_ARRAY(_ENGLISH_STOP_WORDS))
    );
    return defaultStopWords;
}
// Replaces the stem-exclusion set and clears the previously saved token stream.
//   exclusions - new set stored in exclusionSet.
void SynoAnalyzer::setStemExclusionTable(Lucene::HashSet<Lucene::String> exclusions) {
    this->exclusionSet = exclusions;

    // Passing a null object drops the saved stream, forcing a new stemmer to be created.
    Lucene::LuceneObjectPtr noSavedStream;
    setPreviousTokenStream(noSavedStream);
}
// Constructs a new analysis chain over `reader` and returns its last stage:
//   StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter -> PorterStemFilter
//   fieldName - accepted but not used by this function.
//   reader    - input handed to the tokenizer.
// NOTE(review): exclusionSet is never read in this function, so stem
// exclusions do not influence the chain built here -- confirm whether intended.
Lucene::TokenStreamPtr SynoAnalyzer::tokenStream(const Lucene::String& fieldName, const Lucene::ReaderPtr& reader) {
    Lucene::TokenStreamPtr tokens = Lucene::newLucene<Lucene::StandardTokenizer>(matchVersion, reader);
    Lucene::TokenStreamPtr normalized = Lucene::newLucene<Lucene::StandardFilter>(tokens);
    Lucene::TokenStreamPtr lowered = Lucene::newLucene<Lucene::LowerCaseFilter>(normalized);

    // The position-increment setting is derived from the configured match version.
    const bool positionIncrements = Lucene::StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion);
    Lucene::TokenStreamPtr withoutStopWords = Lucene::newLucene<Lucene::StopFilter>(positionIncrements, lowered, stopSet);

    return Lucene::newLucene<Lucene::PorterStemFilter>(withoutStopWords);
}
// Destructor for SynoAnalyzerSavedStreams (class declared outside this file's
// visible portion). The body is empty: no explicit cleanup is performed here.
SynoAnalyzerSavedStreams::~SynoAnalyzerSavedStreams() {
}
}