-
Notifications
You must be signed in to change notification settings - Fork 316
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0a30a3a
commit fdc2a1b
Showing
8 changed files
with
146 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
module; | ||
|
||
#include <fcntl.h> | ||
#include <filesystem> | ||
#include <sys/mman.h> | ||
#include <sys/stat.h> | ||
#include <unistd.h> | ||
import stl; | ||
|
||
export module mmap; | ||
|
||
using namespace infinity; | ||
namespace fs = std::filesystem; | ||
|
||
namespace infinity { | ||
|
||
export int MmapFile(const String &fp, u8 *&data_ptr, SizeT &data_len) { | ||
data_ptr = nullptr; | ||
data_len = 0; | ||
long len_f = fs::file_size(fp); | ||
if (len_f == 0) | ||
return -1; | ||
int f = open(fp.c_str(), O_RDONLY); | ||
void *tmpd = mmap(NULL, len_f, PROT_READ, MAP_SHARED, f, 0); | ||
if (tmpd == MAP_FAILED) | ||
return -1; | ||
close(f); | ||
int rc = madvise(tmpd, len_f, MADV_RANDOM | MADV_DONTDUMP); | ||
if (rc < 0) | ||
return -1; | ||
data_ptr = (u8 *)tmpd; | ||
data_len = len_f; | ||
return 0; | ||
} | ||
|
||
// Releases a mapping described by `data_ptr` / `data_len`.
//
// A null `data_ptr` is treated as "nothing mapped" and succeeds immediately.
// On success returns 0 and resets `data_ptr` to nullptr and `data_len` to 0.
// If munmap() fails, returns -1 and leaves both arguments untouched.
export int MunmapFile(u8 *&data_ptr, SizeT &data_len) {
    if (data_ptr == nullptr) {
        return 0;
    }
    if (munmap(data_ptr, data_len) < 0) {
        return -1;
    }
    data_len = 0;
    data_ptr = nullptr;
    return 0;
}
|
||
} // namespace infinity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
module; | ||
|
||
import stl; | ||
import term_meta; | ||
import posting_list_format; | ||
import fst; | ||
import mmap; | ||
import infinity_exception; | ||
module dict_reader; | ||
|
||
namespace infinity { | ||
|
||
// Memory-maps the dictionary file at `dict_path` and builds the FST over the
// tail of the mapping.
//
// The FST length is derived from the file footer: a little-endian u64 root
// address is read 12 bytes (4 + 8) before the end of the file, and
// fst_root_addr + addr_offset(21) == fst_len.
//
// Throws UnrecoverableException when the file cannot be mapped or when the
// footer describes an FST that does not fit inside the file. Because the
// destructor does not run for a partially constructed object, the mapping is
// released explicitly before any exception leaves the constructor.
DictionaryReader::DictionaryReader(const String &dict_path, const PostingFormatOption &option)
    : dict_path_(dict_path), meta_loader_(option), data_ptr_(nullptr), data_len_(0) {
    int rc = MmapFile(dict_path, data_ptr_, data_len_);
    if (rc < 0) {
        throw UnrecoverableException("MmapFile failed");
    }

    constexpr SizeT kRootAddrFromEnd = 4 + 8; // position of the root address, counted from the end of the file
    constexpr SizeT kAddrOffset = 21;         // fst_root_addr + kAddrOffset == fst_len

    // fst_len is at least kAddrOffset, so a shorter file cannot hold an FST
    // (this also guarantees the 12-byte footer read below stays in bounds).
    if (data_len_ < kAddrOffset) {
        MunmapFile(data_ptr_, data_len_);
        throw UnrecoverableException("Dictionary file is too small to contain an FST");
    }

    const SizeT fst_root_addr = ReadU64LE(data_ptr_ + data_len_ - kRootAddrFromEnd);
    // Written as a subtraction so a corrupt, huge root address cannot overflow.
    if (fst_root_addr > data_len_ - kAddrOffset) {
        MunmapFile(data_ptr_, data_len_);
        throw UnrecoverableException("Dictionary file is corrupt: FST length exceeds file size");
    }

    const SizeT fst_len = fst_root_addr + kAddrOffset;
    u8 *fst_data = data_ptr_ + (data_len_ - fst_len);
    try {
        fst_ = MakeUnique<Fst>(fst_data, fst_len);
    } catch (...) {
        MunmapFile(data_ptr_, data_len_);
        throw;
    }
}
|
||
// Releases the file mapping held by this reader, if there is one.
//
// NOTE(review): this throws UnrecoverableException when unmapping fails.
// Destructors are implicitly noexcept unless declared otherwise, in which
// case the throw ends in std::terminate — confirm that is the intent.
DictionaryReader::~DictionaryReader() {
    if (data_ptr_ == nullptr) {
        return;
    }
    if (MunmapFile(data_ptr_, data_len_) < 0) {
        throw UnrecoverableException("MunmapFile failed");
    }
}
|
||
bool DictionaryReader::Lookup(const String &key, TermMeta &term_meta) { | ||
u64 val; | ||
bool found = fst_->Get((u8 *)key.c_str(), key.length(), val); | ||
if (!found) | ||
return false; | ||
u8 *data_cursor = data_ptr_ + val; | ||
SizeT left_size = data_len_ - val; | ||
meta_loader_.Load(data_cursor, left_size, term_meta); | ||
return true; | ||
} | ||
|
||
void DictionaryReader::LookupPrefix(const String &prefix, Vector<Pair<String, TermMeta>> &term_metas) { | ||
FstStream s(*fst_, (u8 *)prefix.c_str(), prefix.length()); | ||
Vector<u8> key; | ||
u64 val; | ||
String term; | ||
TermMeta term_meta; | ||
while (s.Next(key, val)) { | ||
term = String((char *)key.data(), key.size()); | ||
u8 *data_cursor = data_ptr_ + val; | ||
SizeT left_size = data_len_ - val; | ||
meta_loader_.Load(data_cursor, left_size, term_meta); | ||
term_metas.push_back({term, term_meta}); | ||
} | ||
} | ||
|
||
} // namespace infinity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,28 @@ | ||
module; | ||
|
||
import stl; | ||
import memory_pool; | ||
import segment_posting; | ||
import index_defines; | ||
import index_config; | ||
import term_meta; | ||
import posting_list_format; | ||
import fst; | ||
export module dict_reader; | ||
|
||
namespace infinity { | ||
|
||
// Reader over a dictionary file: resolves a term (exact key or prefix) to its
// TermMeta.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff (for example, the destructor is declared twice below) —
// confirm against the actual header before relying on it.
export class DictionaryReader { | ||
private: | ||
// NOTE(review): reference member — it is bound to the constructor argument,
// so the caller's String presumably has to outlive this reader; confirm.
const String &dict_path_; | ||
// Decodes a TermMeta from raw bytes; constructed from the PostingFormatOption.
TermMetaLoader meta_loader_; | ||
// Start address and length of the mapped dictionary file
// (filled in by MmapFile, released by MunmapFile).
u8 *data_ptr_; | ||
SizeT data_len_; | ||
// FST built over the tail of the mapped file; its values are used as byte
// offsets into the mapping.
UniquePtr<Fst> fst_; | ||
|
||
public: | ||
DictionaryReader(const String &root_path){}; | ||
// Maps `dict_path` and builds the FST; defined in the implementation unit.
DictionaryReader(const String &dict_path, const PostingFormatOption &option); | ||
|
||
// Unmaps the dictionary file.
~DictionaryReader(); | ||
|
||
~DictionaryReader() = default; | ||
// Exact-match lookup: returns true and fills `term_meta` when `key` exists.
bool Lookup(const String &key, TermMeta &term_meta); | ||
|
||
// Stub overload: always reports "not found".
bool Lookup(const String &key, u64 &value) { return false; } | ||
// Appends a (term, TermMeta) pair to `term_metas` for each FST entry
// selected by `prefix`.
void LookupPrefix(const String &prefix, Vector<Pair<String, TermMeta>> &term_metas); | ||
}; | ||
} // namespace infinity |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters