@@ -47,11 +47,149 @@ export struct DirectIO {
47
47
SizeT length_;
48
48
};
49
49
50
+ export struct TermTuple {
51
+ std::string_view term_;
52
+ u32 doc_id_;
53
+ u32 term_pos_;
54
+ int Compare (const TermTuple &rhs) const {
55
+ int ret = term_.compare (rhs.term_ );
56
+ if (ret == 0 ) {
57
+ if (doc_id_ != rhs.doc_id_ ) {
58
+ if (doc_id_ < rhs.doc_id_ )
59
+ return -1 ;
60
+ else
61
+ return 1 ;
62
+ } else {
63
+ if (term_pos_ != rhs.term_pos_ ) {
64
+ if (term_pos_ < rhs.term_pos_ )
65
+ return -1 ;
66
+ else
67
+ return 1 ;
68
+ }
69
+ return 0 ;
70
+ }
71
+ } else
72
+ return ret < 0 ? -1 : 1 ;
73
+ }
74
+
75
+ bool operator ==(const TermTuple &other) const { return Compare (other) == 0 ; }
76
+
77
+ bool operator >(const TermTuple &other) const { return Compare (other) < 0 ; }
78
+
79
+ bool operator <(const TermTuple &other) const { return Compare (other) > 0 ; }
80
+
81
+ TermTuple (char *p, u16 len) : term_(p, len - sizeof (doc_id_) - sizeof (term_pos_) - 1 ) {
82
+ doc_id_ = *((u32 *)(p + term_.size () + 1 ));
83
+ term_pos_ = *((u32 *)(p + term_.size () + 1 + sizeof (doc_id_)));
84
+ }
85
+ };
86
+
87
+ template <typename KeyType, typename LenType, typename = void >
88
+ struct KeyAddress {
89
+ char *data{nullptr };
90
+ u64 addr;
91
+ u32 idx;
92
+
93
+ KeyAddress (char *p, u64 ad, u32 i) {
94
+ data = p;
95
+ addr = ad;
96
+ idx = i;
97
+ }
98
+
99
+ KeyAddress () {
100
+ data = nullptr ;
101
+ addr = -1 ;
102
+ idx = -1 ;
103
+ }
104
+
105
+ KeyType KEY () { return KeyType (data + sizeof (LenType), LEN ()); }
106
+ KeyType KEY () const { return KeyType (data + sizeof (LenType), LEN ()); }
107
+ LenType LEN () const { return *(LenType *)data; }
108
+ u64 &ADDR () { return addr; }
109
+ u64 ADDR () const { return addr; }
110
+ u32 IDX () const { return idx; }
111
+ u32 &IDX () { return idx; }
112
+
113
+ int Compare (const KeyAddress &p) const {
114
+ if (KEY () == p.KEY ())
115
+ return 0 ;
116
+ else if (KEY () > p.KEY ())
117
+ return 1 ;
118
+ else
119
+ return -1 ;
120
+ }
121
+
122
+ bool operator ==(const KeyAddress &other) const { return Compare (other) == 0 ; }
123
+
124
+ bool operator >(const KeyAddress &other) const { return Compare (other) < 0 ; }
125
+
126
+ bool operator <(const KeyAddress &other) const { return Compare (other) > 0 ; }
127
+ };
128
+
129
+ template <typename KeyType, typename LenType>
130
+ struct KeyAddress <KeyType, LenType, typename std::enable_if<std::is_scalar<KeyType>::value>::type> {
131
+ char *data{nullptr };
132
+ u64 addr;
133
+ u32 idx;
134
+
135
+ KeyAddress (char *p, u64 ad, u32 i) {
136
+ data = p;
137
+ addr = ad;
138
+ idx = i;
139
+ }
140
+
141
+ KeyAddress () {
142
+ data = nullptr ;
143
+ addr = -1 ;
144
+ idx = -1 ;
145
+ }
146
+
147
+ KeyType &KEY () { return *(KeyType *)(data + sizeof (LenType)); }
148
+ KeyType KEY () const { return *(KeyType *)(data + sizeof (LenType)); }
149
+
150
+ LenType LEN () const { return *(LenType *)data; }
151
+ u64 &ADDR () { return addr; }
152
+ u64 ADDR () const { return addr; }
153
+ u32 IDX () const { return idx; }
154
+ u32 &IDX () { return idx; }
155
+
156
+ int Compare (const KeyAddress &p) const {
157
+ if (KEY () == p.KEY ())
158
+ return 0 ;
159
+ else if (KEY () > p.KEY ())
160
+ return 1 ;
161
+ else
162
+ return -1 ;
163
+
164
+ LenType len1 = LEN () / sizeof (KeyType);
165
+ LenType len2 = p.LEN () / sizeof (KeyType);
166
+
167
+ for (LenType i = 1 ; i < len1 && i < len2; ++i) {
168
+ if (((KeyType *)(data + sizeof (LenType)))[i] > ((KeyType *)(p.data + sizeof (LenType)))[i])
169
+ return 1 ;
170
+ if (((KeyType *)(data + sizeof (LenType)))[i] < ((KeyType *)(p.data + sizeof (LenType)))[i])
171
+ return -1 ;
172
+ }
173
+
174
+ if (len1 == len2)
175
+ return 0 ;
176
+
177
+ if (len1 > len2)
178
+ return 1 ;
179
+ return -1 ;
180
+ }
181
+
182
+ bool operator ==(const KeyAddress &other) const { return Compare (other) == 0 ; }
183
+
184
+ bool operator >(const KeyAddress &other) const { return Compare (other) < 0 ; }
185
+
186
+ bool operator <(const KeyAddress &other) const { return Compare (other) > 0 ; }
187
+ };
188
+
50
189
export template <typename KeyType, typename LenType>
51
190
class SortMerger {
52
191
typedef SortMerger<KeyType, LenType> self_t ;
53
-
54
- struct KeyAddr ;
192
+ typedef KeyAddress<KeyType, LenType> KeyAddr;
55
193
String filenm_;
56
194
const u32 MAX_GROUP_SIZE_; // !< max group size
57
195
const u32 BS_SIZE_; // !< in fact it equals to memory size
@@ -60,8 +198,8 @@ class SortMerger {
60
198
u32 OUT_BUF_SIZE_; // !< max size of output buffer
61
199
const u32 OUT_BUF_NUM_; // !< output threads number
62
200
63
- std::priority_queue<struct KeyAddr > pre_heap_; // !< predict heap
64
- std::priority_queue<struct KeyAddr > merge_heap_; // !< merge heap
201
+ std::priority_queue<KeyAddr> pre_heap_; // !< predict heap
202
+ std::priority_queue<KeyAddr> merge_heap_; // !< merge heap
65
203
66
204
u32 *micro_run_idx_{nullptr }; // !< the access index of each microruns
67
205
u32 *micro_run_pos_{nullptr }; // !< the access position within each microruns
@@ -102,65 +240,6 @@ class SortMerger {
102
240
103
241
u64 FILE_LEN_;
104
242
105
// Entry type of SortMerger's pre_heap_ and merge_heap_ priority queues: a pointer to a
// length-prefixed record (data) together with an address (addr) and an index (idx).
- struct KeyAddr {
106
- char *data{nullptr };
107
- u64 addr;
108
- u32 idx;
109
-
110
- KeyAddr (char *p, u64 ad, u32 i) {
111
- data = p;
112
- addr = ad;
113
- idx = i;
114
- }
115
-
116
// Default entry: null data; addr and idx are assigned -1 (the all-ones value if u64/u32 are unsigned - TODO confirm).
- KeyAddr () {
117
- data = nullptr ;
118
- addr = -1 ;
119
- idx = -1 ;
120
- }
121
-
122
// KEY() reads a KeyType located right after the LenType-sized prefix; LEN() reads that prefix from the start of data.
- KeyType &KEY () { return *(KeyType *)(data + sizeof (LenType)); }
123
- KeyType KEY () const { return *(KeyType *)(data + sizeof (LenType)); }
124
- LenType LEN () const { return *(LenType *)data; }
125
- u64 &ADDR () { return addr; }
126
- u64 ADDR () const { return addr; }
127
- u32 IDX () const { return idx; }
128
- u32 &IDX () { return idx; }
129
-
130
// Three-way comparison. The leading keys decide the result whenever ==, > or < holds for them;
// the element-wise loop further down is reached only if all three report false.
- int Compare (const KeyAddr &p) const {
131
- if (KEY () == p.KEY ())
132
- return 0 ;
133
-
134
- if (KEY () > p.KEY ())
135
- return 1 ;
136
- if (KEY () < p.KEY ())
137
- return -1 ;
138
-
139
// Number of KeyType-sized elements in each record, derived from the length prefix.
- LenType len1 = LEN () / sizeof (KeyType);
140
- LenType len2 = p.LEN () / sizeof (KeyType);
141
-
142
// Compare the remaining elements pairwise, starting at index 1, up to the shorter record.
- for (LenType i = 1 ; i < len1 && i < len2; ++i) {
143
- if (((KeyType *)(data + sizeof (LenType)))[i] > ((KeyType *)(p.data + sizeof (LenType)))[i])
144
- return 1 ;
145
- if (((KeyType *)(data + sizeof (LenType)))[i] < ((KeyType *)(p.data + sizeof (LenType)))[i])
146
- return -1 ;
147
- }
148
-
149
// No differing element found: the record with more elements ranks higher.
- if (len1 == len2)
150
- return 0 ;
151
-
152
- if (len1 > len2)
153
- return 1 ;
154
- return -1 ;
155
- }
156
-
157
- bool operator ==(const KeyAddr &other) const { return Compare (other) == 0 ; }
158
-
159
// operator> and operator< report the opposite sign of Compare(): '>' is true when Compare() is negative.
- bool operator >(const KeyAddr &other) const { return Compare (other) < 0 ; }
160
-
161
- bool operator <(const KeyAddr &other) const { return Compare (other) > 0 ; }
162
- };
163
-
164
243
void NewBuffer ();
165
244
166
245
void Init (DirectIO &io_stream);
0 commit comments