# model_meta.yml
---
# Metadata for the embedding models (plus the BM25 lexical baseline), keyed by
# model identifier under the single top-level key `model_meta`.
#
# Fields used by the entries below (not every entry carries every field):
#   link           URL of the model card or the provider's documentation.
#   revision       Pinned revision: a hex identifier for the huggingface.co
#                  entries, "1" for the hosted API models. Always quoted so it
#                  loads as a string regardless of its characters.
#   desc           Short human-readable description.
#   seq_len        NOTE(review): presumably the maximum input length in
#                  tokens -- confirm against the consumer.
#   size           NOTE(review): presumably the parameter count in millions --
#                  confirm. Absent on the hosted API models and on BM25.
#   dim            NOTE(review): presumably the embedding dimensionality --
#                  confirm.
#   license        License name as a display string.
#   organization   Publishing organization(s) as a display string.
#   instruction_*  Task-specific instruction text; present only on a subset of
#                  the entries.
#   mteb_*         NOTE(review): presumably MTEB benchmark scores -- confirm
#                  the source. `voyage-multilingual-2` has none and `BM25` has
#                  only `mteb_retrieval`.
model_meta:
  sentence-transformers/all-MiniLM-L6-v2:
    link: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
    revision: "8b3219a92973c328a8e22fadcfa821b5dc75636a"
    desc: all-MiniLM-L6-v2 by Sentence Transformers
    seq_len: 512
    size: 23
    dim: 384
    license: Apache 2.0
    organization: Sentence Transformers
    mteb_overall: 56.26
    mteb_retrieval: 41.95
    mteb_sts: 78.90
    mteb_clustering: 42.35

  intfloat/multilingual-e5-small:
    link: https://huggingface.co/intfloat/multilingual-e5-small
    revision: "e4ce9877abf3edfe10b0d82785e83bdcb973e22e"
    desc: multilingual-e5-small by Microsoft
    seq_len: 512
    size: 44
    dim: 384
    license: MIT License
    organization: Microsoft
    mteb_overall: 57.87
    mteb_retrieval: 46.64
    mteb_sts: 79.10
    mteb_clustering: 37.08

  intfloat/multilingual-e5-large-instruct:
    link: https://huggingface.co/intfloat/multilingual-e5-large-instruct
    revision: "baa7be480a7de1539afce709c8f13f833a510e0a"
    desc: multilingual-e5-large-instruct by Microsoft
    seq_len: 514
    size: 560
    dim: 1024
    license: MIT License
    organization: Microsoft
    instruction_query_arxiv: Given a query, retrieve a relevant paper title and abstract from arXiv
    instruction_query_wikipedia: Given a query, retrieve a relevant title and passage from Wikipedia
    instruction_query_stackexchange: Given a query, retrieve a relevant question and answer from Stack Exchange
    instruction_sts: Retrieve semantically similar text
    instruction_clustering: Identify the topic/theme/category of the text
    mteb_overall: 64.41
    mteb_retrieval: 52.47
    mteb_sts: 84.78
    mteb_clustering: 47.10

  intfloat/e5-mistral-7b-instruct:
    link: https://huggingface.co/intfloat/e5-mistral-7b-instruct
    revision: "07163b72af1488142a360786df853f237b1a3ca1"
    desc: e5-mistral-7b-instruct by Microsoft
    seq_len: 32768
    size: 7111
    dim: 4096
    license: MIT License
    organization: Microsoft
    instruction_query_arxiv: Given a query, retrieve a relevant paper title and abstract from arXiv
    instruction_query_wikipedia: Given a query, retrieve a relevant title and passage from Wikipedia
    instruction_query_stackexchange: Given a query, retrieve a relevant question and answer from Stack Exchange
    instruction_sts: Retrieve semantically similar text
    instruction_clustering: Identify the topic/theme/category of the text
    mteb_overall: 66.63
    mteb_retrieval: 56.89
    mteb_sts: 84.63
    mteb_clustering: 50.26

  GritLM/GritLM-7B:
    link: https://huggingface.co/GritLM/GritLM-7B
    revision: "13f00a0e36500c80ce12870ea513846a066004af"
    desc: GritLM-7B by Contextual AI, HKU, Microsoft
    seq_len: 32768
    size: 7240
    dim: 4096
    license: Apache 2.0
    organization: Contextual AI, HKU, Microsoft
    instruction_query_arxiv: Given a query, retrieve a relevant paper title and abstract from arXiv
    instruction_query_wikipedia: Given a query, retrieve a relevant title and passage from Wikipedia
    instruction_query_stackexchange: Given a query, retrieve a relevant question and answer from Stack Exchange
    instruction_sts: Retrieve semantically similar text
    instruction_clustering: Identify the topic/theme/category of the text
    mteb_overall: 66.76
    mteb_retrieval: 57.41
    mteb_sts: 83.35
    mteb_clustering: 50.61

  BAAI/bge-large-en-v1.5:
    link: https://huggingface.co/BAAI/bge-large-en-v1.5
    revision: "d4aa6901d3a41ba39fb536a557fa166f842b0e09"
    desc: bge-large-en-v1.5 by BAAI
    seq_len: 512
    size: 335
    dim: 1024
    license: MIT
    organization: BAAI
    mteb_overall: 64.23
    mteb_retrieval: 54.29
    mteb_sts: 83.11
    mteb_clustering: 46.08

  nvidia/NV-Embed-v1:
    link: https://huggingface.co/nvidia/NV-Embed-v1
    revision: "77b11725df91ca45663471a0f2ec6c06e04cbadb"
    desc: NV-Embed-v1 by Nvidia
    seq_len: 32768
    size: 7851
    dim: 4096
    license: CC-BY-NC-4.0
    organization: Nvidia
    mteb_overall: 69.32
    mteb_retrieval: 59.36
    mteb_sts: 82.84
    mteb_clustering: 52.8

  Alibaba-NLP/gte-Qwen2-7B-instruct:
    link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
    revision: "e26182b2122f4435e8b3ebecbf363990f409b45b"
    desc: gte-Qwen2-7B-instruct by Alibaba
    seq_len: 131072
    size: 7613
    dim: 3584
    license: Apache 2.0
    organization: Alibaba
    instruction_query_arxiv: Given a query, retrieve a relevant paper title and abstract from arXiv
    instruction_query_wikipedia: Given a query, retrieve a relevant title and passage from Wikipedia
    instruction_query_stackexchange: Given a query, retrieve a relevant question and answer from Stack Exchange
    instruction_clustering: Identify the topic/theme/category of the text
    instruction_sts: Retrieve semantically similar text
    mteb_overall: 70.24
    mteb_retrieval: 60.25
    mteb_sts: 83.04
    mteb_clustering: 56.92

  Salesforce/SFR-Embedding-2_R:
    link: https://huggingface.co/Salesforce/SFR-Embedding-2_R
    revision: "91762139d94ed4371a9fa31db5551272e0b83818"
    desc: SFR-Embedding-2_R by Salesforce
    seq_len: 32768
    size: 7111
    dim: 4096
    license: CC-BY-NC-4.0
    organization: Salesforce
    instruction_query_arxiv: Given a query, retrieve a relevant paper title and abstract from arXiv
    instruction_query_wikipedia: Given a query, retrieve a relevant title and passage from Wikipedia
    instruction_query_stackexchange: Given a query, retrieve a relevant question and answer from Stack Exchange
    instruction_clustering: Identify the topic/theme/category of the text
    instruction_sts: Retrieve semantically similar text
    mteb_overall: 70.31
    mteb_retrieval: 60.18
    mteb_sts: 81.26
    mteb_clustering: 56.17

  jinaai/jina-embeddings-v2-base-en:
    link: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
    revision: "31b72fbf354fea65264ec54edf0b189d94b92d39"
    desc: jina-embeddings-v2-base-en by Jina AI
    seq_len: 8192
    size: 137
    dim: 768
    license: Apache 2.0
    organization: Jina AI
    mteb_overall: 60.38
    mteb_retrieval: 47.87
    mteb_sts: 80.70
    mteb_clustering: 41.73

  mixedbread-ai/mxbai-embed-large-v1:
    link: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
    revision: "990580e27d329c7408b3741ecff85876e128e203"
    desc: mxbai-embed-large-v1 by mixedbread.ai
    seq_len: 512
    size: 335
    dim: 1024
    license: Apache 2.0
    organization: mixedbread.ai
    mteb_overall: 64.68
    mteb_retrieval: 54.39
    mteb_sts: 85.00
    mteb_clustering: 46.71

  nomic-ai/nomic-embed-text-v1.5:
    link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
    revision: "b0753ae76394dd36bcfb912a46018088bca48be0"
    desc: nomic-embed-text-v1.5 by nomic.ai
    seq_len: 8192
    size: 137
    dim: 768
    license: Apache 2.0
    organization: nomic.ai
    mteb_overall: 62.28
    mteb_retrieval: 53.01
    mteb_sts: 81.94
    mteb_clustering: 43.93

  nomic-ai/nomic-embed-text-v1:
    link: https://huggingface.co/nomic-ai/nomic-embed-text-v1
    revision: "0759316f275aa0cb93a5b830973843ca66babcf5"
    desc: nomic-embed-text-v1 by nomic.ai
    seq_len: 8192
    size: 137
    dim: 768
    license: Apache 2.0
    organization: nomic.ai
    mteb_overall: 62.39
    mteb_retrieval: 52.81
    mteb_sts: 82.06
    mteb_clustering: 43.91

  McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised:
    link: https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised
    revision: "baa8ebf04a1c2500e61288e7dad65e8ae42601a7"
    desc: LLM2Vec by McGill
    seq_len: 8192
    size: 7505
    dim: 4096
    license: MIT
    organization: McGill
    mteb_overall: 65.01
    mteb_retrieval: 56.63
    mteb_sts: 83.58
    mteb_clustering: 46.45

  voyage-multilingual-2:
    link: https://docs.voyageai.com/docs/embeddings
    revision: "1"
    desc: voyage-multilingual-2 by Voyage AI
    seq_len: 32000
    dim: 1024
    license: Proprietary
    organization: Voyage AI

  voyage-large-2-instruct:
    link: https://docs.voyageai.com/docs/embeddings
    revision: "1"
    desc: voyage-large-2-instruct by Voyage AI
    seq_len: 16000
    dim: 1024
    license: Proprietary
    organization: Voyage AI
    mteb_overall: 68.28
    mteb_retrieval: 58.28
    mteb_sts: 84.58
    mteb_clustering: 53.35

  text-embedding-004:
    link: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api
    revision: "1"
    desc: text-embedding-004 by Google
    seq_len: 2048
    dim: 768
    license: Proprietary
    organization: Google
    mteb_overall: 66.31
    mteb_retrieval: 55.7
    mteb_sts: 85.07
    mteb_clustering: 47.48

  text-embedding-3-large:
    link: https://platform.openai.com/docs/guides/embeddings
    revision: "1"
    desc: text-embedding-3-large by OpenAI
    seq_len: 8191
    dim: 3072
    license: Proprietary
    organization: OpenAI
    mteb_overall: 64.59
    mteb_retrieval: 55.44
    mteb_sts: 81.73
    mteb_clustering: 49.01

  embed-english-v3.0:
    link: https://docs.cohere.com/docs/cohere-embed
    revision: "1"
    desc: embed-english-v3.0 by Cohere
    seq_len: 512
    dim: 1024
    license: Proprietary
    organization: Cohere
    mteb_overall: 64.47
    mteb_retrieval: 55
    mteb_sts: 82.62
    mteb_clustering: 47.43

  # Lexical (non-embedding) baseline: carries no revision, seq_len, size, dim
  # or organization, and only a retrieval score.
  BM25:
    link: https://github.com/xhluca/bm25s
    desc: Fast lexical search via BM25
    license: MIT
    mteb_retrieval: 42.4