From 4d66434c80050ace3b927f3fc1829b8dd377f78a Mon Sep 17 00:00:00 2001 From: HSILA Date: Fri, 24 Jan 2025 22:50:50 -0500 Subject: [PATCH 1/8] feat: Integrating ChemTEB (#1708) * Add SMILES, AI Paraphrase and Inter-Source Paragraphs PairClassification Tasks * Add chemical subsets of NQ and HotpotQA datasets as Retrieval tasks * Add PubChem Synonyms PairClassification task * Update task __init__ for previously added tasks * Add nomic-bert loader * Add a script to run the evaluation pipeline for chemical-related tasks * Add 15 Wikipedia article classification tasks * Add PairClassification and BitextMining tasks for Coconut SMILES * Fix naming of some Classification and PairClassification tasks * Fix some classification tasks naming issues * Integrate WANDB with benchmarking script * Update .gitignore * Fix `nomic_models.py` issue with retrieval tasks, similar to issue #1115 in original repo * Add one chemical model and some SentenceTransformer models * Fix a naming issue for SentenceTransformer models * Add OpenAI, bge-m3 and matscibert models * Add PubChem SMILES Bitext Mining tasks * Change metric namings to be more descriptive * Add English e5 and bge v1 models, all the sizes * Add two Wikipedia Clustering tasks * Add a try-except in evaluation script to skip faulty models during the benchmark. * Add bge v1.5 models and clustering score extraction to json parser * Add Amazon Titan embedding models * Add Cohere Bedrock models * Add two SDS Classification tasks * Add SDS Classification tasks to classification init and chem_eval * Add a retrieval dataset, update dataset names and revisions * Update revision for the CoconutRetrieval dataset: handle duplicate SMILES (documents) * Update `CoconutSMILES2FormulaPC` task * Change CoconutRetrieval dataset to a smaller one * Update some models - Integrate models added in ChemTEB (such as amazon, cohere bedrock and nomic bert) with latest modeling format in mteb. 
- Update the metadata for the mentioned models * Fix a typo: the `open_weights` argument was repeated twice * Update ChemTEB tasks - Rename some tasks for better readability. - Merge some BitextMining and PairClassification tasks into a single task with subsets (`PubChemSMILESBitextMining` and `PubChemSMILESPC`) - Add a new multilingual task (`PubChemWikiPairClassification`) consisting of 12 languages. - Update dataset paths, revisions and metadata for most tasks. - Add a `Chemistry` domain to `TaskMetadata` * Remove unnecessary files and tasks for MTEB * Update some ChemTEB tasks - Move `PubChemSMILESBitextMining` to `eng` folder - Add citations for tasks involving SDS, NQ, Hotpot, PubChem data - Update Clustering tasks `category` - Change `main_score` for `PubChemAISentenceParaphrasePC` * Create ChemTEB benchmark * Remove `CoconutRetrieval` * Update tasks and benchmarks tables with ChemTEB * Mention ChemTEB in readme * Fix some issues, update task metadata, lint - `eval_langs` fixed - Dataset path was fixed for two datasets - Metadata was completed for all tasks, mainly the following fields: `date`, `task_subtypes`, `dialect`, `sample_creation` - ruff lint - rename `nomic_bert_models.py` to `nomic_bert_model.py` and update it. * Remove `nomic_bert_model.py` as it is now compatible with SentenceTransformer. * Remove `WikipediaAIParagraphsParaphrasePC` task due to being trivial. * Merge `amazon_models` and `cohere_bedrock_models.py` into `bedrock_models.py` * Remove unnecessary `load_data` for some tasks. * Update `bedrock_models.py`, `openai_models.py` and two dataset revisions - Text should be truncated for amazon text embedding models. - `text-embedding-ada-002` returns null embeddings for some inputs with 8192 tokens. 
- Two datasets are updated, dropping very long samples (len > 99th percentile) * Add a layer of dynamic truncation for amazon models in `bedrock_models.py` * Replace `metadata_dict` with `self.metadata` in `PubChemSMILESPC.py` * fix model meta for bedrock models * Add reference comment to original Cohere API implementation --- README.md | 1 + docs/benchmarks.md | 33 ++- docs/tasks.md | 55 +++- mteb/abstasks/TaskMetadata.py | 1 + mteb/benchmarks/benchmarks.py | 44 +++ mteb/models/bedrock_models.py | 264 ++++++++++++++++++ mteb/models/overview.py | 2 + mteb/tasks/BitextMining/__init__.py | 1 + .../eng/PubChemSMILESBitextMining.py | 68 +++++ mteb/tasks/BitextMining/eng/__init__.py | 0 mteb/tasks/Classification/__init__.py | 17 ++ .../eng/SDSEyeProtectionClassification.py | 44 +++ .../eng/SDSGlovesClassification.py | 44 +++ .../eng/WikipediaBioMetChemClassification.py | 37 +++ .../WikipediaBiolumNeurochemClassification.py | 37 +++ ...kipediaChemEngSpecialtiesClassification.py | 37 +++ .../eng/WikipediaChemFieldsClassification.py | 37 +++ .../WikipediaChemistryTopicsClassification.py | 37 +++ ...pediaCompChemSpectroscopyClassification.py | 37 +++ ...ediaCryobiologySeparationClassification.py | 37 +++ ...CrystallographyAnalyticalClassification.py | 37 +++ ...ediaGreenhouseEnantiopureClassification.py | 37 +++ .../WikipediaIsotopesFissionClassification.py | 37 +++ .../WikipediaLuminescenceClassification.py | 37 +++ ...WikipediaOrganicInorganicClassification.py | 37 +++ ...ipediaSaltsSemiconductorsClassification.py | 37 +++ ...ipediaSolidStateColloidalClassification.py | 37 +++ ...kipediaTheoreticalAppliedClassification.py | 37 +++ mteb/tasks/Clustering/__init__.py | 2 + ...WikipediaChemistrySpecialtiesClustering.py | 37 +++ .../eng/WikipediaChemistryTopicsClustering.py | 37 +++ mteb/tasks/PairClassification/__init__.py | 5 + .../eng/PubChemAISentenceParaphrasePC.py | 60 ++++ .../PairClassification/eng/PubChemSMILESPC.py | 128 +++++++++ .../eng/PubChemSynonymPC.py | 61 
++++ .../eng/PubChemWikiParagraphsPC.py | 60 ++++ .../PubChemWikiPairClassification.py | 77 +++++ mteb/tasks/Retrieval/__init__.py | 2 + .../Retrieval/eng/ChemHotpotQARetrieval.py | 60 ++++ mteb/tasks/Retrieval/eng/ChemNQRetrieval.py | 45 +++ 40 files changed, 1678 insertions(+), 25 deletions(-) create mode 100644 mteb/models/bedrock_models.py create mode 100644 mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py create mode 100644 mteb/tasks/BitextMining/eng/__init__.py create mode 100644 mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py create mode 100644 mteb/tasks/Classification/eng/SDSGlovesClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py create mode 100644 mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py create mode 100644 
mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py create mode 100644 mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py create mode 100644 mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py create mode 100644 mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py create mode 100644 mteb/tasks/PairClassification/eng/PubChemSMILESPC.py create mode 100644 mteb/tasks/PairClassification/eng/PubChemSynonymPC.py create mode 100644 mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py create mode 100644 mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py create mode 100644 mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py create mode 100644 mteb/tasks/Retrieval/eng/ChemNQRetrieval.py diff --git a/README.md b/README.md index f556cad894..daf715f029 100644 --- a/README.md +++ b/README.md @@ -517,5 +517,6 @@ You may also want to read and cite the amazing work that has extended MTEB & int - Orion Weller, Benjamin Chang, Sean MacAvaney, Kyle Lo, Arman Cohan, Benjamin Van Durme, Dawn Lawrie, Luca Soldaini. "[FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions](https://arxiv.org/abs/2403.15246)" arXiv 2024 - Dawei Zhu, Liang Wang, Nan Yang, Yifan Song, Wenhao Wu, Furu Wei, Sujian Li. "[LongEmbed: Extending Embedding Models for Long Context Retrieval](https://arxiv.org/abs/2404.12096)" arXiv 2024 - Kenneth Enevoldsen, Márton Kardos, Niklas Muennighoff, Kristoffer Laigaard Nielbo. "[The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding](https://arxiv.org/abs/2406.02396)" arXiv 2024 +- Ali Shiraee Kasmaee, Mohammad Khodadad, Mohammad Arshi Saloot, Nick Sherck, Stephen Dokas, Hamidreza Mahyar, Soheila Samiee. 
"[ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance & Efficiency on a Specific Domain](https://arxiv.org/abs/2412.00532)" arXiv 2024 For works that have used MTEB for benchmarking, you can find them on the [leaderboard](https://huggingface.co/spaces/mteb/leaderboard). diff --git a/docs/benchmarks.md b/docs/benchmarks.md index a5abe50215..7c0f07d878 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -7,16 +7,27 @@ The following table gives you an overview of the benchmarks in MTEB. | Name | # Tasks | Task Types | Domains | Languages | |------|---------|------------|---------|-----------| -| [CoIR](https://github.com/CoIR-team/coir) | 10 | {'Retrieval': 10} | [Written, Programming] | python,c++,sql,go,eng,php,javascript,ruby,java | -| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | 7 | {'BitextMining': 7} | [Written, Social, Reviews] | sun,kaz,tzl,ido,abs,arq,yue,tam,nij,glg,slk,hsb,ber,xho,cbk,pol,uzb,ina,kab,swh,amh,fao,kzj,lfn,uig,sqi,deu,ang,ind,bug,pms,ibo,cym,eus,spa,ceb,tgl,ron,isl,ita,csb,cha,fin,est,pes,jpn,tel,tha,oci,cmn,min,fry,bbc,epo,lit,rus,bos,hrv,war,ara,bjn,mkd,srp,ast,nno,urd,pam,aze,eng,ace,bew,kor,dan,awa,mui,hye,ban,cor,ben,gle,swe,mad,bul,lat,cat,nob,fra,pcm,ell,mar,vie,tat,ukr,gsw,kat,arz,dsb,lvs,nld,tur,bel,max,nds,afr,khm,dtp,yor,ces,gla,zsm,mak,ile,nov,orv,bre,swg,rej,mhr,mon,mal,jav,heb,slv,bhp,kur,wuu,tuk,por,hun,hin,hau,yid | +| [BRIGHT](https://brightbenchmark.github.io/) | 1 | {'Retrieval': 1} | [Non-fiction] | eng | +| [ChemTEB](https://arxiv.org/abs/2412.00532) | 27 | {'BitextMining': 1, 'Classification': 17, 'Clustering': 2, 'PairClassification': 5, 'Retrieval': 2} | [Chemistry] | nld,tur,eng,ces,kor,zho,spa,hin,jpn,deu,fra,msa,por | +| [CoIR](https://github.com/CoIR-team/coir) | 10 | {'Retrieval': 10} | [Written, Programming] | javascript,ruby,sql,go,eng,java,php,python,c++ | +| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | 6 | {'Retrieval': 6} | [Fiction, Academic, 
Written, Blog, Non-fiction, Spoken, Encyclopaedic] | eng | +| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | 7 | {'BitextMining': 7} | [Reviews, Written, Social] | sqi,ban,srp,jpn,nds,lat,por,mon,kur,bul,slv,mak,deu,uzb,yor,kzj,max,kat,cha,yid,zsm,spa,pms,mhr,min,fao,heb,nij,mui,tuk,rus,bew,swe,pes,slk,ceb,bjn,ido,abs,ukr,ina,kab,tgl,cor,dan,kaz,fry,rej,hrv,ces,lfn,glg,dsb,hau,ace,urd,ben,yue,nld,eng,epo,ron,xho,wuu,cmn,ind,ang,hsb,mad,pam,nov,swh,bbc,pcm,ara,hye,mkd,nno,ast,jav,lvs,mal,swg,nob,tat,arz,vie,ile,tam,est,ber,bre,csb,pol,afr,cbk,bug,tzl,kor,ibo,hun,war,aze,tha,mar,uig,gla,orv,hin,amh,bel,sun,fin,cat,awa,gsw,isl,oci,ell,cym,arq,ita,fra,bos,dtp,eus,bhp,tel,tur,khm,lit,gle | +| MTEB(Europe, beta) | 74 | {'BitextMining': 7, 'Classification': 21, 'Clustering': 8, 'Retrieval': 15, 'InstructionRetrieval': 3, 'MultilabelClassification': 2, 'PairClassification': 6, 'Reranking': 3, 'STS': 9} | [Web, Fiction, Social, Academic, Religious, Written, Medical, Blog, Constructed, Non-fiction, Legal, News, Government, Reviews, Spoken, Encyclopaedic, Programming, Subtitles] | 
qvm,esk,nlg,toj,gup,llg,jpn,azj,for,lav,kmh,por,bsj,tna,upv,cta,smk,zty,qvz,ntj,ton,uvh,cjk,kgf,gaw,bak,seh,jiv,hui,ksr,uli,kwi,qvw,kkl,arl,msk,omw,aai,tet,yby,mva,fao,kgk,min,kac,dji,box,rus,chz,emp,ktm,bps,bon,nus,bss,cut,sue,meq,kpr,rwo,ceb,zaj,mib,aui,apc,kdl,mxb,okv,rai,big,reg,ulk,mlg,yap,tpt,hrv,nak,plu,nde,kyc,arp,hau,ary,alp,apr,caa,mbh,uvl,zat,bjp,urd,bki,lin,mek,hlt,iws,spl,xav,yml,lcm,ese,xho,are,mux,lww,ndg,ntu,tzj,ame,yss,zar,fil,aii,csy,gvs,zpm,amh,spp,ken,avt,ltz,swh,viv,kmk,zul,bqp,cav,wln,leu,tcs,tuf,mkd,clu,msy,too,ast,amx,quf,jav,yre,nhe,tat,lbk,maj,msm,rug,nor,tbc,prf,pad,zlm,kze,wnc,fai,cbs,mai,aoi,mxq,bao,kos,mlh,nep,mkl,roo,umb,poh,bod,nna,aey,afr,aly,cac,maa,aze,fon,tha,mhl,chd,tpi,tzm,acq,kyz,nbq,yle,ape,bco,att,nin,mkj,yuj,ata,djr,atb,enq,cpb,sxb,rmc,zas,guj,kbq,gfk,tgo,acm,cux,fin,npi,etr,tsn,dob,mpt,alq,byx,cak,cso,spy,oci,asm,ttc,nwi,srn,hmn,gyr,hto,ngu,cpa,tif,fuv,kue,yuw,ote,mgw,ssg,bos,mvn,dop,aso,mox,ndj,stp,mpp,nas,kon,mks,caf,mbs,mcd,wap,cco,tod,aon,aom,cnl,srp,zga,lat,sja,kpj,nhi,nko,swp,bho,blw,mih,mon,sna,bgs,als,kyf,kur,bul,uzb,knj,mam,yor,zos,gdr,aka,bam,bmh,gnw,lid,cha,msc,zpl,gun,qxn,zsm,spa,mgh,nca,cpc,quc,hvn,bvr,agu,ngp,aak,jni,mau,sab,wos,huv,swe,kea,tum,pes,som,pbt,mmo,amo,kgp,taq,sbe,mil,nhg,bmu,bvd,wrs,atg,muy,tpa,ign,vmy,uri,chf,cek,knf,pib,soy,boa,ces,xed,pma,hix,kbc,orm,sim,ace,nhw,kud,ppo,xnn,yut,snx,ilo,zaa,nld,bsp,aau,myk,grn,bkq,cme,bbb,ssd,fur,knc,knv,heg,urw,ayr,ons,sat,crx,rop,szl,suz,ncl,anh,kto,tca,chk,xla,qxh,ziw,ntp,azb,ara,tew,sot,cjv,djk,usa,ltg,cap,arz,lmo,vec,jao,wer,dhg,vie,ded,hop,khk,faa,tam,sus,mwc,ikk,kek,mie,trc,tue,ura,crh,bkd,bzj,kwj,klt,sps,jid,xsi,swa,qxo,lim,nqo,hns,tmd,mbt,mbc,ibo,hun,wrk,bnp,abt,kaq,car,kiz,nvm,nfa,gul,guo,uzn,beo,aer,nhy,otm,cjo,tgk,bel,eri,mca,wsk,rro,row,bsn,tpz,fij,tvk,msb,mpx,abx,poy,sgb,kas,tcz,top,dif,awk,cbc,bea,ell,myy,pus,bmr,ssx,pao,ebk,ajp,opm,wnu,gub,acr,tbf,ubr,cth,taj,aby,kde,mqj,zao,khm,hat,gle,azg,cbv,ian,apu,ptp,kbm,met,plt,sag,agd,pag,ydd,ckb,mzz,div,
kmg,miz,tac,tuo,gvn,boj,tee,mph,mna,qwh,gng,agg,mle,rgu,haw,med,kyg,mig,nhu,tnc,waj,kat,lua,zpz,kpx,tof,ven,dzo,yaa,bqc,klv,qul,kqw,bef,gai,heb,nuy,zac,mcr,zpc,ssw,meu,tuk,gui,kmo,usp,otq,khs,ksj,xbi,nya,cya,aoj,kmr,grc,sny,snp,mir,piu,geb,tgl,dik,agn,dan,qvn,kaz,kbp,mto,tiy,xon,zav,dww,zap,kqa,lac,kne,wat,cbt,naf,inb,kwf,crn,azz,wim,ben,wro,poi,yue,awb,cgc,eng,mjc,amf,mps,mwe,ncu,cle,tdt,hne,zai,gdn,toc,bhl,kir,ron,fue,kyq,ixl,ghs,ncj,tbz,nnq,mio,kwd,mxp,beu,sbk,fuh,gym,ztq,mey,ikw,pab,kmb,cof,tso,ipi,byr,aia,wiv,agm,npl,ter,hye,iou,tku,nno,cnt,kqc,sll,lvs,gnn,nob,dah,nii,san,wuv,udu,gux,ots,zpq,cuk,mbj,nab,bjz,hbo,imo,mcf,glk,zam,twi,srd,sin,zca,qvc,agr,con,kjs,zaw,mav,gum,dov,ood,soq,tte,msa,chq,cbk,isn,kpf,ptu,mri,cao,aeb,cni,aaz,yon,pan,sgz,rom,mop,gwi,nou,uig,gla,far,atd,hin,tnp,bbr,kpg,huu,arn,jvn,cat,awa,amm,urb,run,mit,pir,gam,adz,tir,isl,pls,mlt,qve,nyu,txu,tbg,dwy,quy,ruf,kiw,shp,amr,ita,maq,dgr,fra,kin,ubu,gof,gaz,mgc,cmo,ctu,tel,eus,mcq,bpr,ino,snd,bgt,mwf,acu,jic,kkc,jac,lit,xtd,dyu,kvn,zyp,prs,cop,auc,wed,apb,sqi,ban,wal,poe,tnk,myu,otn,kje,ong,bkx,zsr,hch,agt,wiu,spm,zpu,scn,sri,myw,buk,kdc,zho,sbs,slv,deu,kqf,kvg,tgp,bhg,dwr,xtm,amu,wbp,tim,ory,tos,kan,kbh,mya,mwp,mcb,shn,bdd,cub,yrb,tbo,yal,lug,tah,txq,emi,hub,nso,slk,zpo,zpv,bmk,nss,bjn,nch,bzd,shj,ukr,mbl,tlf,kab,kew,kpw,luo,cpy,kmu,kup,zab,pri,snc,wbi,acf,gmv,glg,amp,qup,nop,srq,yka,apw,mqb,wmt,bch,ewe,sey,lbb,epo,qvh,taw,fuc,kql,ksd,smo,gvf,cmn,yad,ind,qvs,obo,wmw,nsn,anv,mic,pap,ake,fas,cbr,bjr,glv,mdy,tsw,gvc,noa,bus,bjv,cwe,pon,pio,snn,mal,nho,bba,jae,mxt,wol,nif,ycn,lao,tfr,ffm,qub,hus,bzh,mlp,mti,not,nys,tzo,arb,mos,kam,cuc,dgc,pah,pjt,est,bxh,hot,bre,kms,cot,awx,bjk,pwg,cpu,hla,mpm,fuf,pol,tnn,shi,auy,mpj,tuc,bug,kor,zad,war,ars,rkb,mni,cbu,lif,mar,dad,mee,dgz,mco,kik,apz,mkn,sco,mbb,maz,lij,khz,hmo,guh,sun,cbi,lgl,nhr,tiw,daa,amn,amk,tke,lex,mag,cym,eko,zia,mcp,gah,urt,sua,cab,quh,srm,vid,blz,mmx,apn,tur,rmy,bem,yaq,ctp,cui,lus,tav,cax,yva | +| MTEB(Indic, beta) | 23 | {'BitextMining': 
4, 'Clustering': 1, 'Classification': 13, 'STS': 1, 'PairClassification': 1, 'Retrieval': 2, 'Reranking': 1} | [Web, Fiction, Social, Encyclopaedic, Religious, Written, Constructed, Non-fiction, Legal, News, Spoken, Reviews, Government] | ban,pag,ckb,ydd,srp,azj,jpn,bho,por,sna,als,scn,cjk,zho,mwr,bul,slv,deu,yor,bak,ory,aka,bam,kat,lua,kan,dzo,mya,zsm,spa,shn,min,nus,fao,heb,kac,lug,tuk,kea,rus,ssw,tum,swe,nso,pes,slk,som,mup,pbt,nya,ceb,bjn,kmr,apc,taq,ukr,kab,luo,tgl,dik,dan,kaz,kbp,hrv,ces,glg,ary,hau,ace,urd,ben,boy,ewe,ilo,yue,lin,nld,eng,hne,epo,kir,grn,ron,xho,smo,fur,knc,cmn,ind,ayr,sat,szl,pap,fas,kmb,tso,ltz,swh,brx,zul,azb,doi,ara,hye,mkd,nno,ast,jav,lvs,mal,lao,sot,wol,nob,ltg,tat,san,arz,lmo,vec,nor,vie,sag,khk,arb,mos,kam,tam,bgc,mai,gbm,srd,est,twi,crh,sin,nep,swa,umb,bod,pol,lim,nqo,afr,bug,kor,ibo,mri,hun,aeb,war,ars,mni,fon,tha,mar,tpi,tzm,acq,pan,uzn,kik,gla,uig,hin,lij,tgk,amh,bel,sun,acm,guj,fin,cat,awa,fij,npi,run,tsn,kas,tir,isl,asm,mlt,ell,oci,mag,cym,pus,gom,quy,ajp,raj,fuv,ita,kin,bos,fra,gaz,eus,tel,tur,snd,kon,khm,bem,dyu,gle,hat,lit,prs,lus,plt | +| MTEB(Medical) | 12 | {'Retrieval': 9, 'Clustering': 2, 'Reranking': 1} | [Web, Academic, Medical, Written, Non-fiction, Government] | rus,eng,kor,ara,spa,zho,vie,fra,pol,cmn | +| MTEB(Multilingual, beta) | 132 | {'BitextMining': 13, 'Classification': 43, 'Clustering': 17, 'Retrieval': 18, 'InstructionRetrieval': 3, 'MultilabelClassification': 5, 'PairClassification': 11, 'Reranking': 6, 'STS': 16} | [Web, Fiction, Social, Academic, Religious, Written, Medical, Blog, Constructed, Non-fiction, Legal, Government, News, Reviews, Spoken, Encyclopaedic, Programming, Subtitles] | 
qvm,esk,nlg,toj,gup,llg,jpn,azj,for,lav,kmh,por,bsj,tna,upv,cta,smk,zty,qvz,ntj,ton,uvh,cjk,kgf,gaw,bak,seh,jiv,hui,ksr,uli,kwi,qvw,kkl,arl,msk,omw,aai,tet,yby,mva,fao,kgk,min,kac,dji,mui,box,rus,chz,emp,bew,ktm,bps,bon,nus,bss,cut,sue,meq,kpr,rwo,ceb,zaj,mib,aui,apc,kdl,mxb,okv,rai,big,reg,ulk,mlg,yap,tpt,rej,hrv,nak,plu,nde,lfn,kyc,arp,hau,ary,alp,apr,caa,mbh,uvl,zat,bjp,urd,bki,lin,mek,hlt,iws,spl,xav,yml,lcm,ese,xho,are,mux,lww,ndg,ntu,tzj,ame,yss,zar,fil,aii,csy,gvs,zpm,amh,spp,ken,avt,ltz,swh,viv,kmk,zul,bqp,cav,wln,leu,tcs,tuf,mkd,clu,msy,too,ast,amx,quf,jav,yre,nhe,tat,lbk,maj,msm,rug,nor,tbc,prf,pad,zlm,kze,wnc,fai,cbs,mai,aoi,mxq,bao,kos,mlh,nep,mkl,roo,umb,poh,bod,nna,aey,afr,aly,cac,maa,aze,fon,tha,mhl,chd,tpi,tzm,acq,kyz,nbq,yle,ape,bco,att,nin,mkj,yuj,ata,djr,atb,enq,cpb,sxb,rmc,zas,guj,kbq,gfk,tgo,acm,cux,fin,npi,etr,tsn,dob,mpt,alq,byx,cak,cso,spy,oci,asm,ttc,nwi,srn,hmn,gyr,hto,arq,ngu,cpa,tif,fuv,raj,kue,yuw,ote,mgw,ssg,bos,mvn,dop,aso,mox,ndj,stp,mpp,nas,kon,mks,caf,mbs,mcd,wap,cco,tod,aon,aom,cnl,srp,zga,lat,sja,kpj,nhi,nko,swp,bho,blw,mih,mon,sna,bgs,als,kyf,kur,bul,uzb,knj,mam,yor,zos,gdr,aka,bam,bmh,gnw,lid,cha,msc,zpl,gun,qxn,zsm,spa,mgh,nca,yid,pms,mhr,cpc,quc,hvn,bvr,agu,svk,ngp,aak,jni,mau,sab,wos,huv,swe,kea,tum,pes,som,mup,pbt,mmo,amo,kgp,ido,taq,sbe,mil,nhg,bmu,bvd,wrs,atg,muy,tpa,chv,ign,vmy,cor,uri,fry,chf,cek,knf,pib,soy,boa,ces,xed,pma,hix,kbc,orm,sim,ace,nhw,kud,ppo,xnn,yut,boy,snx,ilo,zaa,nld,bsp,aau,myk,grn,bkq,cme,bbb,ssd,fur,knc,wuu,knv,heg,urw,ayr,ons,sat,crx,ang,hsb,rop,szl,suz,mad,ncl,anh,kto,tca,chk,xla,qxh,brx,ziw,ntp,azb,ara,tew,sot,cjv,djk,usa,ltg,cap,arz,lmo,vec,jao,wer,dhg,vie,ded,hop,khk,faa,tam,bgc,sus,mwc,ikk,kek,mie,trc,tue,ura,crh,ber,bkd,bzj,kwj,klt,sps,jid,xsi,swa,qxo,csb,lim,nqo,hns,tmd,mbt,mbc,ibo,hun,wrk,bnp,abt,kaq,car,kiz,nvm,nfa,gul,guo,uzn,beo,aer,nhy,otm,orv,cjo,tgk,bel,eri,mca,wsk,rro,row,bsn,tpz,fij,tvk,msb,mpx,abx,poy,sgb,kas,tcz,top,dif,awk,cbc,bea,ell,myy,pus,bmr,ssx,pao,ebk,ajp,opm,wnu,gub,acr,max,
tbf,ubr,cth,taj,aby,kde,mqj,zao,tyv,khm,hat,gle,azg,cbv,ian,apu,ptp,kbm,met,plt,sag,agd,sah,pag,ydd,ckb,mzz,div,kmg,miz,tac,tuo,gvn,boj,tee,mph,mna,qwh,gng,agg,mle,mak,rgu,haw,med,kyg,mig,nhu,tnc,waj,kat,lua,zpz,kpx,tof,ven,dzo,yaa,bqc,klv,qul,kqw,bef,gai,heb,nuy,zac,mcr,zpc,ssw,meu,tuk,gui,kmo,usp,otq,khs,ksj,xbi,nya,cya,aoj,kmr,grc,sny,snp,mir,piu,geb,tgl,dik,agn,dan,qvn,kaz,kbp,mto,tiy,xon,zav,dww,zap,kqa,lac,kne,wat,cbt,naf,inb,kwf,crn,azz,wim,ben,wro,poi,yue,awb,cgc,eng,mjc,amf,mps,mwe,ncu,cle,tdt,hne,zai,gdn,toc,bhl,kir,ron,fue,kyq,ixl,ghs,ncj,tbz,nnq,mio,kwd,mxp,beu,sbk,fuh,gym,ztq,mey,ikw,pab,pam,kmb,cof,tso,ipi,byr,aia,wiv,pcm,agm,doi,npl,ter,hye,iou,tku,nno,cnt,kqc,sll,lvs,gnn,nob,dah,nii,san,wuv,udu,gux,ots,zpq,cuk,mbj,nab,bjz,hbo,imo,mcf,glk,zam,twi,srd,sin,zca,qvc,agr,con,kjs,zaw,mav,gum,dov,ood,soq,tte,msa,chq,cbk,tzl,isn,kpf,ptu,mri,cao,aeb,cni,aaz,yon,pan,sgz,rom,mop,gwi,nou,uig,gla,far,atd,hin,tnp,bbr,kpg,huu,arn,jvn,cat,awa,amm,urb,run,mit,pir,gam,adz,tir,isl,pls,mlt,gsw,qve,nyu,txu,tbg,dwy,quy,ruf,kiw,shp,amr,ita,maq,dgr,fra,kin,ubu,gof,gaz,mgc,cmo,ctu,tel,eus,mcq,bpr,ino,snd,bgt,mwf,acu,jic,kkc,jac,lit,xtd,dyu,kvn,zyp,prs,cop,auc,wed,apb,sqi,ban,wal,poe,tnk,myu,otn,kje,ong,bkx,zsr,nds,hch,agt,wiu,spm,zpu,scn,sri,myw,buk,kdc,zho,sbs,slv,mwr,deu,kqf,kvg,tgp,bhg,dwr,xtm,amu,wbp,tim,ory,kzj,tos,kan,kbh,mya,mwp,mcb,shn,bdd,cub,yrb,tbo,yal,nij,lug,tah,txq,emi,hub,nso,slk,zpo,zpv,bmk,nss,bjn,nch,abs,bzd,shj,ukr,mbl,ina,tlf,kab,kew,kpw,luo,cpy,kmu,kup,zab,pri,snc,wbi,acf,gmv,glg,dsb,amp,qup,nop,srq,yka,apw,mqb,wmt,bch,ewe,sey,lbb,epo,qvh,taw,fuc,kql,ksd,smo,gvf,cmn,yad,ind,qvs,obo,wmw,nsn,anv,mic,pap,ake,nov,fas,cbr,bjr,glv,mdy,bbc,tsw,gvc,noa,bus,bjv,cwe,pon,pio,snn,swg,mal,nho,bba,jae,mxt,wol,nif,ycn,lao,tfr,ffm,qub,hus,bzh,mlp,mti,not,nys,ile,tzo,arb,mos,kam,cuc,dgc,pah,pjt,gbm,est,bxh,hot,bre,kms,cot,awx,bjk,pwg,cpu,hla,mpm,fuf,pol,tnn,shi,auy,mpj,tuc,bug,kor,zad,war,ars,rkb,mni,cbu,lif,mar,krc,dad,mee,dgz,mco,kik,apz,mkn,sco,mbb,maz,lij,khz,hmo,guh,
sun,cbi,lgl,nhr,tiw,daa,amn,amk,tke,lex,mag,cym,gom,eko,zia,mcp,gah,urt,sua,cab,quh,srm,dtp,vid,blz,bhp,mmx,apn,tur,rmy,bem,yaq,ctp,cui,lus,tav,cax,yva | | [MTEB(Retrieval w/Instructions)](https://arxiv.org/abs/2403.15246) | 3 | {'InstructionRetrieval': 3} | [Written, News] | eng | -| [MTEB(Scandinavian)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | 28 | {'BitextMining': 2, 'Classification': 13, 'Retrieval': 7, 'Clustering': 6} | [Encyclopaedic, Spoken, Non-fiction, Government, News, Fiction, Social, Blog, Reviews, Written, Web, Legal] | nob,fao,swe,isl,dan,nno | -| MTEB(code) | 12 | {'Retrieval': 12} | [Written, Programming] | python,c++,sql,c,go,eng,shell,typescript,php,scala,rust,swift,javascript,ruby,java | -| [MTEB(deu)](https://arxiv.org/html/2401.02709v1) | 19 | {'Classification': 6, 'Clustering': 4, 'PairClassification': 2, 'Reranking': 1, 'Retrieval': 4, 'STS': 2} | [Encyclopaedic, Spoken, News, Reviews, Written, Web] | eng,deu,pol,fra | -| MTEB(eng) | 67 | {'Classification': 12, 'Retrieval': 26, 'Clustering': 11, 'Reranking': 4, 'STS': 10, 'PairClassification': 3, 'Summarization': 1} | [Encyclopaedic, Spoken, Non-fiction, Blog, News, Medical, Social, Programming, Written, Reviews, Web, Academic] | tur,fra,eng,cmn,pol,ita,nld,spa,deu,ara | -| [MTEB(fra)](https://arxiv.org/abs/2405.20468) | 26 | {'Classification': 6, 'Clustering': 7, 'PairClassification': 2, 'Reranking': 2, 'Retrieval': 5, 'STS': 3, 'Summarization': 1} | [Encyclopaedic, Spoken, Non-fiction, News, Social, Reviews, Written, Web, Legal, Academic] | eng,deu,pol,fra | -| MTEB(kor) | 6 | {'Classification': 1, 'Reranking': 1, 'Retrieval': 2, 'STS': 2} | [Encyclopaedic, Spoken, News, Reviews, Written, Web] | kor | -| [MTEB(law)](https://aclanthology.org/2023.eacl-main.148/) | 8 | {'Retrieval': 8} | [Written, Legal] | eng,deu,zho | -| [MTEB(pol)](https://arxiv.org/abs/2405.10138) | 18 | {'Classification': 7, 'Clustering': 3, 'PairClassification': 4, 'STS': 4} | [Spoken, 
Non-fiction, News, Fiction, Social, Written, Web, Legal, Academic] | pol,deu,eng,fra | -| [MTEB(rus)](https://aclanthology.org/2023.eacl-main.148/) | 23 | {'Classification': 9, 'Clustering': 3, 'MultilabelClassification': 2, 'PairClassification': 1, 'Reranking': 2, 'Retrieval': 3, 'STS': 3} | [Encyclopaedic, Spoken, Blog, News, Social, Reviews, Written, Web, Academic] | rus | +| [MTEB(Scandinavian)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | 28 | {'BitextMining': 2, 'Classification': 13, 'Retrieval': 7, 'Clustering': 6} | [Web, Fiction, Social, Written, Blog, Non-fiction, Legal, News, Spoken, Reviews, Government, Encyclopaedic] | swe,nno,isl,dan,fao,nob | +| MTEB(code) | 12 | {'Retrieval': 12} | [Written, Programming] | javascript,ruby,sql,go,c,eng,shell,typescript,rust,java,php,python,scala,swift,c++ | +| [MTEB(deu)](https://arxiv.org/html/2401.02709v1) | 19 | {'Classification': 6, 'Clustering': 4, 'PairClassification': 2, 'Reranking': 1, 'Retrieval': 4, 'STS': 2} | [Web, Written, News, Spoken, Reviews, Encyclopaedic] | pol,deu,fra,eng | +| MTEB(eng, beta) | 41 | {'Classification': 8, 'Retrieval': 10, 'Clustering': 8, 'Reranking': 2, 'STS': 9, 'PairClassification': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Medical, Blog, Non-fiction, News, Spoken, Reviews, Encyclopaedic, Programming] | nld,tur,eng,ara,spa,ita,deu,fra,pol,cmn | +| MTEB(eng, classic) | 67 | {'Classification': 12, 'Retrieval': 26, 'Clustering': 11, 'Reranking': 4, 'STS': 10, 'PairClassification': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Medical, Blog, Non-fiction, News, Spoken, Reviews, Encyclopaedic, Programming] | nld,tur,eng,ara,spa,ita,deu,fra,pol,cmn | +| [MTEB(fra)](https://arxiv.org/abs/2405.20468) | 26 | {'Classification': 6, 'Clustering': 7, 'PairClassification': 2, 'Reranking': 2, 'Retrieval': 5, 'STS': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Non-fiction, Legal, News, Spoken, Reviews, Encyclopaedic] | 
pol,deu,fra,eng | +| [MTEB(jpn)](https://github.com/sbintuitions/JMTEB) | 16 | {'Clustering': 2, 'Classification': 4, 'STS': 2, 'PairClassification': 1, 'Retrieval': 6, 'Reranking': 1} | [Web, Academic, Written, Non-fiction, News, Spoken, Reviews, Encyclopaedic] | jpn | +| MTEB(kor) | 6 | {'Classification': 1, 'Reranking': 1, 'Retrieval': 2, 'STS': 2} | [Web, Written, News, Spoken, Reviews, Encyclopaedic] | kor | +| [MTEB(law)](https://aclanthology.org/2023.eacl-main.148/) | 8 | {'Retrieval': 8} | [Written, Legal] | deu,zho,eng | +| [MTEB(pol)](https://arxiv.org/abs/2405.10138) | 18 | {'Classification': 7, 'Clustering': 3, 'PairClassification': 4, 'STS': 4} | [Web, Fiction, Academic, Social, Written, Non-fiction, Legal, News, Spoken] | pol,deu,fra,eng | +| [MTEB(rus)](https://aclanthology.org/2023.eacl-main.148/) | 23 | {'Classification': 9, 'Clustering': 3, 'MultilabelClassification': 2, 'PairClassification': 1, 'Reranking': 2, 'Retrieval': 3, 'STS': 3} | [Web, Social, Academic, Written, Blog, News, Spoken, Reviews, Encyclopaedic] | rus | +| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | 13 | {'Retrieval': 13} | [Web, Academic, Social, Medical, Written, Non-fiction, News, Encyclopaedic] | eng | +| [RAR-b](https://arxiv.org/abs/2404.06347) | 17 | {'Retrieval': 17} | [Encyclopaedic, Written, Programming] | eng | \ No newline at end of file diff --git a/docs/tasks.md b/docs/tasks.md index f4ec3c792e..15b9474168 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -122,6 +122,8 @@ The following tables give you an overview of the tasks in MTEB. 
| [CUREv1](https://huggingface.co/datasets/clinia/CUREv1) | ['eng', 'fra', 'spa'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | | [CanadaTaxCourtOutcomesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CataloniaTweetClassification](https://aclanthology.org/2020.lrec-1.171/) | ['cat', 'spa'] | Classification | s2s | [Social, Government, Written] | None | None | +| [ChemHotpotQARetrieval](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Retrieval | s2p | [Chemistry] | None | None | +| [ChemNQRetrieval](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Retrieval | s2p | [Chemistry] | None | None | | [ClimateFEVER](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | [Medical, Written] | None | None | @@ -425,6 +427,12 @@ The following tables give you an overview of the tasks in MTEB. 
| [PolEmo2.0-IN](https://aclanthology.org/K19-1092.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [PolEmo2.0-OUT](https://aclanthology.org/K19-1092.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [PpcPC](https://arxiv.org/pdf/2207.12759.pdf) (Sławomir Dadas, 2022) | ['pol'] | PairClassification | s2s | [Fiction, Non-fiction, Web, Written, Spoken, Social, News] | None | None | +| [PubChemAISentenceParaphrasePC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemSMILESBitextMining](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | BitextMining | s2s | [Chemistry] | None | None | +| [PubChemSMILESPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemSynonymPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemWikiPairClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['ces', 'deu', 'eng', 'fra', 'hin', 'jpn', 'kor', 'msa', 'nld', 'por', 'spa', 'tur', 'zho'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemWikiParagraphsPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | p2p | [Chemistry] | None | None | | [PublicHealthQA](https://huggingface.co/datasets/xhluca/publichealth-qa) | ['ara', 'eng', 'fra', 'kor', 'rus', 'spa', 'vie', 'zho'] | Retrieval | s2p | [Medical, Government, Web, Written] | None | None | | [PunjabiNewsClassification](https://github.com/goru001/nlp-for-punjabi/) (Anoop Kunchukuttan, 2020) | ['pan'] | Classification | s2s | [News, Written] | None | None | | [QBQTC](https://github.com/CLUEbenchmark/QBQTC/tree/main/dataset) | ['cmn'] | STS | s2s | | None | None | @@ -468,6 +476,8 @@ The following tables give you an overview of the tasks in MTEB. 
| [SCDDVerificationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [SCIDOCS](https://allenai.org/data/scidocs) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Written, Non-fiction] | None | None | | [SCIDOCS-PL](https://allenai.org/data/scidocs) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | +| [SDSEyeProtectionClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2p | [Chemistry] | None | None | +| [SDSGlovesClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2p | [Chemistry] | None | None | | [SIB200Classification](https://arxiv.org/abs/2309.07445) (Adelani et al., 2023) | ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 
'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] | Classification | s2s | [News, Written] | None | None | | [SIB200ClusteringS2S](https://arxiv.org/abs/2309.07445) (Adelani et al., 2023) | ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] | Clustering | s2s | [News, Written] | None | None | | [SICK-BR-PC](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | PairClassification | s2s | [Web, Written] | None | None | @@ -595,8 +605,25 @@ The following tables give you an overview of the tasks in 
MTEB. | [WebLINXCandidatesReranking](https://mcgill-nlp.github.io/weblinx) (Xing Han Lù, 2024) | ['eng'] | Reranking | p2p | [Academic, Web, Written] | None | None | | [WikiCitiesClustering](https://huggingface.co/datasets/wikipedia) | ['eng'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | | [WikiClusteringP2P.v2](https://github.com/Rysias/wiki-clustering) | ['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | +| [WikipediaBioMetChemClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaBiolumNeurochemClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemEngSpecialtiesClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemFieldsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemistryTopicsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemistryTopicsClustering](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Clustering | s2p | [Chemistry] | None | None | +| [WikipediaCompChemSpectroscopyClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaCryobiologySeparationClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaCrystallographyAnalyticalClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | 
[Chemistry] | None | None | +| [WikipediaGreenhouseEnantiopureClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaIsotopesFissionClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaLuminescenceClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaOrganicInorganicClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | | [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'min_query_length': 7, 'avg_query_length': 59.09, 'max_query_length': 180, 'unique_query': 23997, 'min_positive_length': 100, 'avg_positive_length': 385.45, 'max_positive_length': 3515, 'unique_positive': 23993, 'min_negative_length': 100, 'avg_negative_length': 381.24, 'max_negative_length': 9461, 'unique_negative': 191783, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 60.83, 'max_query_length': 166, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 375.89, 'max_positive_length': 2241, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 374.19, 'max_negative_length': 4869, 'unique_negative': 11996}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 
'min_query_length': 7, 'avg_query_length': 47.27, 'max_query_length': 123, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.59, 'max_positive_length': 2338, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 393.98, 'max_negative_length': 5104, 'unique_negative': 11996}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.27, 'max_query_length': 137, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 383.84, 'max_positive_length': 2300, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 368.25, 'max_negative_length': 3487, 'unique_negative': 11982}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.75, 'max_query_length': 137, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 351.68, 'max_positive_length': 2159, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.46, 'max_negative_length': 2563, 'unique_negative': 11972}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 20, 'avg_query_length': 70.0, 'max_query_length': 180, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 391.54, 'max_positive_length': 2674, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 399.27, 'max_negative_length': 3083, 'unique_negative': 12000}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 68.37, 'max_query_length': 162, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 451.73, 'max_positive_length': 3515, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 453.14, 
'max_negative_length': 3662, 'unique_negative': 12000}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 12, 'avg_query_length': 48.67, 'max_query_length': 119, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 347.7, 'max_positive_length': 2571, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.84, 'max_negative_length': 4707, 'unique_negative': 11978}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.34, 'max_query_length': 132, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.71, 'max_positive_length': 2129, 'unique_positive': 1498, 'min_negative_length': 100, 'avg_negative_length': 377.84, 'max_negative_length': 2574, 'unique_negative': 11972}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 13, 'avg_query_length': 50.78, 'max_query_length': 125, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 420.38, 'max_positive_length': 2361, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 409.52, 'max_negative_length': 5912, 'unique_negative': 11996}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 23, 'avg_query_length': 70.05, 'max_query_length': 156, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 396.97, 'max_positive_length': 2082, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 393.33, 'max_negative_length': 9461, 'unique_negative': 11993}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.34, 'max_query_length': 136, 'unique_query': 1500, 
'min_positive_length': 100, 'avg_positive_length': 380.79, 'max_positive_length': 1864, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 375.03, 'max_negative_length': 3641, 'unique_negative': 11985}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.12, 'max_query_length': 176, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 404.02, 'max_positive_length': 3057, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 397.55, 'max_negative_length': 2877, 'unique_negative': 11991}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 61.97, 'max_query_length': 169, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 346.71, 'max_positive_length': 1917, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 348.59, 'max_negative_length': 4213, 'unique_negative': 11971}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 15, 'avg_query_length': 55.67, 'max_query_length': 146, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 386.35, 'max_positive_length': 2421, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 384.06, 'max_negative_length': 3668, 'unique_negative': 11974}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.29, 'max_query_length': 129, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 367.72, 'max_positive_length': 1450, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 366.84, 'max_negative_length': 2841, 'unique_negative': 11996}, 'sv': {'num_samples': 1500, 
'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 57.73, 'max_query_length': 133, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 372.59, 'max_positive_length': 2493, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 368.94, 'max_negative_length': 3680, 'unique_negative': 11999}}}} | | [WikipediaRetrievalMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-retrieval-multilingual-queries) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | +| [WikipediaSaltsSemiconductorsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaSolidStateColloidalClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaSpecialtiesInChemistryClustering](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Clustering | s2p | [Chemistry] | None | None | +| [WikipediaTheoreticalAppliedClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | | [WinoGrande](https://winogrande.allenai.org/) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | | [WisesightSentimentClassification](https://github.com/PyThaiNLP/wisesight-sentiment) | ['tha'] | Classification | s2s | [Social, News, Written] | None | None | | XMarket (Bonab et al., 2021) | ['deu', 'eng', 'spa'] | Retrieval | s2p | | None | None | @@ -819,7 +846,7 @@ The following tables give you an overview of the tasks in MTEB. 
| cco | Comaltepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ceb | Cebuano | Austronesian | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | | cek | Eastern Khumi Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | +| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 2 | 1 | 2 | 0 | 0 | 0 | 17 | | cgc | Kagayanen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cha | Chamorro | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | chd | Highland Oaxaca Chontal | Tequistlatecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -875,7 +902,7 @@ The following tables give you an overview of the tasks in MTEB. | dah | Gwahatike | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dan | Danish | Indo-European | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | | ded | Dedua | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | 58 | +| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 7 | 2 | 18 | 4 | 0 | 0 | 59 | | dgc | Casiguran Dumagat Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgr | Dogrib | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgz | Daga | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -903,7 +930,7 @@ The following tables give you an overview of the tasks in MTEB. 
| ell | Modern Greek (1453-) | Indo-European | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | | emi | Mussau-Emira | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | emp | Northern Emberá | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eng | English | Indo-European | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 105 | 13 | 2 | 1 | 316 | +| eng | English | Indo-European | 17 | 160 | 18 | 3 | 1 | 13 | 8 | 107 | 13 | 2 | 1 | 343 | | enq | Enga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | epo | Esperanto | Artificial Language | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | eri | Ogea | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -924,7 +951,7 @@ The following tables give you an overview of the tasks in MTEB. | fin | Finnish | Uralic | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | | fon | Fon | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | for | Fore | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | +| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 6 | 3 | 15 | 4 | 0 | 1 | 58 | | fry | Western Frisian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuc | Pulaar | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fue | Borgu Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -981,7 +1008,7 @@ The following tables give you an overview of the tasks in MTEB. 
| hch | Huichol | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | heb | Hebrew | Afro-Asiatic | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | | heg | Helong | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | +| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 2 | 2 | 10 | 2 | 0 | 0 | 39 | | hix | Hixkaryána | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hla | Halia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hlt | Matu Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1030,7 +1057,7 @@ The following tables give you an overview of the tasks in MTEB. | jid | Bu (Kaduna State) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jiv | Shuar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jni | Janji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | +| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 2 | 3 | 13 | 2 | 0 | 0 | 36 | | jvn | Caribbean Javanese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kab | Kabyle | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kac | Kachin | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | @@ -1086,7 +1113,7 @@ The following tables give you an overview of the tasks in MTEB. 
| knj | Western Kanjobal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knv | Tabo | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kon | Kongo | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | +| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 3 | 1 | 9 | 3 | 0 | 0 | 30 | | kos | Kosraean | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpf | Komba | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpg | Kapingamarangi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1231,7 +1258,7 @@ The following tables give you an overview of the tasks in MTEB. | mqb | Mbuko | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mqj | Mamasa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mri | Maori | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| msa | Malay (macrolanguage) | Unclassified | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| msa | Malay (macrolanguage) | Unclassified | 1 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 4 | | msb | Masbatenyo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msc | Sankaran Maninka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msk | Mansaka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1292,7 +1319,7 @@ The following tables give you an overview of the tasks in MTEB. 
| nij | Ngaju | Austronesian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | nin | Ninzo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nko | Nkonya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | +| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 1 | 1 | 2 | 2 | 0 | 0 | 20 | | nlg | Gela | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nna | Nyangumarta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nno | Norwegian Nynorsk | Unclassified | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | @@ -1364,7 +1391,7 @@ The following tables give you an overview of the tasks in MTEB. | poi | Highland Popoluca | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pol | Polish | Indo-European | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | | pon | Pohnpeian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 3 | 1 | 5 | 3 | 0 | 0 | 28 | | poy | Pogolo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ppo | Folopa | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | prf | Paranan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1449,7 +1476,7 @@ The following tables give you an overview of the tasks in MTEB. 
| soq | Kanasi | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sot | Southern Sotho | Atlantic-Congo | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | | soy | Miyobe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | +| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 3 | 2 | 13 | 4 | 0 | 0 | 44 | | spl | Selepet | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spm | Akukem | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spp | Supyire Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1546,7 +1573,7 @@ The following tables give you an overview of the tasks in MTEB. | tuk | Turkmen | Turkic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | tum | Tumbuka | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tuo | Tucano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | +| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 3 | 0 | 3 | 2 | 0 | 0 | 20 | | tvk | Southeast Ambrym | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | twi | Twi | Unclassified | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | txq | Tii | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1656,7 +1683,7 @@ The following tables give you an overview of the tasks in MTEB. 
| zaw | Mitla Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zca | Coatecas Altas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zga | Kinga | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | +| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 2 | 1 | 13 | 0 | 0 | 0 | 21 | | zia | Zia | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ziw | Zigula | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zlm | Malay (individual language) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1675,7 +1702,7 @@ The following tables give you an overview of the tasks in MTEB. | zty | Yatee Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zul | Zulu | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | | zyp | Zyphe Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| Total | None | None | None | 1394 | 795 | 304 | 3 | 28 | 67 | 51 | 473 | 85 | 2 | 2 | +| Total | None | None | None | 1395 | 812 | 306 | 3 | 28 | 84 | 51 | 475 | 85 | 2 | 2 | diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 07c4f97a04..b04753d877 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -70,6 +70,7 @@ "Web", "Written", "Programming", + "Chemistry", ] SAMPLE_CREATION_METHOD = Literal[ diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 233c7a79b3..415b11eddb 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -1232,3 +1232,47 @@ def load_results( primaryClass={cs.CL} }""", ) + +CHEMTEB = Benchmark( + name="ChemTEB", + tasks=get_tasks( + tasks=[ + "PubChemSMILESBitextMining", + "SDSEyeProtectionClassification", + "SDSGlovesClassification", + "WikipediaBioMetChemClassification", + 
"WikipediaGreenhouseEnantiopureClassification", + "WikipediaSolidStateColloidalClassification", + "WikipediaOrganicInorganicClassification", + "WikipediaCryobiologySeparationClassification", + "WikipediaChemistryTopicsClassification", + "WikipediaTheoreticalAppliedClassification", + "WikipediaChemFieldsClassification", + "WikipediaLuminescenceClassification", + "WikipediaIsotopesFissionClassification", + "WikipediaSaltsSemiconductorsClassification", + "WikipediaBiolumNeurochemClassification", + "WikipediaCrystallographyAnalyticalClassification", + "WikipediaCompChemSpectroscopyClassification", + "WikipediaChemEngSpecialtiesClassification", + "WikipediaChemistryTopicsClustering", + "WikipediaSpecialtiesInChemistryClustering", + "PubChemAISentenceParaphrasePC", + "PubChemSMILESPC", + "PubChemSynonymPC", + "PubChemWikiParagraphsPC", + "PubChemWikiPairClassification", + "ChemNQRetrieval", + "ChemHotpotQARetrieval", + ], + ), + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} +}""", +) diff --git a/mteb/models/bedrock_models.py b/mteb/models/bedrock_models.py new file mode 100644 index 0000000000..4616209df1 --- /dev/null +++ b/mteb/models/bedrock_models.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import json +import logging +import re +from functools import partial +from typing import Any + +import numpy as np +import tqdm + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta +from mteb.models.cohere_models import model_prompts as 
from mteb.models.cohere_models import model_prompts as cohere_model_prompts
from mteb.models.cohere_models import supported_languages as cohere_supported_languages
from mteb.requires_package import requires_package

from .wrapper import Wrapper

logger = logging.getLogger(__name__)


class BedrockWrapper(Wrapper):
    """Embedding wrapper that calls models through the Amazon Bedrock runtime client.

    Two providers are handled:

    * ``"amazon"``: one ``invoke_model`` request per sentence, with
      character-level truncation before sending.
    * ``"cohere"``: batched ``invoke_model`` requests whose ``input_type`` is
      looked up in the prompt mapping given at construction time.
    """

    def __init__(
        self,
        model_id: str,
        provider: str,
        max_tokens: int,
        model_prompts: dict[str, str] | None = None,
        **kwargs,
    ) -> None:
        """Create the Bedrock runtime client and store provider-specific limits.

        Args:
            model_id: Bedrock model identifier passed as ``modelId`` on each request.
            provider: ``"amazon"`` or ``"cohere"`` (compared case-insensitively).
            max_tokens: Token limit of the model; converted to a character
                budget when truncating inputs.
            model_prompts: Optional prompt mapping, validated and used only
                for the ``"cohere"`` provider.
            **kwargs: Accepted for loader compatibility; not used here.
        """
        requires_package(self, "boto3", "The AWS SDK for Python")
        import boto3

        # Use the region configured for the default boto3 session.
        boto3_session = boto3.session.Session()
        region_name = boto3_session.region_name
        self._client = boto3.client("bedrock-runtime", region_name)

        self._model_id = model_id
        self._provider = provider.lower()

        if self._provider == "cohere":
            self.model_prompts = (
                self.validate_task_to_prompt_name(model_prompts)
                if model_prompts
                else None
            )
            self._max_batch_size = 96
            # Character budget used to slice each text before it is sent.
            self._max_sequence_length = max_tokens * 4
        else:
            self._max_tokens = max_tokens

    def encode(
        self,
        sentences: list[str],
        *,
        task_name: str | None = None,
        prompt_type: PromptType | None = None,
        **kwargs: Any,
    ) -> np.ndarray:
        """Embed ``sentences`` with the configured provider.

        Args:
            sentences: Texts to embed.
            task_name: Task name used to resolve the Cohere ``input_type``.
            prompt_type: Prompt type used to resolve the Cohere ``input_type``.
            **kwargs: ``show_progress_bar`` (default ``False``) is consumed;
                anything else is ignored.

        Returns:
            Array built from one embedding per input sentence.

        Raises:
            ValueError: If the provider is neither ``"amazon"`` nor ``"cohere"``.
        """
        requires_package(self, "boto3", "Amazon Bedrock")
        show_progress_bar = kwargs.pop("show_progress_bar", False)

        if self._provider == "amazon":
            return self._encode_amazon(sentences, show_progress_bar)

        if self._provider == "cohere":
            prompt_name = self.get_prompt_name(
                self.model_prompts, task_name, prompt_type
            )
            # model_prompts is None when no mapping was supplied to __init__;
            # fall back to the default input type instead of failing on None.get.
            prompts = self.model_prompts or {}
            cohere_task_type = prompts.get(prompt_name, "search_document")
            return self._encode_cohere(sentences, cohere_task_type, show_progress_bar)

        raise ValueError(
            f"Unknown provider '{self._provider}'. Must be 'amazon' or 'cohere'."
        )

    def _encode_amazon(
        self, sentences: list[str], show_progress_bar: bool = False
    ) -> np.ndarray:
        """Embed sentences one at a time, truncating inputs that are too long.

        Each sentence is first cut to ``int(max_tokens * 4.5)`` characters. If
        the request is still rejected with a message reporting the request's
        token count, the text is shortened proportionally (with a 10% margin)
        and retried once. Any other error is re-raised unchanged.
        """
        # Service-side rejections from invoke_model are raised as ClientError;
        # ValidationError is kept so previously handled cases still are.
        from botocore.exceptions import ClientError, ValidationError

        token_count_pattern = re.compile(r"request input token count:\s*(\d+)")

        all_embeddings = []
        # https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html
        max_sequence_length = int(self._max_tokens * 4.5)

        for sentence in tqdm.tqdm(
            sentences, leave=False, disable=not show_progress_bar
        ):
            truncated_sentence = sentence[:max_sequence_length]

            try:
                embedding = self._embed_amazon(truncated_sentence)
            except (ClientError, ValidationError) as e:
                match = token_count_pattern.search(str(e))
                if match is None:
                    # Not a token-limit rejection: propagate untouched.
                    raise

                num_tokens = int(match.group(1))
                # Scale the text by allowed/actual tokens, keeping a 10% margin.
                ratio = 0.9 * (self._max_tokens / num_tokens)
                dynamic_cutoff = int(len(truncated_sentence) * ratio)
                embedding = self._embed_amazon(truncated_sentence[:dynamic_cutoff])

            all_embeddings.append(embedding)

        return np.array(all_embeddings)

    def _encode_cohere(
        self,
        sentences: list[str],
        cohere_task_type: str,
        show_progress_bar: bool = False,
    ) -> np.ndarray:
        """Embed sentences in batches of at most ``_max_batch_size`` texts.

        Every text is sliced to ``_max_sequence_length`` characters and the
        batch is sent with ``cohere_task_type`` as its ``input_type``.
        """
        batches = [
            sentences[i : i + self._max_batch_size]
            for i in range(0, len(sentences), self._max_batch_size)
        ]

        all_embeddings = []

        for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar):
            response = self._client.invoke_model(
                body=json.dumps(
                    {
                        "texts": [sent[: self._max_sequence_length] for sent in batch],
                        "input_type": cohere_task_type,
                    }
                ),
                modelId=self._model_id,
                accept="*/*",
                contentType="application/json",
            )
            all_embeddings.extend(self._to_numpy(response))

        return np.array(all_embeddings)

    def _embed_amazon(self, sentence: str) -> np.ndarray:
        """Send a single ``inputText`` request and return its embedding."""
        response = self._client.invoke_model(
            body=json.dumps({"inputText": sentence}),
            modelId=self._model_id,
            accept="application/json",
            contentType="application/json",
        )
        return self._to_numpy(response)

    def _to_numpy(self, embedding_response) -> np.ndarray:
        """Read the JSON response body and return its embedding payload as an array.

        The payload key is ``"embedding"`` for the amazon provider and
        ``"embeddings"`` otherwise.
        """
        response = json.loads(embedding_response.get("body").read())
        key = "embedding" if self._provider == "amazon" else "embeddings"
        return np.array(response[key])


amazon_titan_embed_text_v1 = ModelMeta(
    name="bedrock/amazon-titan-embed-text-v1",
    revision="1",
    release_date="2023-09-27",
    languages=None,  # not specified
    loader=partial(
        BedrockWrapper,
        model_id="amazon.titan-embed-text-v1",
        provider="amazon",
        max_tokens=8192,
    ),
    max_tokens=8192,
    embed_dim=1536,
    open_weights=False,
    n_parameters=None,
    public_training_code=None,
    public_training_data=None,  # assumed
    training_datasets=None,
    license=None,
    reference="https://aws.amazon.com/about-aws/whats-new/2023/09/amazon-titan-embeddings-generally-available/",
    similarity_fn_name="cosine",
    framework=["API"],
    use_instructions=False,
)

amazon_titan_embed_text_v2 = ModelMeta(
    name="bedrock/amazon-titan-embed-text-v2",
    revision="1",
    release_date="2024-04-30",
    languages=None,  # not specified
    loader=partial(
        BedrockWrapper,
        model_id="amazon.titan-embed-text-v2:0",
        provider="amazon",
        max_tokens=8192,
    ),
    max_tokens=8192,
    embed_dim=1024,
    open_weights=False,
    n_parameters=None,
    public_training_code=None,
    public_training_data=None,  # assumed
    training_datasets=None,
    license=None,
    reference="https://aws.amazon.com/about-aws/whats-new/2024/04/amazon-titan-text-embeddings-v2-amazon-bedrock/",
    similarity_fn_name="cosine",
    framework=["API"],
    use_instructions=False,
)
# Note: For the original Cohere API implementation, refer to:
# https://github.com/embeddings-benchmark/mteb/blob/main/mteb/models/cohere_models.py
# This implementation uses the Amazon Bedrock endpoint for Cohere models.
# Cohere Embed v3 (English) served through the Amazon Bedrock endpoint.
# Keyword order mirrors the Titan ModelMeta definitions in this module.
cohere_embed_english_v3 = ModelMeta(
    name="bedrock/cohere-embed-english-v3",
    revision="1",
    release_date="2023-11-02",
    languages=["eng-Latn"],
    loader=partial(
        BedrockWrapper,
        model_id="cohere.embed-english-v3",
        provider="cohere",
        max_tokens=512,
        model_prompts=cohere_model_prompts,
    ),
    max_tokens=512,
    embed_dim=1024,
    open_weights=False,
    n_parameters=None,
    public_training_code=None,
    public_training_data=None,  # assumed
    training_datasets=None,
    license=None,
    reference="https://cohere.com/blog/introducing-embed-v3",
    similarity_fn_name="cosine",
    framework=["API"],
    use_instructions=True,
)

# Cohere Embed v3 (multilingual) served through the Amazon Bedrock endpoint.
cohere_embed_multilingual_v3 = ModelMeta(
    name="bedrock/cohere-embed-multilingual-v3",
    revision="1",
    release_date="2023-11-02",
    languages=cohere_supported_languages,
    loader=partial(
        BedrockWrapper,
        model_id="cohere.embed-multilingual-v3",
        provider="cohere",
        max_tokens=512,
        model_prompts=cohere_model_prompts,
    ),
    max_tokens=512,
    embed_dim=1024,
    open_weights=False,
    n_parameters=None,
    public_training_code=None,
    public_training_data=None,  # assumed
    training_datasets=None,
    license=None,
    reference="https://cohere.com/blog/introducing-embed-v3",
    similarity_fn_name="cosine",
    framework=["API"],
    use_instructions=True,
)
class PubChemSMILESBitextMining(MultilingualTask, AbsTaskBitextMining):
    """ChemTEB bitext-mining task over ``BASF-AI/PubChemSMILESBitextMining``.

    The subsets are the keys of ``EVAL_LANGS``; results are scored with
    ``f1`` on the ``test`` split.
    """

    metadata = TaskMetadata(
        name="PubChemSMILESBitextMining",
        dataset={
            "path": "BASF-AI/PubChemSMILESBitextMining",
            "revision": "36700ea628118312ebf2f90ad2353a9a8f188dc9",
        },
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        type="BitextMining",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=EVAL_LANGS,
        main_score="f1",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    @article{kim2023pubchem,
    title={PubChem 2023 update},
    author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others},
    journal={Nucleic acids research},
    volume={51},
    number={D1},
    pages={D1373--D1380},
    year={2023},
    publisher={Oxford University Press}
    }
    """,
    )

    def dataset_transform(self):
        """Rename each subset's columns to ``sentence1``/``sentence2``.

        The per-subset source column names come from ``COL_MAPPING``.
        """
        for subset in self.hf_subsets:
            self.dataset[subset] = self.dataset[subset].rename_columns(
                COL_MAPPING[subset]
            )
class SDSEyeProtectionClassification(AbsTaskClassification):
    """ChemTEB classification task over ``BASF-AI/SDSEyeProtectionClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="SDSEyeProtectionClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/SDSEyeProtectionClassification",
            "revision": "35cbe5ee544dd26e343238a333de4568e6f77819",
        },
        type="Classification",
        category="s2p",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="LM-generated and reviewed",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    @inproceedings{pereira2020msds,
    title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets},
    author={Pereira, Eliseu},
    booktitle={15th Doctoral Symposium},
    pages={42},
    year={2020}
    }
    """,
    )
Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @inproceedings{pereira2020msds, + title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, + author={Pereira, Eliseu}, + booktitle={15th Doctoral Symposium}, + pages={42}, + year={2020} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py new file mode 100644 index 0000000000..3b494f46f6 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaBioMetChemClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaBioMetChemClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy2GeneExpressionVsMetallurgyClassification", + "revision": "6ac491e5de9070c6dd434b31e76d3d379123dcff", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, 
class WikipediaBiolumNeurochemClassification(AbsTaskClassification):
    """ChemTEB classification task over
    ``BASF-AI/WikipediaMedium2BioluminescenceVsNeurochemistryClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaBiolumNeurochemClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaMedium2BioluminescenceVsNeurochemistryClassification",
            "revision": "2f68b7d34c2be896e46b14533573b366e59e5aae",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaChemEngSpecialtiesClassification(AbsTaskClassification):
    """ChemTEB classification task over ``BASF-AI/WikipediaMedium5Classification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaChemEngSpecialtiesClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaMedium5Classification",
            "revision": "f81a76a2fb690e5d5bd7a26dd07e85cdf8405dfb",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaChemistryTopicsClassification(AbsTaskClassification):
    """ChemTEB classification task over ``BASF-AI/WikipediaEasy10Classification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaChemistryTopicsClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaEasy10Classification",
            "revision": "d8fb355db2248f95df8ea410a43aa1db1ee96ba4",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaCryobiologySeparationClassification(AbsTaskClassification):
    """ChemTEB classification task over ``BASF-AI/WikipediaEasy5Classification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaCryobiologySeparationClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaEasy5Classification",
            "revision": "858633e882dadd1ec6a0d220f7549bcafd379236",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaGreenhouseEnantiopureClassification(AbsTaskClassification):
    """ChemTEB classification task over
    ``BASF-AI/WikipediaEasy2GreenhouseVsEnantiopureClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaGreenhouseEnantiopureClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaEasy2GreenhouseVsEnantiopureClassification",
            "revision": "0cfc1a83b6ed832454e8f4f93f7a0e26208274d9",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaIsotopesFissionClassification(AbsTaskClassification):
    """ChemTEB classification task over
    ``BASF-AI/WikipediaHard2IsotopesVsFissionProductsNuclearFissionClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaIsotopesFissionClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaHard2IsotopesVsFissionProductsNuclearFissionClassification",
            "revision": "897743346c7c794264f7dbfadc3978aa2895e8e2",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaOrganicInorganicClassification(AbsTaskClassification):
    """ChemTEB classification task over ``BASF-AI/WikipediaEasy2SpecialClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaOrganicInorganicClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaEasy2SpecialClassification",
            "revision": "96d1d9b37c4693f74c46c83d63a290573f78d511",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
class WikipediaSolidStateColloidalClassification(AbsTaskClassification):
    """ChemTEB classification task over
    ``BASF-AI/WikipediaEasy2SolidStateVsColloidalClassification``.

    Scored with ``accuracy`` on the English ``test`` split.
    """

    metadata = TaskMetadata(
        name="WikipediaSolidStateColloidalClassification",
        description="ChemTEB evaluates the performance of text embedding models on chemical domain data.",
        reference="https://arxiv.org/abs/2412.00532",
        dataset={
            "path": "BASF-AI/WikipediaEasy2SolidStateVsColloidalClassification",
            "revision": "7d8df44e588b6143d4856c781f72f919fa0599a7",
        },
        type="Classification",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["eng-Latn"],
        main_score="accuracy",
        date=("2024-06-01", "2024-11-30"),
        domains=["Chemistry"],
        task_subtypes=[],
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # Raw string: the BibTeX "\&" is not a valid Python escape sequence.
        bibtex_citation=r"""
    @article{kasmaee2024chemteb,
    title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain},
    author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila},
    journal={arXiv preprint arXiv:2412.00532},
    year={2024}
    }
    """,
    )
014796a4cb..de27839290 100644 --- a/mteb/tasks/Clustering/__init__.py +++ b/mteb/tasks/Clustering/__init__.py @@ -18,6 +18,8 @@ from .eng.StackExchangeClusteringP2P import * from .eng.TwentyNewsgroupsClustering import * from .eng.WikiCitiesClustering import * +from .eng.WikipediaChemistrySpecialtiesClustering import * +from .eng.WikipediaChemistryTopicsClustering import * from .fra.AlloProfClusteringP2P import * from .fra.AlloProfClusteringS2S import * from .fra.HALClusteringS2S import * diff --git a/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py new file mode 100644 index 0000000000..a4e4082a69 --- /dev/null +++ b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemistrySpecialtiesClustering(AbsTaskClustering): + metadata = TaskMetadata( + name="WikipediaSpecialtiesInChemistryClustering", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium5Clustering", + "revision": "7754d8d296f9f4c3af1c6426fab36304730ccddf", + }, + type="Clustering", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="v_measure", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and 
Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py new file mode 100644 index 0000000000..bfa5e1fcf3 --- /dev/null +++ b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemistryTopicsClustering(AbsTaskClustering): + metadata = TaskMetadata( + name="WikipediaChemistryTopicsClustering", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy10Clustering", + "revision": "0a0886b06acbfc735bca6a71b21ce1e5cb92a37b", + }, + type="Clustering", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="v_measure", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index c2057a4952..e229195df0 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -4,6 +4,10 @@ from 
.ces.CTKFactsNLI import * from .deu.FalseFriendsDeEnPC import * from .eng.LegalBenchPC import * +from .eng.PubChemAISentenceParaphrasePC import * +from .eng.PubChemSMILESPC import * +from .eng.PubChemSynonymPC import * +from .eng.PubChemWikiParagraphsPC import * from .eng.SprintDuplicateQuestionsPC import * from .eng.TwitterSemEval2015PC import * from .eng.TwitterURLCorpusPC import * @@ -13,6 +17,7 @@ from .kor.KlueNLI import * from .multilingual.OpusparcusPC import * from .multilingual.PawsXPairClassification import * +from .multilingual.PubChemWikiPairClassification import * from .multilingual.RTE3 import * from .multilingual.XNLI import * from .multilingual.XStance import * diff --git a/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py new file mode 100644 index 0000000000..f453ebee31 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemAISentenceParaphrasePC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemAISentenceParaphrasePC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemAISentenceParaphrasePC", + "revision": "f33a205966ce032f957c3a22f4f9e378f89a2c56", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="LM-generated", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text 
Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py new file mode 100644 index 0000000000..b3e297e043 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + +_DATASET_COLUMN_MAP = [ + { + "name": "iso-desc", + "sent1": "description", + "sent2": "isomeric_smiles", + "labels": "labels", + }, + { + "name": "iso-title", + "sent1": "title", + "sent2": "isomeric_smiles", + "labels": "labels", + }, + { + "name": "canon-desc", + "sent1": "description", + "sent2": "canonical_smiles", + "labels": "labels", + }, + { + "name": "canon-title", + "sent1": "title", + "sent2": "canonical_smiles", + "labels": "labels", + }, +] + + +class PubChemSMILESPC(AbsTaskPairClassification): + 
metadata = TaskMetadata( + name="PubChemSMILESPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemSMILESPairClassification", + "revision": "7ba40b69f5fe6ffe4cc189aac9e1710913c73c8a", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def load_data(self): + """Load dataset from HuggingFace hub""" + if self.data_loaded: + return + + _hf_dataset = None + for dataset_col_map in _DATASET_COLUMN_MAP: + _dataset = datasets.load_dataset( + self.metadata.dataset["path"], + dataset_col_map["name"], + revision=self.metadata.dataset["revision"], + ) + + _dataset = _dataset.rename_columns( + { + dataset_col_map["sent1"]: "sentence1", + dataset_col_map["sent2"]: "sentence2", + dataset_col_map["labels"]: "labels", + } + ) + + if _hf_dataset is None: + 
_hf_dataset = _dataset + else: + _hf_dataset["test"] = datasets.concatenate_datasets( + [_hf_dataset["test"], _dataset["test"]] + ) + + self.dataset = _hf_dataset + self.dataset_transform() + self.data_loaded = True + + def dataset_transform(self): + self.dataset = self.stratified_subsampling( + self.dataset, + seed=self.seed, + splits=self.metadata["eval_splits"], + label="labels", + ) + + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sentence1"], + "sentence2": hf_dataset["sentence2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py new file mode 100644 index 0000000000..6b6dfd81c8 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemSynonymPC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemSynonymPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemSynonymPC", + "revision": "5037d69d177c9628fb79cb57eea1299178b28c1b", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + 
author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["title"], + "sentence2": hf_dataset["synonyms"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py new file mode 100644 index 0000000000..679580f28c --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemWikiParagraphsPC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemWikiParagraphsPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemWikiParagraphsPC", + "revision": "7fb14716e4106b72f51a16e682e5cd2d67e9bd70", + }, + type="PairClassification", + category="p2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + 
task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py new file mode 100644 index 0000000000..59a0605a82 --- /dev/null +++ b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata + +_LANGUAGES = { + "de": ["deu-Latn", "eng-Latn"], + "nl": ["nld-Latn", "eng-Latn"], + "zh": ["zho-Hans", "eng-Latn"], + "fr": ["fra-Latn", "eng-Latn"], + "es": ["spa-Latn", "eng-Latn"], + "pt": 
["por-Latn", "eng-Latn"], + "ms": ["msa-Latn", "eng-Latn"], + "ko": ["kor-Hang", "eng-Latn"], + "tr": ["tur-Latn", "eng-Latn"], + "hi": ["hin-Deva", "eng-Latn"], + "cs": ["ces-Latn", "eng-Latn"], + "ja": ["jpn-Jpan", "eng-Latn"], +} + + +class PubChemWikiPairClassification(AbsTaskPairClassification, MultilingualTask): + metadata = TaskMetadata( + name="PubChemWikiPairClassification", + dataset={ + "path": "BASF-AI/PubChemWikiMultilingualPC", + "revision": "3412b208896a37e4ebb5ff7b96f6cc313ee9d2e3", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + category="s2s", + modalities=["text"], + type="PairClassification", + eval_splits=["test"], + eval_langs=_LANGUAGES, + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self) -> None: + _dataset = {} + for lang in self.hf_subsets: + _dataset[lang] = {} + hf_dataset = self.dataset[lang][self.metadata.eval_splits[0]] + 
_dataset[lang]["test"] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index d83df7ec5e..6c146379ea 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -29,6 +29,8 @@ from .eng.ARCChallengeRetrieval import * from .eng.ArguAnaRetrieval import * from .eng.BrightRetrieval import * +from .eng.ChemHotpotQARetrieval import * +from .eng.ChemNQRetrieval import * from .eng.ClimateFEVERRetrieval import * from .eng.CQADupstackAndroidRetrieval import * from .eng.CQADupstackEnglishRetrieval import * diff --git a/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py new file mode 100644 index 0000000000..88fbc50df4 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ChemHotpotQARetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ChemHotpotQARetrieval", + dataset={ + "path": "BASF-AI/ChemHotpotQARetrieval", + "revision": "1840e8a5ac6ec752bbdd97d543ead0189bc7c25b", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train", "dev", "test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on 
a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @inproceedings{yang-etal-2018-hotpotqa, + title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", + author = "Yang, Zhilin and + Qi, Peng and + Zhang, Saizheng and + Bengio, Yoshua and + Cohen, William and + Salakhutdinov, Ruslan and + Manning, Christopher D.", + editor = "Riloff, Ellen and + Chiang, David and + Hockenmaier, Julia and + Tsujii, Jun{'}ichi", + booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", + month = oct # "-" # nov, + year = "2018", + address = "Brussels, Belgium", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/D18-1259", + doi = "10.18653/v1/D18-1259", + pages = "2369--2380", + abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. 
We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", + } +""", + ) diff --git a/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py new file mode 100644 index 0000000000..1e77971331 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ChemNQRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ChemNQRetrieval", + dataset={ + "path": "BASF-AI/ChemNQRetrieval", + "revision": "5d958fb6b31055495347724d46431ba41309b03a", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{47761, + title = {Natural Questions: a Benchmark for Question Answering Research}, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh + and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee + and Kristina N. 
Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le + and Slav Petrov}, + year = {2019}, + journal = {Transactions of the Association of Computational Linguistics}} + """, + ) From 223bf324c213f222785bbf2db88e30c8069c610b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 25 Jan 2025 03:53:39 +0000 Subject: [PATCH 2/8] Update points table --- docs/mmteb/points_table.md | 200 ++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/docs/mmteb/points_table.md b/docs/mmteb/points_table.md index cd166890d7..dfb4a6b31c 100644 --- a/docs/mmteb/points_table.md +++ b/docs/mmteb/points_table.md @@ -2,103 +2,103 @@ _Note_: this table is **autogenerated** and should not be edited. It is intended to get an overview of contributions. - | GitHub | New dataset | Review PR | Bug fixes | Coordination | Paper writing | Dataset annotations | Running Models | New task | Total | -|:------------------|--------------:|------------:|------------:|---------------:|----------------:|----------------------:|-----------------:|-----------:|--------:| -| KennethEnevoldsen | 68 | 326 | 87 | 81 | 0 | 35 | 0 | 0 | 597 | -| isaac-chung | 120 | 194 | 50 | 54 | 12 | 1 | 0 | 2 | 433 | -| imenelydiaker | 120 | 144 | 24 | 70 | 0 | 0 | 0 | 0 | 358 | -| awinml | 300 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 302 | -| x-tabdeveloping | 144 | 32 | 10 | 41 | 0 | 0 | 0 | 12 | 239 | -| davidstap | 176 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 176 | -| jaygala24 | 149 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 149 | -| wissam-sib | 134 | 6 | 4 | 0 | 0 | 0 | 0 | 0 | 144 | -| Muennighoff | 0 | 48 | 0 | 70 | 0 | 0 | 24 | 0 | 142 | -| orionw | 0 | 20 | 20 | 75 | 0 | 0 | 0 | 10 | 125 | -| dokato | 94 | 6 | 12 | 0 | 0 | 0 | 0 | 0 | 112 | -| gentaiscool | 110 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 110 | -| jupyterjazz | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 108 | -| SaitejaUtpala | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 102 | -| vaibhavad | 6 | 4 | 8 | 75 | 0 | 0 | 0 | 0 | 
93 | -| schmarion | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | -| MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | -| GabrielSequeira | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | -| digantamisra98 | 71 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 71 | -| shreeya-dhakal | 54 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 62 | -| Rysias | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 58 | -| Samoed | 18 | 2 | 22 | 0 | 0 | 0 | 9 | 0 | 51 | -| sivareddyg | 0 | 0 | 0 | 50 | 0 | 0 | 0 | 0 | 50 | -| gowitheflow-1998 | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 50 | -| asparius | 34 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 48 | -| Akash190104 | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 46 | -| MartinBernstorff | 2 | 8 | 13 | 20 | 0 | 0 | 0 | 0 | 43 | -| akshita-sukhlecha | 36 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 40 | -| staoxiao | 40 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 40 | -| bp-high | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | -| rafalposwiata | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | -| KranthiGV | 20 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 34 | -| loicmagne | 0 | 0 | 28 | 0 | 0 | 0 | 0 | 0 | 28 | -| ShawonAshraf | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | -| bjoernpl | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | -| jphme | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | -| rasdani | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | -| violenil | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | -| mariyahendriksen | 0 | 0 | 0 | 0 | 24 | 0 | 0 | 0 | 24 | -| dwzhu-pku | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | -| hgissbkh | 0 | 2 | 13 | 0 | 3 | 0 | 0 | 5 | 23 | -| taeminlee | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | -| kwojtasi | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | -| jankounchained | 14 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 22 | -| tomaarsen | 0 | 2 | 0 | 20 | 0 | 0 | 0 | 0 | 22 | -| crystina-z | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | -| mrshu | 16 | 4 | 0 | 0 | 0 | 1 | 0 | 0 | 21 | -| john-b-yang | 0 | 0 | 0 | 0 | 20 | 0 | 0 | 0 | 20 | -| rbroc | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | -| mmhamdy | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | -| ManuelFay | 2 | 0 | 13 | 0 | 0 | 0 | 0 | 5 | 20 | -| AlexeyVatolin | 0 | 0 | 20 | 0 | 0 | 0 | 
0 | 0 | 20 | -| Andrian0s | 14 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 20 | -| thakur-nandan | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | -| manandey | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | -| PranjalChitale | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | -| dipam7 | 14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | -| sted97 | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | -| Sakshamrzt | 12 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | -| taidnguyen | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | -| artemsnegirev | 12 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 14 | -| slvnwhrl | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | -| anpalmak2003 | 9 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 12 | -| Art3mis07 | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | -| guenthermi | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | -| jordiclive | 2 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | 12 | -| xhluca | 6 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 12 | -| henilp105 | 0 | 0 | 2 | 0 | 0 | 9 | 0 | 0 | 11 | -| MariyaTikhonova | 7 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 11 | -| ab1992ao | 8 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 11 | -| tmp_handle | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 10 | -| swj0419 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| Ruqyai | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| ZhengLiu101 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| Alenush | 6 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 10 | -| ABorghini | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| simon-clematide | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| sarahooker | 0 | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 10 | -| guangyusong | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| HLasse | 0 | 0 | 5 | 0 | 0 | 5 | 0 | 0 | 10 | -| cassanof | 8 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 10 | -| hongjin-su | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| xiamengzhou | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| xu3kev | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| howard-yen | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| malteos | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| ljvmiranda921 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | -| marcobellagente93 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| izhx | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | -| 
MexicanLemonade | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| antoniolanza1996 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | -| achibb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| NouamaneTazi | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| PhilipMay | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| cslizc | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| bakrianoo | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| hanhainebula | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | -| monikernemo | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | \ No newline at end of file + | GitHub | New dataset | Review PR | Running Models | Bug fixes | Coordination | Dataset annotations | Paper writing | New task | Total | +|:------------------|--------------:|------------:|-----------------:|------------:|---------------:|----------------------:|----------------:|-----------:|--------:| +| KennethEnevoldsen | 68 | 326 | 0 | 87 | 81 | 35 | 0 | 0 | 597 | +| isaac-chung | 120 | 194 | 0 | 50 | 54 | 1 | 12 | 2 | 433 | +| imenelydiaker | 120 | 144 | 0 | 24 | 70 | 0 | 0 | 0 | 358 | +| awinml | 300 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 302 | +| x-tabdeveloping | 144 | 32 | 0 | 10 | 41 | 0 | 0 | 12 | 239 | +| davidstap | 176 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 176 | +| jaygala24 | 149 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 149 | +| wissam-sib | 134 | 6 | 0 | 4 | 0 | 0 | 0 | 0 | 144 | +| Muennighoff | 0 | 48 | 24 | 0 | 70 | 0 | 0 | 0 | 142 | +| orionw | 0 | 20 | 0 | 20 | 75 | 0 | 0 | 10 | 125 | +| dokato | 94 | 6 | 0 | 12 | 0 | 0 | 0 | 0 | 112 | +| gentaiscool | 110 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 110 | +| jupyterjazz | 108 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 108 | +| SaitejaUtpala | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 102 | +| vaibhavad | 6 | 4 | 0 | 8 | 75 | 0 | 0 | 0 | 93 | +| schmarion | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | +| MathieuCiancone | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | +| GabrielSequeira | 88 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 88 | +| digantamisra98 | 71 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 71 | +| shreeya-dhakal | 54 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 62 | +| Rysias | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 
0 | 58 | +| Samoed | 18 | 2 | 9 | 22 | 0 | 0 | 0 | 0 | 51 | +| sivareddyg | 0 | 0 | 0 | 0 | 50 | 0 | 0 | 0 | 50 | +| gowitheflow-1998 | 50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 50 | +| asparius | 34 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 48 | +| Akash190104 | 46 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 46 | +| MartinBernstorff | 2 | 8 | 0 | 13 | 20 | 0 | 0 | 0 | 43 | +| akshita-sukhlecha | 36 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 40 | +| staoxiao | 40 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 40 | +| bp-high | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | +| rafalposwiata | 36 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | +| KranthiGV | 20 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 34 | +| loicmagne | 0 | 0 | 0 | 28 | 0 | 0 | 0 | 0 | 28 | +| ShawonAshraf | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | +| bjoernpl | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | +| jphme | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | +| rasdani | 28 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | +| violenil | 26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | +| mariyahendriksen | 0 | 0 | 0 | 0 | 0 | 0 | 24 | 0 | 24 | +| dwzhu-pku | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24 | +| hgissbkh | 0 | 2 | 0 | 13 | 0 | 0 | 3 | 5 | 23 | +| taeminlee | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | +| kwojtasi | 22 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | +| jankounchained | 14 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 22 | +| tomaarsen | 0 | 2 | 0 | 0 | 20 | 0 | 0 | 0 | 22 | +| crystina-z | 21 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | +| mrshu | 16 | 4 | 0 | 0 | 0 | 1 | 0 | 0 | 21 | +| john-b-yang | 0 | 0 | 0 | 0 | 0 | 0 | 20 | 0 | 20 | +| rbroc | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | +| mmhamdy | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | +| ManuelFay | 2 | 0 | 0 | 13 | 0 | 0 | 0 | 5 | 20 | +| AlexeyVatolin | 0 | 0 | 0 | 20 | 0 | 0 | 0 | 0 | 20 | +| Andrian0s | 14 | 4 | 0 | 2 | 0 | 0 | 0 | 0 | 20 | +| thakur-nandan | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | +| manandey | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | +| PranjalChitale | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | +| dipam7 | 14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | +| sted97 | 16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | 
+| Sakshamrzt | 12 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | +| taidnguyen | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | +| artemsnegirev | 12 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 14 | +| slvnwhrl | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | +| anpalmak2003 | 9 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 12 | +| Art3mis07 | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | +| guenthermi | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | +| jordiclive | 2 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 12 | +| xhluca | 6 | 2 | 0 | 4 | 0 | 0 | 0 | 0 | 12 | +| henilp105 | 0 | 0 | 0 | 2 | 0 | 9 | 0 | 0 | 11 | +| MariyaTikhonova | 7 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 11 | +| ab1992ao | 8 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 11 | +| tmp_handle | 0 | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 10 | +| swj0419 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| Ruqyai | 2 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| ZhengLiu101 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| Alenush | 6 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 10 | +| ABorghini | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| simon-clematide | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| sarahooker | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 0 | 10 | +| guangyusong | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| HLasse | 0 | 0 | 0 | 5 | 0 | 5 | 0 | 0 | 10 | +| cassanof | 8 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 10 | +| hongjin-su | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| xiamengzhou | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| xu3kev | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| howard-yen | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| malteos | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| ljvmiranda921 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | +| marcobellagente93 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| izhx | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | +| MexicanLemonade | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| antoniolanza1996 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 2 | +| achibb | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| NouamaneTazi | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| PhilipMay | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| cslizc | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| bakrianoo | 2 | 0 | 0 | 0 | 0 | 
0 | 0 | 0 | 2 | +| hanhainebula | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | +| monikernemo | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | \ No newline at end of file From dfba463a945314ef8b2f9132b3a547b81d04dd5d Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 25 Jan 2025 04:05:16 +0000 Subject: [PATCH 3/8] 1.30.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 70ad64bd07..2883e6596a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.29.16" +version = "1.30.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From ee0f15ad03313d3a030c6f21ae6aafd9bc95bbb0 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sat, 25 Jan 2025 19:41:41 +0300 Subject: [PATCH 4/8] feat: add instruct wrapper (#1768) * add instruct wrapper * use get_task_instruction * add logging messages * apply based on PromptType * update description * change example model * move nvembed * Update mteb/models/instruct_wrapper.py Co-authored-by: Isaac Chung * update docstrings * add instruction to docs * Apply suggestions from code review Co-authored-by: Isaac Chung * lint --------- Co-authored-by: Isaac Chung --- docs/adding_a_model.md | 15 ++++++ mteb/models/instruct_wrapper.py | 85 +++++++++++++++++++++++++++++++++ mteb/models/nvidia_models.py | 75 +++++++---------------------- 3 files changed, 116 insertions(+), 59 deletions(-) diff --git a/docs/adding_a_model.md b/docs/adding_a_model.md index 0e22e1ea3a..314c6e9c39 100644 --- a/docs/adding_a_model.md +++ b/docs/adding_a_model.md @@ -71,3 +71,18 @@ The leaderboard [automatically refreshes daily](https://github.com/embeddings-be ###### Instantiating the Model with Prompts If you are unable to directly add the prompts in the model configuration, you can instantiate the model using the `sentence_transformers_loader` 
and pass `prompts` as an argument. For more details, see the `mteb/models/bge_models.py` file. + +##### Adding instruction models + +Models that use instructions can use the [`InstructSentenceTransformerWrapper`](../mteb/models/instruct_wrapper.py). For example: +```python +model = ModelMeta( + loader=partial( + InstructSentenceTransformerWrapper, + model_name="nvidia/NV-Embed-v1", + revision="7604d305b621f14095a1aa23d351674c2859553a", + instruction_template="Instruct: {instruction}\nQuery: ", + ), + ... +) +``` \ No newline at end of file diff --git a/mteb/models/instruct_wrapper.py b/mteb/models/instruct_wrapper.py index 2ee3a09b56..cc6e814629 100644 --- a/mteb/models/instruct_wrapper.py +++ b/mteb/models/instruct_wrapper.py @@ -6,6 +6,7 @@ import numpy as np import torch +from sentence_transformers import SentenceTransformer from mteb.encoder_interface import PromptType @@ -78,3 +79,87 @@ def encode( return embeddings return InstructWrapper(model_name_or_path, mode, instruction_template, **kwargs) + + +class InstructSentenceTransformerWrapper(Wrapper): + def __init__( + self, + model_name: str, + revision: str, + instruction_template: str | Callable[[str], str] | None = None, + max_seq_length: int | None = None, + apply_instruction_to_passages: bool = True, + padding_side: str | None = None, + add_eos_token: bool = False, + **kwargs: Any, + ): + """Instruct Sentence Transformer Wrapper. Wrapper that passes instructions to the Sentence Transformer model. + Applied for models like NV-Embed, gte-Qwen, e5-mistral, etc. + + Arguments: + model_name: Model name of the sentence transformers model. + revision: Revision of the sentence transformers model. + instruction_template: Model template. Should contain the string '{instruction}'. + max_seq_length: Maximum sequence length. If None, the maximum sequence length will be read from the model config. + apply_instruction_to_passages: Whether to apply the instruction template to the passages. + padding_side: Padding side. 
If None, the padding side will be read from the model config. + add_eos_token: Whether to add the eos token to each input example. + **kwargs: Kwargs for Sentence Transformer model. + """ + if ( + isinstance(instruction_template, str) + and "{instruction}" not in instruction_template + ): + raise ValueError( + "Instruction template must contain the string '{instruction}'." + ) + if instruction_template is None: + logger.warning( + "No instruction template provided. Instructions will be used as-is." + ) + + self.model_name = model_name + self.model = SentenceTransformer(model_name, revision=revision, **kwargs) + self.instruction_template = instruction_template + self.apply_instruction_to_passages = apply_instruction_to_passages + self.add_eos_token = add_eos_token + if max_seq_length is not None: + self.model.max_seq_length = max_seq_length + if padding_side is not None: + self.model.tokenizer.padding_side = padding_side + + def encode( + self, + sentences: Sequence[str], + *, + task_name: str, + prompt_type: PromptType | None = None, + **kwargs: Any, + ) -> np.ndarray: + if self.add_eos_token: + sentences = [ + example + self.model.tokenizer.eos_token for example in sentences + ] + + instruction = self.get_task_instruction(task_name, prompt_type) + + # when apply_instruction_to_passages is False, passages are encoded without an instruction + if not self.apply_instruction_to_passages and prompt_type == PromptType.passage: + instruction = None + logger.info( + f"No instruction used, because prompt type = {prompt_type.passage}" + ) + + if instruction: + logger.info(f"Using instruction: '{instruction}' for task: '{task_name}'") + + embeddings = self.model.encode( + sentences, + prompt=instruction, + **kwargs, + ) + + if isinstance(embeddings, torch.Tensor): + # kwargs can sometimes request tensor output, so convert to a numpy array + embeddings = embeddings.cpu().detach().float().numpy() + return embeddings diff --git a/mteb/models/nvidia_models.py b/mteb/models/nvidia_models.py index 1997a85274..f3b313356a 100644 --- 
a/mteb/models/nvidia_models.py +++ b/mteb/models/nvidia_models.py @@ -1,17 +1,11 @@ from __future__ import annotations import logging -from collections.abc import Sequence from functools import partial -from typing import Any - -import numpy as np -import torch -from sentence_transformers import CrossEncoder, SentenceTransformer from mteb.encoder_interface import PromptType from mteb.model_meta import ModelMeta -from mteb.models.sentence_transformer_wrapper import SentenceTransformerWrapper +from mteb.models.instruct_wrapper import InstructSentenceTransformerWrapper logger = logging.getLogger(__name__) @@ -22,56 +16,6 @@ def instruction_template( return f"Instruct: {instruction}\nQuery: " if instruction else "" -class NvEmbedWrapper(SentenceTransformerWrapper): - def __init__( - self, - model: str | SentenceTransformer | CrossEncoder, - revision: str | None = None, - model_prompts: dict[str, str] | None = None, - **kwargs, - ) -> None: - super().__init__(model, revision, model_prompts, **kwargs) - self.model.max_seq_length = 32768 - self.model.tokenizer.padding_side = "right" - logger.warning( - "Instructions are used in both query and docs, which may cause performance discrepancies from the original implementation." 
- ) - - def encode( - self, - sentences: Sequence[str], - *, - task_name: str, - prompt_type: PromptType | None = None, - **kwargs: Any, - ) -> np.ndarray: - # Add eos token to each input example - sentences = [example + self.model.tokenizer.eos_token for example in sentences] - - instruction = "" - if prompt_type == PromptType.query: - instruction = self.get_instruction(task_name, prompt_type) - - prompt = instruction_template(instruction) - - if prompt: - logger.info(f"Using {prompt=} for task={task_name} {prompt_type=}") - else: - logger.info(f"No model prompts found for task={task_name} {prompt_type=}") - - logger.info(f"Encoding {len(sentences)} sentences.") - - embeddings = self.model.encode( - sentences, - prompt=prompt, - normalize_embeddings=True, - **kwargs, - ) - if isinstance(embeddings, torch.Tensor): - embeddings = embeddings.cpu().detach().float().numpy() - return embeddings - - nvidia_training_datasets = { # source: https://arxiv.org/pdf/2405.17428 "ArguAna": ["train"], @@ -120,11 +64,18 @@ def encode( "STSBenchmark": ["train"], "STSBenchmarkMultilingualSTS": ["train"], # translated, not trained on } + NV_embed_v2 = ModelMeta( loader=partial( # type: ignore - NvEmbedWrapper, + InstructSentenceTransformerWrapper, model="nvidia/NV-Embed-v2", + revision="7604d305b621f14095a1aa23d351674c2859553a", + instruction_template=instruction_template, trust_remote_code=True, + max_seq_length=32768, + padding_side="right", + # for nv-embed, we add eos token to each input example + add_eos_token=True, ), name="nvidia/NV-Embed-v2", languages=["eng_Latn"], @@ -146,9 +97,15 @@ def encode( NV_embed_v1 = ModelMeta( loader=partial( # type: ignore - NvEmbedWrapper, + InstructSentenceTransformerWrapper, model="nvidia/NV-Embed-v1", + revision="7604d305b621f14095a1aa23d351674c2859553a", + instruction_template=instruction_template, trust_remote_code=True, + max_seq_length=32768, + padding_side="right", + # for nv-embed, we add eos token to each input example + 
add_eos_token=True, ), name="nvidia/NV-Embed-v1", languages=["eng_Latn"], From 3598b3746ed69af75afbfe2b03f0081d8b14923a Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 25 Jan 2025 16:50:17 +0000 Subject: [PATCH 5/8] 1.31.0 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2883e6596a..e989421883 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.30.0" +version = "1.31.0" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [ From 95714d06da6a06969e92526262d57f718e840c21 Mon Sep 17 00:00:00 2001 From: Sam <40773225+sam-hey@users.noreply.github.com> Date: Sun, 26 Jan 2025 08:03:43 +0100 Subject: [PATCH 6/8] doc: update pr template (#1871) * doc: update pr template * group testing & add: do not delete --------- Co-authored-by: sam021313 <40773225+sam021313@users.noreply.github.com> --- .github/pull_request_template.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index be1d1c7418..71ed2554f9 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -4,11 +4,18 @@ -## Checklist +### Code Quality +- [ ] **Code Formatted**: Format the code using `make lint` to maintain consistent style. -- [ ] Run tests locally to make sure nothing is broken using `make test`. -- [ ] Run the formatter to format the code using `make lint`. +### Documentation + +- [ ] **Updated Documentation**: Add or update documentation to reflect the changes introduced in this PR. + +### Testing + +- [ ] **New Tests Added**: Write tests to cover new functionality. Validate with `make test-with-coverage`. 
+- [ ] **Tests Passed**: Run tests locally using `make test` or `make test-with-coverage` to ensure no existing functionality is broken. ### Adding datasets checklist From 1d660892288d02379e67a59b94523410497ee20b Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Sun, 26 Jan 2025 10:21:58 +0300 Subject: [PATCH 7/8] fix: fix jina v1, 2 models (#1872) fix jina models --- mteb/models/gme_models.py | 1 - mteb/models/jina_models.py | 24 ++++++++++++++++++++++++ mteb/models/lens_models.py | 8 +------- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/mteb/models/gme_models.py b/mteb/models/gme_models.py index 53476403bc..804dfbc84d 100644 --- a/mteb/models/gme_models.py +++ b/mteb/models/gme_models.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -from functools import partial from mteb.model_meta import ModelMeta diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index e855ad3c7a..00641e9c89 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -245,6 +245,12 @@ def encode( jina_embeddings_v2_base_en = ModelMeta( + loader=partial( + SentenceTransformerWrapper, + model_name="jinaai/jina-embeddings-v2-base-en", + revision="6e85f575bc273f1fd840a658067d0157933c83f0", + trust_remote_code=True, + ), name="jinaai/jina-embeddings-v2-base-en", languages=["eng-Latn"], open_weights=True, @@ -266,6 +272,12 @@ def encode( ) jina_embeddings_v2_small_en = ModelMeta( + loader=partial( + SentenceTransformerWrapper, + model_name="jinaai/jina-embeddings-v2-small-en", + revision="796cff318cdd4e5fbe8b7303a1ef8cbec36996ef", + trust_remote_code=True, + ), name="jinaai/jina-embeddings-v2-small-en", languages=["eng-Latn"], open_weights=True, @@ -287,6 +299,12 @@ def encode( ) jina_embedding_b_en_v1 = ModelMeta( + loader=partial( + SentenceTransformerWrapper, + model_name="jinaai/jina-embedding-b-en-v1", + revision="aa0645035294a8c0607ce5bb700aba982cdff32c", + trust_remote_code=True, + ), 
name="jinaai/jina-embedding-b-en-v1", languages=["eng-Latn"], open_weights=True, @@ -308,6 +326,12 @@ def encode( ) jina_embedding_s_en_v1 = ModelMeta( + loader=partial( + SentenceTransformerWrapper, + model_name="jinaai/jina-embedding-s-en-v1", + revision="c1fed70aa4823a640f1a7150a276e4d3b08dce08", + trust_remote_code=True, + ), name="jinaai/jina-embedding-s-en-v1", languages=["eng-Latn"], open_weights=True, diff --git a/mteb/models/lens_models.py b/mteb/models/lens_models.py index 2fe54b26a1..380724e53e 100644 --- a/mteb/models/lens_models.py +++ b/mteb/models/lens_models.py @@ -1,12 +1,6 @@ from __future__ import annotations -from functools import partial - -import torch - -from mteb.encoder_interface import PromptType -from mteb.model_meta import ModelMeta, sentence_transformers_loader -from mteb.models.instruct_wrapper import instruct_wrapper +from mteb.model_meta import ModelMeta from .bge_models import bge_full_data From 7e7571e0956fe75f19572560771261044b5faf59 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sun, 26 Jan 2025 07:36:48 +0000 Subject: [PATCH 8/8] 1.31.1 Automatically generated by python-semantic-release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e989421883..f4a0111cd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mteb" -version = "1.31.0" +version = "1.31.1" description = "Massive Text Embedding Benchmark" readme = "README.md" authors = [