From afdd5e63349922fed9d71087b9f3cb280c5f991a Mon Sep 17 00:00:00 2001
From: le zhang
Date: Fri, 5 Jan 2024 11:08:26 -0500
Subject: [PATCH] update readme

---
 README.md                                        | 14 +++++++++-----
 ...hot 2024-01-05 at 10.52.11\342\200\257AM.png" | Bin 1412 -> 0 bytes
 .../abaltion.png                                 | Bin
 .../aro.png                                      | Bin
 ...mage-20240105105019281.png => motivation.png} | Bin
 .../sugarcrepe.png                               | Bin
 .../vlchecklist.png                              | Bin
 7 files changed, 9 insertions(+), 5 deletions(-)
 delete mode 100644 "assets/Screenshot 2024-01-05 at 10.52.11\342\200\257AM.png"
 rename "assets/Screenshot 2024-01-05 at 10.54.41\342\200\257AM.png" => assets/abaltion.png (100%)
 rename "assets/Screenshot 2024-01-05 at 10.52.15\342\200\257AM.png" => assets/aro.png (100%)
 rename assets/{image-20240105105019281.png => motivation.png} (100%)
 rename "assets/Screenshot 2024-01-05 at 10.53.52\342\200\257AM.png" => assets/sugarcrepe.png (100%)
 rename "assets/Screenshot 2024-01-05 at 10.53.26\342\200\257AM.png" => assets/vlchecklist.png (100%)

diff --git a/README.md b/README.md
index af26f58..0662707 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 TL;DR: We propose **two losses** to enhance a model's **compositional understanding**; they can be added to any contrastive vision-language loss, such as CLIP's. Both losses are applied to our **generated hard negative** examples.
-![image-20240105105019281](./assets/image-20240105105019281.png)
+![motivation](./assets/motivation.png)
 **This repo is forked from the wonderful [OpenCLIP](https://github.com/mlfoundations/open_clip)**; for model and training details, please refer to the original repo.
@@ -40,7 +40,9 @@
 The resulting checkpoint will be at `Enhance-FineGrained/src/Outputs`.
 We evaluate our method on four downstream benchmarks: [ARO](https://github.com/mertyg/vision-language-models-are-bows), [VALSE](https://github.com/Heidelberg-NLP/VALSE), [VL-CheckList](https://github.com/om-ai-lab/VL-CheckList), and the very recent [SugarCrepe](https://github.com/RAIVNLab/sugar-crepe), and we provide evaluation code for all of them. However, the datasets must be downloaded from the official GitHub pages before evaluating on them.
-### ARO&VALSE![Screenshot 2024-01-05 at 10.52.15 AM](./assets/Screenshot 2024-01-05 at 10.52.15 AM.png)
+### ARO&VALSE
+
+![ARO](./assets/aro.png)
 Evaluation code for ARO is included in `Enhance-FineGrained/vision-language-models-are-bows`; to reproduce the results, you need
@@ -53,13 +55,15 @@
 1. Evaluation code for VALSE is included in `Enhance-FineGrained/VALSE`. To reproduce the results on VALSE, please download the dataset [here](https://github.com/Heidelberg-NLP/VALSE) first, **then replace the dataset path** in `Enhance-FineGrained/VALSE/clip_valse_eval.py` and `Enhance-FineGrained/VALSE/xvlm_valse_eval.py`.
 2. Replace `$checkpoint` in `Enhance-FineGrained/VALSE/scripts` and run the scripts; evaluation results will be written to `/home/mila/l/le.zhang/scratch/Enhance-FineGrained/VALSE/output`.
-### VL-CheckList![Screenshot 2024-01-05 at 10.53.26 AM](./assets/Screenshot 2024-01-05 at 10.53.26 AM.png)
+### VL-CheckList
+![vlchecklist](./assets/vlchecklist.png)
 Please refer to the [official GitHub](https://github.com/om-ai-lab/VL-CheckList) repo to download the dataset and perform evaluation.
 *Note that downloading the dataset can be quite cumbersome*; we provide a helper script [here](https://github.com/rabiulcste/vl_checklist/tree/ca0c68d1f457f670139feb75a6b884adff88aeee).
-### :star2: SugarCrepe![Screenshot 2024-01-05 at 10.53.52 AM](./assets/Screenshot 2024-01-05 at 10.53.52 AM.png)
+### :star2: SugarCrepe
+![sugarcrepe](./assets/sugarcrepe.png)
 [SugarCrepe](https://github.com/RAIVNLab/sugar-crepe) is a benchmark for faithful vision-language compositionality evaluation. This dataset **fixes several biases** in all of the above benchmarks, *biases that render them hackable, so that blind models with no access to the image can outperform state-of-the-art vision-language models*.
@@ -76,7 +80,7 @@
 python main_eval.py --model ViT-B-32 --pretrained Enhance-FineGrained/clip/epoch
 Our method entails curriculum learning, which is validated by the growth of the adaptive threshold.
-![Screenshot 2024-01-05 at 10.54.41 AM](./assets/Screenshot 2024-01-05 at 10.54.41 AM.png)
+![abaltion](./assets/abaltion.png)
 # :paperclip: Citation

diff --git "a/assets/Screenshot 2024-01-05 at 10.52.11\342\200\257AM.png" "b/assets/Screenshot 2024-01-05 at 10.52.11\342\200\257AM.png"
deleted file mode 100644
index b07268e04c6e0dd24bf4599c5a01294c86fdd4a0..0000000000000000000000000000000000000000
GIT binary patch
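As context for the README changes above: the TL;DR describes two losses applied to generated hard negative examples. The sketch below is only a minimal, hypothetical illustration of that general idea in a CLIP-style setup; it is not the repo's actual implementation, and all function and variable names are made up.

```python
# Illustrative sketch (assumption, not the repo's losses): a CLIP-style InfoNCE
# loss where each image also gets one generated hard-negative caption appended
# as an extra text candidate.
import torch
import torch.nn.functional as F


def clip_loss_with_hard_negatives(image_emb, text_emb, hard_neg_emb, logit_scale=100.0):
    """
    image_emb:    (B, D) L2-normalized image embeddings
    text_emb:     (B, D) L2-normalized embeddings of the matching captions
    hard_neg_emb: (B, D) L2-normalized embeddings of generated hard-negative captions
    logit_scale:  temperature scaling, as in CLIP
    """
    B = image_emb.size(0)
    # Standard image->text similarities over the in-batch captions ...
    logits_per_image = logit_scale * image_emb @ text_emb.t()                             # (B, B)
    # ... plus one extra column per image: its own hard-negative caption.
    hard_neg_logits = logit_scale * (image_emb * hard_neg_emb).sum(dim=-1, keepdim=True)  # (B, 1)
    logits = torch.cat([logits_per_image, hard_neg_logits], dim=1)                        # (B, B+1)
    labels = torch.arange(B, device=image_emb.device)
    # Cross-entropy pushes each image toward its true caption and away from both
    # in-batch negatives and its generated hard negative.
    return F.cross_entropy(logits, labels)


# Toy usage with random, normalized features:
B, D = 4, 512
img = F.normalize(torch.randn(B, D), dim=-1)
txt = F.normalize(torch.randn(B, D), dim=-1)
neg = F.normalize(torch.randn(B, D), dim=-1)
print(clip_loss_with_hard_negatives(img, txt, neg))
```

The actual method in this repo proposes two such losses and generates the hard negatives automatically; please refer to the repo and paper for the exact formulation.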