
Commit 82c0054

Authored by MohitIntel, ssarkar2, and regisss

Upgrade SD output image verification with CLIP score (huggingface#920)

Co-authored-by: Sayantan Sarkar <sasarkar@habana.ai>
Co-authored-by: regisss <15324346+regisss@users.noreply.github.com>

1 parent: 5aa26af

File tree

2 files changed (+41 −64 lines)


tests/clip_coco_utils.py (+14)

@@ -1,7 +1,12 @@
 import os
+
+# Calculate CLIP score
+from functools import partial
 from pathlib import Path
 from urllib.request import urlretrieve
 
+import torch
+from torchmetrics.functional.multimodal import clip_score
 from transformers import AutoImageProcessor, AutoTokenizer, VisionTextDualEncoderModel, VisionTextDualEncoderProcessor
 
 
@@ -39,3 +44,12 @@ def create_clip_roberta_model():
     processor.save_pretrained("clip-roberta")
 
     print("Model generated.")
+
+
+clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")
+
+
+def calculate_clip_score(images, prompts):
+    images_int = (images * 255).astype("uint8")
+    clip_score = clip_score_fn(torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts).detach()
+    return round(float(clip_score), 4)
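
For context, torchmetrics' clip_score returns 100 × the cosine similarity (clamped at 0) between the CLIP image and text embeddings, so a higher score means the image matches the prompt better. Below is a minimal usage sketch of the new helper; the flat import path is an assumption (inside the repo it is imported relatively from tests/clip_coco_utils.py), and the random image is only there to show the expected input format — it would naturally score low:

import numpy as np

# Hypothetical import path for standalone use; in the test suite this is
# `from .clip_coco_utils import calculate_clip_score`.
from clip_coco_utils import calculate_clip_score

# The helper expects float images in [0, 1] with shape (N, H, W, C),
# which is what Diffusers pipelines return with output_type="np".
images = np.random.rand(1, 512, 512, 3).astype("float32")
prompts = ["An image of a squirrel in Picasso style"]

score = calculate_clip_score(images, prompts)  # float, rounded to 4 decimals
print(f"CLIP score: {score}")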

tests/test_diffusers.py (+27 −64)

@@ -51,7 +51,7 @@
 )
 from optimum.habana.utils import set_seed
 
-from .clip_coco_utils import download_files
+from .clip_coco_utils import calculate_clip_score, download_files
 
 
 if os.environ.get("GAUDI2_CI", "0") == "1":
@@ -608,6 +608,8 @@ def test_no_throughput_regression_autocast(self):
 
     @slow
     def test_no_generation_regression(self):
+        seed = 27
+        set_seed(seed)
         model_name = "CompVis/stable-diffusion-v1-4"
         # fp32
         scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
@@ -619,39 +621,33 @@ def test_no_generation_regression(self):
             use_hpu_graphs=True,
             gaudi_config=GaudiConfig(use_torch_autocast=False),
         )
-        set_seed(27)
+
+        prompt = "An image of a squirrel in Picasso style"
+        generator = torch.manual_seed(seed)
         outputs = pipeline(
-            prompt="An image of a squirrel in Picasso style",
+            prompt=prompt,
+            generator=generator,
             output_type="np",
         )
 
         if os.environ.get("GAUDI2_CI", "0") == "1":
-            expected_slice = np.array(
-                [
-                    0.68306947,
-                    0.6812112,
-                    0.67309505,
-                    0.70057267,
-                    0.6582885,
-                    0.6325019,
-                    0.6708976,
-                    0.6226433,
-                    0.58038336,
-                ]
-            )
+            target_score = 29.8925
         else:
-            expected_slice = np.array(
-                [0.70760196, 0.7136303, 0.7000798, 0.714934, 0.6776865, 0.6800843, 0.6923707, 0.6653969, 0.6408076]
-            )
+            target_score = 36.774
+
         image = outputs.images[0]
         pil_image = numpy_to_pil(image)[0]
         pil_image.save("test_no_generation_regression_output.png")
 
+        clip_score = calculate_clip_score(np.expand_dims(image, axis=0), [prompt])
+
         self.assertEqual(image.shape, (512, 512, 3))
-        self.assertLess(np.abs(expected_slice - image[-3:, -3:, -1].flatten()).max(), 5e-3)
+        self.assertGreaterEqual(clip_score, target_score)
 
     @slow
     def test_no_generation_regression_ldm3d(self):
+        seed = 27
+        set_seed(seed)
         model_name = "Intel/ldm3d-4c"
         # fp32
         scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler")
@@ -663,61 +659,28 @@ def test_no_generation_regression_ldm3d(self):
             use_hpu_graphs=True,
             gaudi_config=GaudiConfig(),
         )
-        set_seed(27)
+
+        prompt = "An image of a squirrel in Picasso style"
+        generator = torch.manual_seed(seed)
         outputs = pipeline(
-            prompt="An image of a squirrel in Picasso style",
+            prompt=prompt,
+            generator=generator,
             output_type="np",
         )
 
         if os.environ.get("GAUDI2_CI", "0") == "1":
-            expected_slice_rgb = np.array(
-                [
-                    0.2099357,
-                    0.16664368,
-                    0.08352646,
-                    0.20643419,
-                    0.16748399,
-                    0.08781305,
-                    0.21379063,
-                    0.19943115,
-                    0.04389626,
-                ]
-            )
-            expected_slice_depth = np.array(
-                [
-                    0.68369114,
-                    0.6827824,
-                    0.6852779,
-                    0.6836072,
-                    0.6888298,
-                    0.6895473,
-                    0.6853674,
-                    0.67561126,
-                    0.660434,
-                ]
-            )
+            target_score = 28.0894
         else:
-            expected_slice_rgb = np.array([0.7083766, 1.0, 1.0, 0.70610344, 0.9867363, 1.0, 0.7214538, 1.0, 1.0])
-            expected_slice_depth = np.array(
-                [
-                    0.919621,
-                    0.92072034,
-                    0.9184986,
-                    0.91994286,
-                    0.9242079,
-                    0.93387043,
-                    0.92345214,
-                    0.93558526,
-                    0.9223714,
-                ]
-            )
+            target_score = 35.81
+
         rgb = outputs.rgb[0]
         depth = outputs.depth[0]
 
+        rgb_clip_score = calculate_clip_score(np.expand_dims(rgb, axis=0), [prompt])
+
         self.assertEqual(rgb.shape, (512, 512, 3))
         self.assertEqual(depth.shape, (512, 512, 1))
-        self.assertLess(np.abs(expected_slice_rgb - rgb[-3:, -3:, -1].flatten()).max(), 5e-3)
-        self.assertLess(np.abs(expected_slice_depth - depth[-3:, -3:, -1].flatten()).max(), 5e-3)
+        self.assertGreaterEqual(rgb_clip_score, target_score)
 
     @slow
     def test_no_generation_regression_upscale(self):
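
In short, the commit replaces brittle pixel-slice comparisons (which break on any bit-level numerical drift across software stacks) with a semantic check: a fixed seed pins the generation, and the test only asserts that the image/prompt CLIP score clears a per-device target. Below is a minimal sketch of the same pattern using the vanilla diffusers StableDiffusionPipeline rather than the Gaudi variants; the flat import path and the 25.0 threshold are made-up illustrative values, not the thresholds from the tests:

import numpy as np
import torch
from diffusers import StableDiffusionPipeline

from clip_coco_utils import calculate_clip_score  # hypothetical import path

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

prompt = "An image of a squirrel in Picasso style"
# A fixed-seed generator pins the initial latents, so the same prompt
# reproduces the same image across runs.
generator = torch.manual_seed(27)

image = pipe(prompt=prompt, generator=generator, output_type="np").images[0]

# Semantic regression check: assert the image still matches the prompt
# instead of comparing raw pixels against hardcoded values.
score = calculate_clip_score(np.expand_dims(image, axis=0), [prompt])
assert score >= 25.0, f"CLIP score regressed: {score}"  # illustrative threshold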
