Skip to content

Commit

Permalink
Refactor (#1445)
Browse files Browse the repository at this point in the history
  • Loading branch information
jknndy authored Dec 27, 2024
1 parent 3f8bff4 commit e454403
Show file tree
Hide file tree
Showing 5 changed files with 375 additions and 395 deletions.
52 changes: 14 additions & 38 deletions recipe_scrapers/nhshealthierfamilies.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,28 @@ def title(self):
title = title[:-7]
return title

def _get_recipe_metadata(self):
def _get_recipe_content(self):
container = self.soup.find("div", {"class": "bh-recipe__description"})
descriptions = container.findAll("p")
content = "".join([description.get_text() for description in descriptions])
return "".join([description.get_text() for description in descriptions])

def prep_time(self):
content = self._get_recipe_content()
prep_time = re.search(r"Prep: (\d+) mins", content)
return get_minutes(prep_time.group(0)) if prep_time else 0

def cook_time(self):
content = self._get_recipe_content()
cook_time = re.search(r"Cook: (\d+) mins", content)
recipe_yields = re.search(r"Serves (\d+)", content)
return {
"prep_time": get_minutes(prep_time.group(0)) if prep_time else None,
"cook_time": get_minutes(cook_time.group(0)) if cook_time else None,
"yields": get_yields(recipe_yields.group(0)) if recipe_yields else None,
}
return get_minutes(cook_time.group(0)) if cook_time else 0

def total_time(self):
metadata = self._get_recipe_metadata()
return metadata["prep_time"] + metadata["cook_time"]
return self.prep_time() + self.cook_time()

def yields(self):
metadata = self._get_recipe_metadata()
return metadata["yields"]

def image(self):
return self.soup.find("img", {"class": "nhsuk-image__img"})["src"]

def ingredients(self):
ingredients = []
instructions_div = self.soup.find("div", {"class": "bh-recipe-instructions"})
ul = instructions_div.find("ul")

if ul:
for li in ul.findAll("li"):
ingredients.append(normalize_string(li.get_text()))

# Stop when encountering an 'ol' element which is where instructions are stored.
for sibling in ul.find_next_siblings():
if sibling.name == "ol":
break
if sibling.name == "ul":
for li in sibling.findAll("li"):
ingredients.append(normalize_string(li.get_text()))

return ingredients
content = self._get_recipe_content()
recipe_yields = re.search(r"Serves (\d+)", content)
return get_yields(recipe_yields.group(0)) if recipe_yields else None

def ingredient_groups(self):
return group_ingredients(
Expand All @@ -82,7 +62,3 @@ def instructions(self):
return "\n".join(
[normalize_string(instruction) for instruction in instructions]
)

def description(self):
description_meta = self.soup.find("meta", {"name": "description"})
return normalize_string(description_meta["content"])
7 changes: 4 additions & 3 deletions tests/test_data/nhs.uk/nhshealthierfamilies_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
"Meanwhile, cook the rice according to pack instructions.",
"Season the chilli with pepper and serve with the boiled rice."
],
"category": null,
"yields": "4 servings",
"description": "This classic chilli is packed with flavour. It also freezes well, so is perfect for batch-cooking.",
"description": "This classic chilli is packed with flavour. It also freezes well, so is perfect to batch-cook.",
"total_time": 35,
"image": "https://assets.nhs.uk/campaigns-cms-prod/images/Chilli-con-carne_x7m8d91.width-320.jpg"
"cook_time": 25,
"prep_time": 10,
"image": "https://digitalcampaignsstorage.blob.core.windows.net/campaigns-cms-prod/images/Chilli-con-carne_x7m8d91.width-320.jpg"
}
Loading

0 comments on commit e454403

Please sign in to comment.