Skip to content

Commit

Permalink
Update parallelization episode
Browse files Browse the repository at this point in the history
  • Loading branch information
joelnitta committed Dec 11, 2024
1 parent 71d36ae commit 7f83f51
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 109 deletions.
34 changes: 31 additions & 3 deletions episodes/branch.Rmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: 'Branching'
teaching: 10
teaching: 30
exercises: 2
---

Expand Down Expand Up @@ -152,7 +152,7 @@ Before moving on, let's define another **custom function** function: `model_glan
You will need to write custom functions frequently when using `targets`, so it's good to get used to it!

As the name `model_glance()` suggests (it is good to write functions with names that indicate their purpose), this will build a model then immediately run `glance()` on it.
The reason for doing so is that we get a **dataframe as a result**, which as previously mentioned is very helpful for branching, as we will see in the next section.
The reason for doing so is that we get a **dataframe as a result**, which is very helpful for branching, as we will see in the next section.
Save this in `R/functions.R`:

```{r}
Expand Down Expand Up @@ -310,14 +310,42 @@ Add the step to the workflow:

```{r}
#| label = "example-model-augment-show",
#| code = readLines("files/plans/plan_8.R")[2:35],
#| code = readLines("files/plans/plan_7.R")[2:36],
#| eval = FALSE
```

::::::::::::::::::::::::::::::::::

:::::::::::::::::::::::::::::::::::::

### Further simplify the workflow

You may have noticed that we can further simplify the workflow: there is no need to have separate `penguins_data` and `penguins_data_grouped` dataframes.
In general it is best to keep the number of named objects as small as possible to make it easier to reason about your code.
Let's combine the cleaning and grouping steps into a single command:

```{r}
#| label = "example-model-show-8",
#| eval = FALSE,
#| code = readLines("files/plans/plan_8.R")[2:35]
```

And run it once more:

```{r}
#| label: example-model-hide-8
#| echo: false
# Hidden chunk (echo: false). The label must differ from the preceding
# "example-model-show-8" chunk, because knitr rejects duplicate chunk labels.
# NOTE(review): pushd(), popd(), write_example_plan() and plan_6_dir are
# defined elsewhere in the lesson setup -- confirm plan_6_dir is the intended
# working directory for this step.
pushd(plan_6_dir)
# simulate already running the plan once
write_example_plan("plan_7.R")
tar_make(reporter = "silent")
# run the version of the plan that cleans and groups the data in a single
# `tar_group_by()` step
write_example_plan("plan_8.R")
tar_make()
popd()
```

::::::::::::::::::::::::::::::::::::: {.callout}

## Best practices for branching
Expand Down
35 changes: 17 additions & 18 deletions episodes/files/plans/plan_10.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,26 @@ tar_plan(
path_to_file("penguins_raw.csv"),
read_csv(!!.x, show_col_types = FALSE)
),
# Clean data
penguins_data = clean_penguin_data(penguins_data_raw),
# Build models
models = list(
combined_model = lm(
bill_depth_mm ~ bill_length_mm, data = penguins_data),
species_model = lm(
bill_depth_mm ~ bill_length_mm + species, data = penguins_data),
interaction_model = lm(
bill_depth_mm ~ bill_length_mm * species, data = penguins_data)
# Clean and group data
tar_group_by(
penguins_data,
clean_penguin_data(penguins_data_raw),
species
),
# Get model summaries
# Get summary of combined model with all species together
combined_summary = model_glance(penguins_data),
# Get summary of one model per species
tar_target(
model_summaries,
glance_with_mod_name_slow(models),
pattern = map(models)
species_summary,
model_glance_slow(penguins_data),
pattern = map(penguins_data)
),
# Get model predictions
# Get predictions of combined model with all species together
combined_predictions = model_glance_slow(penguins_data),
# Get predictions of one model per species
tar_target(
model_predictions,
augment_with_mod_name_slow(models),
pattern = map(models)
species_predictions,
model_augment_slow(penguins_data),
pattern = map(penguins_data)
)
)
34 changes: 34 additions & 0 deletions episodes/files/plans/plan_6c.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Workflow definition: load the raw penguins data, clean and group it by
# species, then summarise and predict with a linear model, both for the
# combined dataset and for each species separately.
options(tidyverse.quiet = TRUE)
source("R/functions.R")
source("R/packages.R")

tar_plan(
  # Load raw data
  tar_file_read(
    penguins_data_raw,
    path_to_file("penguins_raw.csv"),
    read_csv(!!.x, show_col_types = FALSE)
  ),
  # Clean and group data
  tar_group_by(
    penguins_data,
    clean_penguin_data(penguins_data_raw),
    species
  ),
  # Get summary of combined model with all species together
  combined_summary = model_glance(penguins_data),
  # Get summary of one model per species
  tar_target(
    species_summary,
    model_glance(penguins_data),
    pattern = map(penguins_data)
  ),
  # Get predictions of combined model with all species together
  # (uses model_augment(), matching the per-species predictions below;
  # model_glance() would only repeat combined_summary)
  combined_predictions = model_augment(penguins_data),
  # Get predictions of one model per species
  tar_target(
    species_predictions,
    model_augment(penguins_data),
    pattern = map(penguins_data)
  )
)
31 changes: 19 additions & 12 deletions episodes/files/plans/plan_7.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,26 @@ tar_plan(
),
# Clean data
penguins_data = clean_penguin_data(penguins_data_raw),
# Build models
models = list(
combined_model = lm(
bill_depth_mm ~ bill_length_mm, data = penguins_data),
species_model = lm(
bill_depth_mm ~ bill_length_mm + species, data = penguins_data),
interaction_model = lm(
bill_depth_mm ~ bill_length_mm * species, data = penguins_data)
# Group data
tar_group_by(
penguins_data_grouped,
penguins_data,
species
),
# Get model summaries
# Get summary of combined model with all species together
combined_summary = model_glance(penguins_data),
# Get summary of one model per species
tar_target(
model_summaries,
glance_with_mod_name(models),
pattern = map(models)
species_summary,
model_glance(penguins_data_grouped),
pattern = map(penguins_data_grouped)
),
# Get predictions of combined model with all species together
combined_predictions = model_glance(penguins_data_grouped),
# Get predictions of one model per species
tar_target(
species_predictions,
model_augment(penguins_data_grouped),
pattern = map(penguins_data_grouped)
)
)
35 changes: 17 additions & 18 deletions episodes/files/plans/plan_8.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,26 @@ tar_plan(
path_to_file("penguins_raw.csv"),
read_csv(!!.x, show_col_types = FALSE)
),
# Clean data
penguins_data = clean_penguin_data(penguins_data_raw),
# Build models
models = list(
combined_model = lm(
bill_depth_mm ~ bill_length_mm, data = penguins_data),
species_model = lm(
bill_depth_mm ~ bill_length_mm + species, data = penguins_data),
interaction_model = lm(
bill_depth_mm ~ bill_length_mm * species, data = penguins_data)
# Clean and group data
tar_group_by(
penguins_data,
clean_penguin_data(penguins_data_raw),
species
),
# Get model summaries
# Get summary of combined model with all species together
combined_summary = model_glance(penguins_data),
# Get summary of one model per species
tar_target(
model_summaries,
glance_with_mod_name(models),
pattern = map(models)
species_summary,
model_glance(penguins_data),
pattern = map(penguins_data)
),
# Get model predictions
# Get predictions of combined model with all species together
combined_predictions = model_glance(penguins_data),
# Get predictions of one model per species
tar_target(
model_predictions,
augment_with_mod_name(models),
pattern = map(models)
species_predictions,
model_augment(penguins_data),
pattern = map(penguins_data)
)
)
35 changes: 17 additions & 18 deletions episodes/files/plans/plan_9.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,26 @@ tar_plan(
path_to_file("penguins_raw.csv"),
read_csv(!!.x, show_col_types = FALSE)
),
# Clean data
penguins_data = clean_penguin_data(penguins_data_raw),
# Build models
models = list(
combined_model = lm(
bill_depth_mm ~ bill_length_mm, data = penguins_data),
species_model = lm(
bill_depth_mm ~ bill_length_mm + species, data = penguins_data),
interaction_model = lm(
bill_depth_mm ~ bill_length_mm * species, data = penguins_data)
# Clean and group data
tar_group_by(
penguins_data,
clean_penguin_data(penguins_data_raw),
species
),
# Get model summaries
# Get summary of combined model with all species together
combined_summary = model_glance(penguins_data),
# Get summary of one model per species
tar_target(
model_summaries,
glance_with_mod_name(models),
pattern = map(models)
species_summary,
model_glance(penguins_data),
pattern = map(penguins_data)
),
# Get model predictions
# Get predictions of combined model with all species together
combined_predictions = model_glance(penguins_data),
# Get predictions of one model per species
tar_target(
model_predictions,
augment_with_mod_name(models),
pattern = map(models)
species_predictions,
model_augment(penguins_data),
pattern = map(penguins_data)
)
)
2 changes: 1 addition & 1 deletion episodes/files/tar_functions/model_augment.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
model_glance <- function(penguins_data) {
model_augment <- function(penguins_data) {
# Make model
model <- lm(
bill_depth_mm ~ bill_length_mm,
Expand Down
17 changes: 17 additions & 0 deletions episodes/files/tar_functions/model_augment_slow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Deliberately slow variant of the model-prediction step.
#
# Waits four seconds, fits a linear model of bill depth on bill length to
# `penguins_data`, and returns the output of `augment()` for that model with
# a `species` column placed first.
# NOTE(review): `augment()` and `mutate()` are presumably provided by broom
# and dplyr via R/packages.R -- confirm.
model_augment_slow <- function(penguins_data) {
  # Artificial delay
  Sys.sleep(4)
  # Fit the model
  fit <- lm(
    bill_depth_mm ~ bill_length_mm,
    data = penguins_data)
  # Work out the species label: a single species keeps its own name,
  # while data containing several species is labelled 'combined'
  species_found <- unique(penguins_data$species)
  species_label <- if (length(species_found) > 1) {
    "combined"
  } else {
    species_found
  }
  # Get per-observation model output and add the species label
  fit_output <- augment(fit)
  mutate(fit_output, species = species_label, .before = 1)
}
17 changes: 17 additions & 0 deletions episodes/files/tar_functions/model_glance_slow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Deliberately slow variant of the model-summary step.
#
# Waits four seconds, fits a linear model of bill depth on bill length to
# `penguins_data`, and returns the output of `glance()` for that model with
# a `species` column placed first.
# NOTE(review): `glance()` and `mutate()` are presumably provided by broom
# and dplyr via R/packages.R -- confirm.
model_glance_slow <- function(penguins_data) {
  # Artificial delay
  Sys.sleep(4)
  # Fit the model
  fit <- lm(
    bill_depth_mm ~ bill_length_mm,
    data = penguins_data)
  # Work out the species label: a single species keeps its own name,
  # while data containing several species is labelled 'combined'
  species_found <- unique(penguins_data$species)
  species_label <- if (length(species_found) > 1) {
    "combined"
  } else {
    species_found
  }
  # Get the model summary and add the species label
  fit_summary <- glance(fit)
  mutate(fit_summary, species = species_label, .before = 1)
}
Loading

0 comments on commit 7f83f51

Please sign in to comment.