refactor: solutions of chapter 12 #754

Merged
3 commits merged on Feb 26, 2024
40 changes: 14 additions & 26 deletions book/chapters/appendices/solutions.qmd
@@ -1807,23 +1807,25 @@ library(mlr3)
library(mlr3learners)
set.seed(1)

-fifa20 = fifa[,5:42]
+feat_of_interest = c("age", "skill_ball_control", "skill_curve", "skill_dribbling", "skill_fk_accuracy", "skill_long_passing", "value_eur")
+fifa20 = fifa[,feat_of_interest]

task_fifa = as_task_regr(fifa20, target = "value_eur", id = "fifa20")

learner = lrn("regr.ranger")
learner$train(task_fifa)
learner$model
```

-2. Use the permutation importance method to calculate variable importance ranking. Which variable is the most important? Is it surprising?
+2. Use the permutation importance method to calculate feature importance ranking. Which feature is the most important? Do you find the results surprising?

**With `iml`**

```{r solutions-045, warning=FALSE, message=FALSE}
library(iml)
model = Predictor$new(learner,
data = fifa20,
-  y = fifa$value_eur)
+  y = "value_eur")

effect = FeatureImp$new(model,
loss = "rmse")
@@ -1835,7 +1837,7 @@ effect$plot()
```{r solutions-046, warning=FALSE, message=FALSE}
library(DALEX)
ranger_exp = DALEX::explain(learner,
-  data = fifa20,
+  data = fifa20[, setdiff(names(fifa20), "value_eur")],
y = fifa$value_eur,
label = "Fifa 2020",
verbose = FALSE)
@@ -1845,34 +1847,32 @@ head(ranger_effect)
plot(ranger_effect)
```
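The idea both packages implement can be hand-rolled in a few lines: permute one feature to break its link with the target, re-predict, and measure how much the loss degrades relative to the baseline. A minimal base-R sketch, not part of the original solution; it assumes the trained `learner` and the `fifa20` data frame from the first chunk:

```{r}
# Permutation importance by hand: the increase in RMSE after shuffling
# a single feature measures how much the model relies on it.
rmse = function(truth, response) sqrt(mean((truth - response)^2))

baseline = rmse(fifa20$value_eur, learner$predict_newdata(fifa20)$response)

perm_importance = sapply(setdiff(names(fifa20), "value_eur"), function(feat) {
  shuffled = fifa20
  shuffled[[feat]] = sample(shuffled[[feat]])  # permute this feature only
  rmse(fifa20$value_eur, learner$predict_newdata(shuffled)$response) - baseline
})

sort(perm_importance, decreasing = TRUE)  # largest RMSE increase first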

-3. Use the Partial Dependence profile to draw the global behavior of the model for this variable. Is it aligned with your expectations?
+3. Use the partial dependence plot/profile to draw the global behavior of the model for this feature. Is it aligned with your expectations?

**With `iml`**

```{r solutions-047, warning=FALSE, message=FALSE}
-num_features = c("movement_reactions", "skill_ball_control", "age")
+impfeat = c("skill_ball_control")

-effect = FeatureEffects$new(model)
-plot(effect, features = num_features)
+effect = FeatureEffects$new(model, features = impfeat)
+plot(effect)
```

**With `DALEX`**

```{r solutions-048, warning=FALSE, message=FALSE}
-num_features = c("movement_reactions", "skill_ball_control", "age")
+impfeat = c("skill_ball_control")

-ranger_profiles = model_profile(ranger_exp, variables = num_features)
+ranger_profiles = model_profile(ranger_exp, variables = impfeat)
plot(ranger_profiles)
```
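A partial dependence profile is nothing more than the model's prediction averaged over the data while the feature of interest is clamped to each value on a grid. A hand-rolled sketch, again assuming the trained `learner` and `fifa20` from the first chunk rather than the packages' own implementations:

```{r}
# Manual partial dependence for skill_ball_control: for each grid value v,
# set the feature to v for every row, predict, and average the predictions.
grid = seq(min(fifa20$skill_ball_control), max(fifa20$skill_ball_control),
  length.out = 20)

pdp = sapply(grid, function(v) {
  tmp = fifa20
  tmp$skill_ball_control = v
  mean(learner$predict_newdata(tmp)$response)
})

plot(grid, pdp, type = "l",
  xlab = "skill_ball_control", ylab = "average predicted value_eur")
```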

-4 Choose one of the football players. You can choose some well-known striker (e.g. Robert Lewandowski) or a well-known goalkeeper (e.g. Manuel Neuer). The following tasks are worth repeating for several different choices.
+4. Choose Robert Lewandowski as a specific example and calculate and plot the Shapley values. Which feature is locally the most important and has the strongest influence on his valuation as a soccer player?

```{r solutions-049, warning=FALSE, message=FALSE}
-player_1 = fifa["R. Lewandowski", 5:42]
+player_1 = fifa20["R. Lewandowski",]
```

-5. For the selected footballer, calculate and plot the Shapley values. Which variable is locally the most important and has the strongest influence on the valuation of the footballer?

**With `iml`**

```{r solutions-050, warning=FALSE, message=FALSE}
@@ -1889,18 +1889,6 @@ ranger_shap = predict_parts(ranger_exp,
plot(ranger_shap, show_boxplots = FALSE)
```

-6. For the selected footballer, calculate the Ceteris Paribus / Individual Conditional Expectation profiles to draw the local behavior of the model for this variable. Is it different from the global behavior?
-
-**With `DALEX`**
-
-```{r solutions-052, warning=FALSE, message=FALSE}
-num_features = c("movement_reactions", "skill_ball_control", "age")
-
-ranger_ceteris = predict_profile(ranger_exp, player_1)
-plot(ranger_ceteris, variables = num_features) +
-  ggtitle("Ceteris paribus for R. Lewandowski", " ")
-```

## Solutions to @sec-special

1. Run a benchmark experiment on `tsk("german_credit")` with `lrn("classif.featureless")`, `lrn("classif.log_reg")`, and `lrn("classif.ranger")`. Tune the prediction thresholds of all learners by encapsulating them in a `po("learner_cv")` (with two-fold CV), followed by a `po("tunethreshold")`. Use `msr("classif.costs", costs = costs)`, where the `costs` matrix is as follows: true positive is `-10`, true negative is `-1`, false positive is `2`, and false negative is `3`. Use this measure in `po("tunethreshold")` and when evaluating your benchmark experiment.
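The pipeline this exercise describes can be sketched as follows. This is a sketch of one possible solution, not the book's official one: the outer 3-fold CV is an arbitrary choice, and it assumes `mlr3verse` (which loads `mlr3pipelines` and `mlr3learners`) is installed:

```{r}
library(mlr3verse)
set.seed(1)

task = tsk("german_credit")

# Cost matrix: predicted response in rows, truth in columns
# (see ?MeasureClassifCosts for the expected orientation)
costs = matrix(c(-10, 3, 2, -1), nrow = 2,
  dimnames = list(response = c("good", "bad"), truth = c("good", "bad")))
msr_costs = msr("classif.costs", costs = costs)

# Wrap each learner: 2-fold internal CV produces cross-validated
# probabilities, then po("tunethreshold") optimizes the threshold on them.
learners = lapply(c("classif.featureless", "classif.log_reg", "classif.ranger"),
  function(id) {
    graph = po("learner_cv", lrn(id, predict_type = "prob"),
      resampling.folds = 2) %>>%
      po("tunethreshold", measure = msr_costs)
    as_learner(graph)
  })

design = benchmark_grid(task, learners, rsmp("cv", folds = 3))
bmr = benchmark(design)
bmr$aggregate(msr_costs)
```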