Dev/lightgbm (#27)

* catching back up to main
* changed averageweight model to lightgbm
* more tuning for lightgbm averageweight
* models with lightgbm
* added tuning and variable importance plots

---------

Co-authored-by: Phil Henrickson <phil.henrickson@github.com>
phenrickson · May 24, 2024 · 45dd16f · 45dd16f
1 parent 34fe1b0
commit 45dd16f
Show file tree

Hide file tree

Showing 9 changed files with 814 additions and 394 deletions.
diff --git a/_targets.R b/_targets.R
@@ -76,10 +76,10 @@ list(
             train_outcome_wflow(outcome = 'averageweight',
                                 weights = 5,
                                 valid_years = valid_years,
-                                recipe = recipe_linear,
-                                model_spec = glmnet_spec(),
-                                grid = glmnet_grid(),
-                                splines = spline_vars())
+                                recipe = recipe_trees,
+                                model_spec = lightgbm_spec(),
+                                grid = lightgbm_grid()),
+        packages = c("bonsai", "lightgbm")
     ),
     # now fit model
     tar_target(
@@ -106,12 +106,11 @@ list(
             train_outcome_wflow(outcome = 'average',
                                 ratings = 25,
                                 valid_years = valid_years,
-                                recipe = recipe_linear,
-                                model_spec = glmnet_spec(),
-                                grid = glmnet_grid(),
+                                recipe = recipe_trees,
+                                model_spec = lightgbm_spec(),
+                                grid = lightgbm_grid(),
                                 ids = id_vars(),
-                                predictors = c("est_averageweight", predictor_vars()),
-                                splines = c("est_averageweight", spline_vars()))
+                                predictors = c("est_averageweight", predictor_vars()))
     ),
     # now train usersrated
     tar_target(
@@ -121,12 +120,20 @@ list(
             train_outcome_wflow(outcome = 'usersrated',
                                 ratings = 25,
                                 valid_years = valid_years,
-                                recipe = recipe_linear,
-                                model_spec = glmnet_spec(),
-                                grid = glmnet_grid(),
+                                recipe = recipe_trees,
+                                model_spec = lightgbm_spec(),
+                                grid = lightgbm_grid(),
                                 ids = id_vars(),
-                                predictors = c("est_averageweight", predictor_vars()),
-                                splines = c("est_averageweight", spline_vars()))
+                                predictors = c("est_averageweight", predictor_vars()))
+    ),
+    # extract tuning plots
+    tar_target(
+        name = tuning_plots,
+        command =
+            bind_rows(averageweight_tuned,
+                      average_tuned,
+                      usersrated_tuned) |>
+            get_tuning_plots()
     ),
     # fit models to whole of training
     # average
@@ -187,12 +194,6 @@ list(
                            file = "targets-runs/tracking.csv"),
         format = "file"
     ),
-    # # render report with quarto
-    # tar_quarto(
-    #     report,
-    #     path = "results.qmd",
-    #     quiet = F
-    # ),
     ## finalize models and predict test set
     # get training and validation
     tar_target(

diff --git a/_targets/meta/meta b/_targets/meta/meta
diff --git a/docs/index.html b/docs/index.html
diff --git a/docs/results.md b/docs/results.md
@@ -1,6 +1,6 @@
 # Model Results
 
-2024-05-23
+2024-05-24
 
 # pipeline
 
@@ -15,54 +15,58 @@ graph LR
   end
   subgraph Graph
     direction LR
-    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate
-    x170105733f313efa(["training_imputed"]):::uptodate --> x8b18f98f2746942f(["usersrated_tuned"]):::uptodate
-    x2b7f0716b8751c70(["games_raw"]):::uptodate --> xe73a0f95bb7c669b(["games_prepared"]):::uptodate
-    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate
-    x6a9d509448f9bd3e(["split"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate
-    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate
-    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate
-    x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> xc2507955de0caf4e(["valid_metrics"]):::uptodate
     xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xd99568a85da9c31b(["usersrated_final"]):::uptodate
     xd6936e852f1fbe47(["usersrated_fit"]):::uptodate --> xd99568a85da9c31b(["usersrated_final"]):::uptodate
+    xe73a0f95bb7c669b(["games_prepared"]):::uptodate --> x6a9d509448f9bd3e(["split"]):::uptodate
+    x5a0dca7931d71121(["averageweight_final"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
+    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
+    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
+    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
+    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate
+    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate
+    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate
     x51e4cab15d71e6a3(["average_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate
     x5a0dca7931d71121(["averageweight_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate
     x6a9d509448f9bd3e(["split"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate
     xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate
-    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate
-    x6a9d509448f9bd3e(["split"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate
-    xe73a0f95bb7c669b(["games_prepared"]):::uptodate --> x6a9d509448f9bd3e(["split"]):::uptodate
-    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
-    xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
-    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
-    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
+    xd0ff2216903615b0(["average_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
+    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
+    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
     x170105733f313efa(["training_imputed"]):::uptodate --> xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate
     x60bd72cb3684ce1d(["validation_imputed"]):::uptodate --> xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate
-    x170105733f313efa(["training_imputed"]):::uptodate --> xd0ff2216903615b0(["average_tuned"]):::uptodate
-    xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate
-    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate
-    x45aff652992a023a(["details"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
-    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
-    x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
+    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> xd6936e852f1fbe47(["usersrated_fit"]):::uptodate
+    xd0ff2216903615b0(["average_tuned"]):::uptodate --> xc0de7cb1ceaaae9a(["average_fit"]):::uptodate
+    x2b7f0716b8751c70(["games_raw"]):::uptodate --> xe73a0f95bb7c669b(["games_prepared"]):::uptodate
     x51e4cab15d71e6a3(["average_final"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate
     xd0ff2216903615b0(["average_tuned"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate
     xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate
     xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate
-    xd0ff2216903615b0(["average_tuned"]):::uptodate --> xc0de7cb1ceaaae9a(["average_fit"]):::uptodate
-    x6a9d509448f9bd3e(["split"]):::uptodate --> x04d83687468f3fc4(["averageweight_tuned"]):::uptodate
+    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
+    xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
+    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
+    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate
+    xd0ff2216903615b0(["average_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate
+    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate
+    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate
+    x170105733f313efa(["training_imputed"]):::uptodate --> xd0ff2216903615b0(["average_tuned"]):::uptodate
+    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate
+    x6a9d509448f9bd3e(["split"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate
     xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate
     xd6936e852f1fbe47(["usersrated_fit"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate
     x60bd72cb3684ce1d(["validation_imputed"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate
-    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> xd6936e852f1fbe47(["usersrated_fit"]):::uptodate
-    xd0ff2216903615b0(["average_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
-    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
-    x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate
+    x45aff652992a023a(["details"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
+    x5009635ca9660f5b(["tuning_plots"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
+    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
+    x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched
+    x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate
+    x6a9d509448f9bd3e(["split"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate
+    x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> xc2507955de0caf4e(["valid_metrics"]):::uptodate
+    x6a9d509448f9bd3e(["split"]):::uptodate --> x04d83687468f3fc4(["averageweight_tuned"]):::uptodate
+    x170105733f313efa(["training_imputed"]):::uptodate --> x8b18f98f2746942f(["usersrated_tuned"]):::uptodate
     x45aff652992a023a(["details"]):::uptodate --> x92c9ab88ae1439d1(["tracking"]):::uptodate
     xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x92c9ab88ae1439d1(["tracking"]):::uptodate
-    x5a0dca7931d71121(["averageweight_final"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
-    x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
-    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
-    xc2507955de0caf4e(["valid_metrics"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate
+    xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate
+    xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate
   end
   classDef uptodate stroke:#000000,color:#ffffff,fill:#354823;
   classDef dispatched stroke:#000000,color:#000000,fill:#DC863B;
@@ -77,17 +81,17 @@ workflow objects
 
 <div>
 
-<div id="yachfaepoo" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
+<div id="myakhitngh" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
   &#10;  
 
 | name                |                time | seconds |
 |:--------------------|--------------------:|--------:|
-| average_tuned       | 2024-05-23 13:06:48 | 178.743 |
-| averageweight_tuned | 2024-05-23 12:59:27 |  86.476 |
-| usersrated_tuned    | 2024-05-23 13:03:39 | 193.233 |
-| average_fit         | 2024-05-23 13:09:00 |  49.730 |
-| averageweight_fit   | 2024-05-23 12:59:58 |  21.004 |
-| usersrated_fit      | 2024-05-23 13:07:56 |  53.211 |
+| average_tuned       | 2024-05-24 09:34:30 | 474.570 |
+| averageweight_tuned | 2024-05-24 08:58:55 | 224.914 |
+| usersrated_tuned    | 2024-05-24 09:26:26 | 481.283 |
+| average_fit         | 2024-05-24 09:36:27 |  44.877 |
+| averageweight_fit   | 2024-05-24 08:59:23 |  18.921 |
+| usersrated_fit      | 2024-05-24 09:35:28 |  43.523 |
 
 </div>
 
@@ -97,14 +101,14 @@ workflow details
 
 <div>
 
-<div id="eifralfcxj" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
+<div id="cjgmyyjyuu" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
   &#10;  
 
-| outcome       | wflow_id | penalty | mixture | .config               |
-|:--------------|:---------|--------:|--------:|:----------------------|
-| average       | glmnet   |    0.01 |     0.5 | Preprocessor1_Model27 |
-| usersrated    | glmnet   |    0.01 |     0.5 | Preprocessor1_Model27 |
-| averageweight | glmnet   |    0.01 |     0.5 | Preprocessor1_Model27 |
+| outcome       | wflow_id | min_n | tree_depth | .config               |
+|:--------------|:---------|------:|-----------:|:----------------------|
+| average       | lightgbm |    15 |          7 | Preprocessor1_Model06 |
+| usersrated    | lightgbm |    15 |          7 | Preprocessor1_Model06 |
+| averageweight | lightgbm |    15 |          7 | Preprocessor1_Model06 |
 
 </div>
 
@@ -116,7 +120,7 @@ workflow details
 
 <div>
 
-<div id="qrgmzdajxe" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
+<div id="oxwwsalhjq" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
   &#10;  
 
 | minratings | outcome       |     rmse |     mae |    mape |   rsq |   ccc |
@@ -138,7 +142,7 @@ workflow details
 
 <div>
 
-<div id="jutokskvul" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
+<div id="ktomeklcin" style="padding-left:0px;padding-right:0px;padding-top:10px;padding-bottom:10px;overflow-x:auto;overflow-y:auto;width:auto;height:auto;">
   &#10;  
 
 | minratings | outcome       | yearpublished |     rmse |     mae |    mape |   rsq |   ccc |

diff --git a/index.qmd b/index.qmd
@@ -45,11 +45,14 @@ tar_source("src/visualizations/tables.R")
 tar_load(valid_predictions)
 tar_load(valid_metrics)
 tar_load(details)
+tar_load(tuning_plots)
 
 games = 
     get_games_from_gcp(
         bucket = "bgg_data"
     )
+
+theme_set(bggUtils::theme_bgg())
 ```
 
 
@@ -127,6 +130,107 @@ targets_tracking_details(metrics = valid_metrics,
 
 ```
 
+## Tuning
+
+::: {.panel-tabset}
+
+### Average Weight
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+tuning_plots[[1]]+
+        labs(title = 'Average Weight')
+
+```
+
+### Average Rating
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+tuning_plots[[2]]+
+    labs(title = 'Average Rating')
+
+```
+
+
+### Users Rated
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+tuning_plots[[3]]+
+    labs(title = 'Users Rated')
+
+```
+
+:::
+
+## Features
+
+```{r}
+#| message: false
+#| warning: false
+average_plot = 
+    average_fit |> 
+    extract_vetiver_features() |>
+    plot_model_features()+
+    labs(title = 'Average Rating')
+
+averageweight_plot = 
+    averageweight_fit |> 
+    extract_vetiver_features() |>
+    plot_model_features()+
+    labs(title = 'Average Weight')
+
+usersrated_plot = 
+    usersrated_fit |> 
+    extract_vetiver_features() |>
+    plot_model_features()+
+    labs(title = 'Users Rated')
+
+```
+
+::: {.panel-tabset}
+
+### Average Weight
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+
+averageweight_plot
+
+```
+
+### Average Rating
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+average_plot
+
+```
+
+### Users Rated
+
+```{r}
+#| fig-height: 7
+#| results: asis
+#| echo: false
+usersrated_plot
+
+```
+
+:::
+
+# Predictions
 
 ```{r}
 #| message: false
@@ -148,7 +252,6 @@ upcoming_games =
 
 ```
 
-# Predictions
 
 ```{r}
 predictions =