diff --git a/_targets.R b/_targets.R index f58462a..3fd02e0 100644 --- a/_targets.R +++ b/_targets.R @@ -76,10 +76,10 @@ list( train_outcome_wflow(outcome = 'averageweight', weights = 5, valid_years = valid_years, - recipe = recipe_linear, - model_spec = glmnet_spec(), - grid = glmnet_grid(), - splines = spline_vars()) + recipe = recipe_trees, + model_spec = lightgbm_spec(), + grid = lightgbm_grid()), + packages = c("bonsai", "lightgbm") ), # now fit model tar_target( @@ -106,12 +106,11 @@ list( train_outcome_wflow(outcome = 'average', ratings = 25, valid_years = valid_years, - recipe = recipe_linear, - model_spec = glmnet_spec(), - grid = glmnet_grid(), + recipe = recipe_trees, + model_spec = lightgbm_spec(), + grid = lightgbm_grid(), ids = id_vars(), - predictors = c("est_averageweight", predictor_vars()), - splines = c("est_averageweight", spline_vars())) + predictors = c("est_averageweight", predictor_vars())) ), # now train usersrated tar_target( @@ -121,12 +120,20 @@ list( train_outcome_wflow(outcome = 'usersrated', ratings = 25, valid_years = valid_years, - recipe = recipe_linear, - model_spec = glmnet_spec(), - grid = glmnet_grid(), + recipe = recipe_trees, + model_spec = lightgbm_spec(), + grid = lightgbm_grid(), ids = id_vars(), - predictors = c("est_averageweight", predictor_vars()), - splines = c("est_averageweight", spline_vars())) + predictors = c("est_averageweight", predictor_vars())) + ), + # extract tuning plots + tar_target( + name = tuning_plots, + command = + bind_rows(averageweight_tuned, + average_tuned, + usersrated_tuned) |> + get_tuning_plots() ), # fit models to whole of training # average @@ -187,12 +194,6 @@ list( file = "targets-runs/tracking.csv"), format = "file" ), - # # render report with quarto - # tar_quarto( - # report, - # path = "results.qmd", - # quiet = F - # ), ## finalize models and predict test set # get training and validation tar_target( diff --git a/_targets/meta/meta b/_targets/meta/meta index 64ef01a..243398c 100644 --- a/_targets/meta/meta +++ b/_targets/meta/meta @@ -1,5 +1,5 @@ name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error -.Random.seed|object|176c50c91db96ee4||||||||||||||| +.Random.seed|object|27940eebb0a56d3f||||||||||||||| add_bgg_dummies|function|824802374d6e7999||||||||||||||| add_bgg_hit|function|b371a6c32ebf1d63||||||||||||||| add_bgg_preprocessing|function|ce3e0c811d3f3eaa||||||||||||||| @@ -17,22 +17,22 @@ assess_bgg_hit|function|d3e9bb6f2821f897||||||||||||||| assess_outcomes|function|4aa23d70c2931237||||||||||||||| assess_outcomes_by_threshold|function|9480f8490961652a||||||||||||||| average|object|d2d6eec4a76fa9a1||||||||||||||| -average_final|stem|fca037f6bd1c9918|7b4325494354b92f|794d3d504300009e|469714530||t19866.7902888399s|fbe34b5f6c956729|735741684|qs|local|vector|||55.549|| -average_fit|stem|a1dde248a93ab78f|94b046e18f5a3b48|01af0dee3bf8b73c|-1438633663||t19866.7562583166s|88cc72642802cee5|733264960|qs|local|vector|||49.73|| +average_final|stem|566f460e2b416085|7b4325494354b92f|49b6c8ee23af391c|469714530||t19867.6105456463s|ef1dd07dee361ede|731155104|qs|local|vector|||51.69|| +average_fit|stem|a98c30b6f7d2d584|94b046e18f5a3b48|539ce101155b9d31|-1438633663||t19867.6086554664s|7a4d309773a7a18c|728857170|qs|local|vector|||44.877|| average_last_fit_2019|stem|5952cc3778f1a0aa|966e55f110e22d16|cee0d02625035931|1722910830|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_last_fit_2019*version=1709829212351981|t19789.6899425121s||13475824|rds|gcp|vector|||17.768|| average_model_2019|stem|1042f2759c1a6b0a|7d1b057b155066d1|8903b272c8b804e0|1348366582|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_model_2019*version=1709829189736056|t19789.6894384819s||619847812|rds|gcp|vector|||33.845|| average_recipe_2019|stem|c4f1a3817867300d|ea6ae6942e82e141|8b63f10dc62ceb87|1718918283|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_recipe_2019*version=1709823189254759|t19789.6199686941s||581525129|rds|gcp|vector|||0.626|| average_split|stem|98cb6d9bc947296e|066908c9ebc5b172|7f84c7a1924e4e61|1029818450||t19849.801200929s|48f14561bfa6b2ee|13948173|qs|local|vector|||0.027|| average_split_2019|stem|a02238df9db841cb|1267542319b59be5|3a25cd467bfa5e2e|1936830640|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_split_2019*version=1709823071082785|t19789.6188669999s||13440796|rds|gcp|vector|||0.877|| -average_tuned|stem|8d09b0b8fcf821db|085b151bdc3255c0|25ec6ca6bcde3490|2075245144||t19866.7547303373s|501a7b3b08c948d0|718180337|qs|local|vector|||178.743|NAs introduced by coercion| +average_tuned|stem|52193673fbb25425|ec391d4efb706dd6|a22ca1f0d27a6861|2075245144||t19867.6072968592s|5c8f0cb1b3e3d980|717728702|qs|local|vector|||474.57|NAs introduced by coercion| average_tuned_2019|stem|204389a2b01d65b9|c7f8725a4baaa445|83f60c44cc86edbf|-337580546|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_tuned_2019*version=1709825403298014|t19789.6458604083s||13012935|rds|gcp|vector|||825.865|NAs introduced by coercion| -average_vetiver|stem|da7e5646cbdfced7|d1e109611603a9ba|3ba1eef0e662b79b|814172009||t0s|f4066a65ff755f48|0|file|local|vector|||8.027|| +average_vetiver|stem|da7e5646cbdfced7|d1e109611603a9ba|98425e508321c143|814172009||t0s|f4066a65ff755f48|0|file|local|vector|||7.629|| average_wflow|stem|bc4b047d7462f379|f777060c66df0c68|d4081b09e24f2e29|-1979774709||t19849.8014224895s|d0eca0793555fbbf|620611353|qs|local|vector|||0.024|| average_workflow_2019|stem|7ce04b6f7a61d0a8|98e0744e3ba3a743|7dff0f212583db1c|1154273135|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/average_workflow_2019*version=1709823717969831|t19789.6260957681s||581525548|rds|gcp|vector|||0.003|| averageweight|object|6315ec5b5c33ddb0||||||||||||||| averageweight_assess_split_2019|stem|17bea111fcbd4c4a|020c10cc1f83b721|71f37b120bb41ccb|572968638|bucket=bgg_data*key=models/objects/averageweight_assess_split_2019*version=1709600672493801|t19787.0448150537s||9134230|rds|gcp|vector|||0.016|| -averageweight_final|stem|3f353d1892ab9f83|36a4b2143f3267f1|ff93b09e6334ad8d|1930521591||t19866.78861365s|c4387cebf45cd2b7|522353156|qs|local|vector|||23.247|| -averageweight_fit|stem|0c832d0ea52e182e|41a3e9be6d9446de|095bf1587d99c7d7|-27092590||t19866.7499811983s|716178f62c6d499c|521066853|qs|local|vector|||21.004|| +averageweight_final|stem|19ee208f739ec1c8|36a4b2143f3267f1|a7b0672efc901e44|1930521591||t19867.5900449389s|83262f10169abe2b|519681478|qs|local|vector|||26.526|| +averageweight_fit|stem|db06367ddefb58a3|41a3e9be6d9446de|df8b4ae8944f7849|-27092590||t19867.5829147028s|ace96d2f1b262b30|518522400|qs|local|vector|||18.921|| averageweight_last_fit_2019|stem|68c66382800a95a1|5a1285fc8245a15b|e242805b5a0a7178|-540902471|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/averageweight_last_fit_2019*version=1709823032190961|t19789.618421784s||9189058|rds|gcp|vector|||9.398|| averageweight_metrics_2019|stem|1c4b746d9949dda6|dfc4d6606cb90750|30c6d4d69a9fbb7a|-854821502|bucket=bgg_data*key=models/objects/averageweight_metrics_2019*version=1709652223014546|t19787.6414680206s||260|rds|gcp|vector|||0.001|| averageweight_model_2019|stem|4d88fd4af9e83414|3163a84308483699|ea59fb70055a0b61|110600965|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/averageweight_model_2019*version=1709823020132251|t19789.6180568201s||439409703|rds|gcp|vector|||10.916|| @@ -40,10 +40,10 @@ averageweight_model_2021|stem|e4b94e2bd367008e|faccc051f7d91018|8050a38f64894f50 averageweight_recipe_2019|stem|4132c8d0b720274f|3ac6a6e58682b6a1|8808579a687cde96|-1911270986|bucket=bgg_data*key=models/objects/averageweight_recipe_2019*version=1709669719531690|t19787.843665928s||413376051|rds|gcp|vector|||0.091|| averageweight_split|stem|7b5b78e08915d9d5|e8a2ee4a75686a0b|c676effbd1d15726|-935408054||t19849.7994538452s|fcbe3da35663d683|9381398|qs|local|vector|||0.036|| averageweight_split_2019|stem|cb0cbbb4342ed172|bb091a611080690f|8d929788ba52be4a|1166735688|bucket=bgg_data*key=models/objects/averageweight_split_2019*version=1709669618881256|t19787.8428006551s||9168416|rds|gcp|vector|||0.063|| -averageweight_tuned|stem|d6a701c4d1549e16|c35a41cb29eee322|4c0ef64c14ebdea4|-646993965||t19866.749619734s|2c0f707a90543492|510788157|qs|local|vector|||86.476|Using an external vector in selections was deprecated in tidyselect 1.1.0.ℹ Please use all_of or any_of instead. Was data selectoutcome Now data selectall_ofoutcomeSee httpstidyselect.rlib.orgreferencefaqexternalvector.html.. NAs introduced by coercion| +averageweight_tuned|stem|aa7dea4600ca5396|4cc6cb020e159d6d|5fc3711777cedab2|-646993965||t19867.5825810673s|cdce9c81b0c8272e|510480089|qs|local|vector|||224.914|Using an external vector in selections was deprecated in tidyselect 1.1.0.ℹ Please use all_of or any_of instead. Was data selectoutcome Now data selectall_ofoutcomeSee httpstidyselect.rlib.orgreferencefaqexternalvector.html.. NAs introduced by coercion| averageweight_tuned_2019|stem|ec7efd14ad0f4c11|969803e0aa41c4ee|c17bd15290aa8ef2|-1561450121|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/averageweight_tuned_2019*version=1709822933002552|t19789.6172717144s||8870530|rds|gcp|vector|||79.172|NAs introduced by coercion| averageweight_tuning_split_2019|stem|cb0cbbb4342ed172|4fc991ad77544e97|8a8c4574d40239e3|2026635200|bucket=bgg_data*key=models/objects/averageweight_tuning_split_2019*version=1709652030592717|t19787.6392291343s||9168416|rds|gcp|vector|||0.261|| -averageweight_vetiver|stem|da7e5646cbdfced7|ac2051ab3595c9fa|3541fa3ce97e80ca|143165180||t0s|f4066a65ff755f48|0|file|local|vector|||4.931|| +averageweight_vetiver|stem|da7e5646cbdfced7|ac2051ab3595c9fa|b2a06eb104b51c1f|143165180||t0s|f4066a65ff755f48|0|file|local|vector|||5.311|| averageweight_wflow|stem|e05ea1b244627678|30c8c5142566f172|8ff3cb2a84701a9b|-1164597312||t19849.7995342371s||431719490|qs|local|vector|||0.004||unused arguments id_vars id_vars, predictor_vars predictor_vars, spline_vars spline_vars averageweight_workflow_2019|stem|3e524193ad09224e|5534f01e2125fb58|90265c72db89d0e8|885843097|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/averageweight_workflow_2019*version=1709822850235228|t19789.6161134202s||413376684|rds|gcp|vector|||0.009|| bgg_hit_metrics|stem|4c0b247b1e6d2843|7fa501f73037a88b|54fe05568e40b99b|-573508956|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/bgg_hit_metrics*version=1710880426861006|t19801.8567910649s||369|qs|gcp|vector|||0.271|| @@ -65,11 +65,15 @@ control_race|function|175b7244c4a12dec||||||||||||||| convert_to_workflow_set|function|80a529f48d628883||||||||||||||| create_workflows|function|cc8a420fe9711fd4||||||||||||||| create_year_split|function|c658d02159a974ad||||||||||||||| -details|stem|635c8ad06777d7e2|cbebc91f2363c006|42c18dde3524f393|131261106||t19866.7563965362s|b87462683b64d887|223|qs|local|vector|||0.1|| +details|stem|3d5302d2c93197f2|cbebc91f2363c006|7497e6d8f9b6132c|131261106||t19867.608807796s|6c088a07ec7dbe69|213|qs|local|vector|||0.083|| discrete_vars|function|80bc599945b9e2ed||||||||||||||| end_train_year|object|59e5e5a503986d8c||||||||||||||| extract_engine_name|function|8d44d6ae5f134199||||||||||||||| +extract_model_features|function|d6ca855a6acef6f1||||||||||||||| extract_model_name|function|8d44d6ae5f134199||||||||||||||| +extract_model_type|function|1ac8d097da2342a5||||||||||||||| +extract_vetiver_features|function|99b9ee7a30bc1961||||||||||||||| +extract_vetiver_model|function|c3438c55143cac72||||||||||||||| extract_workflow_details|function|ba25e442749a4e00||||||||||||||| extract_workflow_engine|function|46a58ef44c40bca6||||||||||||||| extract_workflow_outcome|function|b403d83e3104c449||||||||||||||| @@ -89,11 +93,14 @@ get_bayesaverage_method|function|1d97a67ebac9977b||||||||||||||| get_coefs.glmnet|function|803746b92f394aab||||||||||||||| get_glmnet_coefs|function|6fa17f92a9f77eaf||||||||||||||| get_glmnet_objs|function|1ae011e77111aa64||||||||||||||| +get_tuning_plots|function|524906dd1e723d97||||||||||||||| glmnet_grid|function|636d57d8983d2434||||||||||||||| glmnet_spec|function|6766ad55dedeb0ad||||||||||||||| id_vars|function|7935a09cd8bd8f92||||||||||||||| impute_averageweight|function|ded824f4ce6f9523||||||||||||||| imputed|object|dfa1f17495b73bca||||||||||||||| +lightgbm_grid|function|03680289f0fa9c32||||||||||||||| +lightgbm_spec|function|9ca690243901edda||||||||||||||| linear_model|stem|28486a85f3c43a88|d89ff8bec756dbb1|ef46db3751d8e999|-1612613828|bucket=bgg_data*key=models/objects/linear_model*version=1709599044440414|t19787.025975969s||309|rds|gcp|vector|||0.006|| linear_models|function|da40c48ba42ed58a||||||||||||||| load_games|function|f570cc12ee567d75||||||||||||||| @@ -116,6 +123,9 @@ pin_model|function|27e23f1c590a5ada||||||||||||||| pin_outcome_model|function|d3c7ed350c09fdf1||||||||||||||| pivot_estimates|function|30eef18b223de767||||||||||||||| pivot_outcomes|function|90829ebc094a3117||||||||||||||| +plot_features.default|function|2b6e96f53de62596||||||||||||||| +plot_features.lightgbm|function|5c417a94b4e2b891||||||||||||||| +plot_model_features|function|e1786f666bad47fc||||||||||||||| plot_predictions|function|7b474d37252d397d||||||||||||||| predict_average|function|6d27ae4de4ab4ed6||||||||||||||| predict_bayesaverage|function|1138a629a889cff9||||||||||||||| @@ -138,7 +148,7 @@ recipe_linear|function|8be4983eae687edc||||||||||||||| recipe_trees|function|b13ae956662c9c2a||||||||||||||| reg_metrics|stem|1ca3f6b05a88ca07|d3f70096de853445|b6dcc6a249e15312|-1596937686||t19839.7841557195s|dd0210555a6a8a3a|25485|qs|local|vector|||1.338|| report|stem|db008d88648db469|137425f81601e604|ef46db3751d8e999|787668912|results_files*results.md*results.qmd|t19866.8732496078s||145568|file|local|vector|||1.411||unused argument output_dir docs -reports|stem|8096b05d2734d879|40603e2a07745e1b|e90b2e75e3bb1c29|536393307|docs*index.qmd*results.qmd*_quarto.yml|t19866.9041563146s|97028f806365ba69|7322|file|local|vector|||59.891|| +reports|stem|3dbd1e8e0be20a89|7a073cf6fc3b0972|cc933c54f049368c|536393307|docs*index.qmd*results.qmd*_quarto.yml|t19867.6792956162s|4ba2d54d90c5907a|5993551|file|local|vector|||86.379|| results|stem|9627f387d611a966|8300b10a468df582|f36f2f8b6bb4b71f|-322906963||t19842.7520791573s|813a0d7697333517|548|qs|local|vector|||0.025|| retrain_years|object|a2a52dfe4e065e93||||||||||||||| round_usersrated|function|8efbe1db0e6dc629||||||||||||||| @@ -158,17 +168,17 @@ split_imputed_2019|stem|df86a7dec504321c|f2a13092c3d2c4d8|01e3275e69841946|12523 splits|object|264ad358cb1700d5||||||||||||||| table_predictions|function|84845bea2f3c5ebe||||||||||||||| targets_tracking_details|function|c9497a4cd9f6c4a9||||||||||||||| -test_predictions|stem|77efb6a5fa6da950|a176ee61cd884594|976e09eff3c800cd|-1403965390||t19866.7904986077s|bbaac39f18432b1d|3879886|qs|local|vector|||3.914|| +test_predictions|stem|8b3879a35c0e3874|a176ee61cd884594|1362f5dd07141b41|-1403965390||t19867.6107644489s|d66063fc5c1b7232|3880539|qs|local|vector|||3.379|| top_coefs_by_sign|function|8769a4336a983bbf||||||||||||||| trace_plot.glmnet|function|57a278ad60956cd1||||||||||||||| -tracking|stem|da7e5646cbdfced7|6493417c313c956c|43e6f27943340bd8|1621002234||t0s|f4066a65ff755f48|0|file|local|vector|||0.049|| +tracking|stem|da7e5646cbdfced7|6493417c313c956c|53e1caa0049bfd86|1621002234||t0s|f4066a65ff755f48|0|file|local|vector|||0.038|| train_outcome_model|function|07b9287570047389||||||||||||||| train_outcome_wflow|function|56d812b3973ed5bc||||||||||||||| training_2015|stem||f953e6dfffa5452a|673de7466bebe0ff|-2011844015||t19786.6612559291s||0|rds|gcp|vector|||0.003||could not find function training_2015 -training_and_validation|stem|574af839d9529ae4|84da5758e33a0c96|c42cbee0789ea3b2|-337972277||t19849.8012107261s|18b2c0da3bbcb8a4|46141492|qs|local|vector|||0.134|| +training_and_validation|stem|c0df1a155a02217e|84da5758e33a0c96|ded5b8bdc8a79e23|-337972277||t19867.5878741151s|a603cb42ed451597|46142651|qs|local|vector|||0.251|| training_data_2019|stem|fa7fccca0084b7ad|8222fc98ecfddd59|122a8a4d8cbfdb30|-452326442|bucket=bgg_data*key=models/objects/training_data_2019*version=1709658674692814|t19787.7160655225s||41142335|rds|gcp|vector|||0.017|| training_data_2021|stem|6420896b418a8714|69451bb58c0bc12b|ac6be7a89dd3c5a5|-1168025648|bucket=bgg_data*key=models/objects/training_data_2021*version=1709567721171424|t19786.6633352006s||46515188|rds|gcp|vector|||0.014|| -training_imputed|stem|4f679e25d67b865e|7214374ce6b52369|0dec2ed1f19085fc|-1251928409||t19866.7501369334s|53b024fd83b589b2|43521077|qs|local|vector|||9.114|| +training_imputed|stem|3be67be0802c21ef|7214374ce6b52369|a002fedee0781313|-1251928409||t19867.5830877772s|3d24d36ac2b4d2cf|43520519|qs|local|vector|||10.915|| training_imputed_2019|stem|0996a879cdd9a0d5|e9c31fa48a52b8e9|53cd84600f11e7e3|228302738|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/training_imputed_2019*version=1709823064893982|t19789.6187739271s||41774710|rds|gcp|vector|||12.862|| tree_models|function|5996000ebef87d97||||||||||||||| truncate_averageweight|function|f53f1fb6ddc6d804||||||||||||||| @@ -176,22 +186,23 @@ tune_outcome_wflow|function|58986a3604251944||||||||||||||| tune_workflow|function|f8ae94f0d5ed140c||||||||||||||| tune_workflows|function|e507bb5e69113e7d||||||||||||||| tuning_grid|stem|996e2b1aa22412a2|b3ff6d01114a976e|ef46db3751d8e999|1796853018||t19849.7906829998s|b87462683b64d887|223|qs|local|vector|||0.006|| -usersrated_final|stem|042f04206b5a56f8|3fdbd1fa4306f02a|7fee43f900cc5872|318722018||t19866.7894551746s|19300ebb857bb346|742047271|qs|local|vector|||56.954|| -usersrated_fit|stem|41ae290390a9cc94|1447115050e82b49|24db174fc8113f50|-463875767||t19866.7555134694s|ccd9f8aebb6d847a|739603718|qs|local|vector|||53.211|| +tuning_plots|stem|41e30eb996bcca0e|8dc0ab87a4d0cb5d|594758cc20d379f3|-414874247||t19867.6674927178s|9824c588258fcee1|124835|qs|local|vector|||0.123|| +usersrated_final|stem|b9076d4727f9b400|3fdbd1fa4306f02a|4f6666cd9de7426d|318722018||t19867.6096327491s|8041c0302ea0f653|736375852|qs|local|vector|||56.397|| +usersrated_fit|stem|bf51398cbf4cd116|1447115050e82b49|fd77eea888ea0ecc|-463875767||t19867.607965134s|c52f84a28e3ba4ea|734061418|qs|local|vector|||43.523|| usersrated_last_fit_2019|stem|c167a8d84294ad03|2f0ae9404e7415ad|81923f2fcd6c40a1|-1688329540|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_last_fit_2019*version=1709829270936118|t19789.6906080283s||13548762|rds|gcp|vector|||19.728|| usersrated_model_2019|stem|5cc531eed5437a17|e937b13665c01f53|12ef504c9a1e5b76|1183094964|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_model_2019*version=1709829406676397|t19789.6918735263s||623211426|rds|gcp|vector|||21.232|| usersrated_recipe_2019|stem|def63d1e8e484fc2|af770eb1bce17f47|36fd3f46878d8716|-1102203476|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_recipe_2019*version=1709823506910086|t19789.6226927836s||584764565|rds|gcp|vector|||0.089|| usersrated_split|stem|214b154db4b24dd3|44053cc44027177f|7f84c7a1924e4e61|1122274177||t19849.8011979666s|091beb28b6d6dd87|14112471|qs|local|vector|||0.045|| usersrated_split_2019|stem|7998d71b18338aef|6174feee7f8caad2|3a25cd467bfa5e2e|-2020394115|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_split_2019*version=1709823075639287|t19789.6189207257s||13515637|rds|gcp|vector|||0.478|| -usersrated_tuned|stem|1e73c329b63e7a88|7616d46a0cab263c|25ec6ca6bcde3490|257239947||t19866.7525442181s|3b66f357fe05f560|724477803|qs|local|vector|||193.233|NAs introduced by coercion| +usersrated_tuned|stem|2c6d685607399430|af931f461366ff6f|a22ca1f0d27a6861|257239947||t19867.6016914262s|da70cea87af232f5|723000678|qs|local|vector|||481.283|Using an external vector in selections was deprecated in tidyselect 1.1.0.ℹ Please use all_of or any_of instead. Was data selectoutcome Now data selectall_ofoutcomeSee httpstidyselect.rlib.orgreferencefaqexternalvector.html.. NAs introduced by coercion| usersrated_tuned_2019|stem|3a685d630fd16f3a|20a468a1ae7fcf9c|997aa397c3595ee6|8331713|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_tuned_2019*version=1709826567275317|t19789.6593248967s||13024371|rds|gcp|vector|||1159.845|NAs introduced by coercion| -usersrated_vetiver|stem|da7e5646cbdfced7|1dd88f4e6441ee7d|b31b5987a7d83b3f|-1686075036||t0s|f4066a65ff755f48|0|file|local|vector|||8.517|| +usersrated_vetiver|stem|da7e5646cbdfced7|1dd88f4e6441ee7d|5864c9f9badfbaca|-1686075036||t0s|f4066a65ff755f48|0|file|local|vector|||20.719|| usersrated_wflow|stem|127b0865bcfcf482|94cd0c8a6d16505f|67055fb3f6de4eee|-883851043||t19849.8013162149s|02d38a8c646b6fff|627982673|qs|local|vector|||0.035|| usersrated_workflow_2019|stem|a6279c56483b5bdf|6dfbcc2a15efbc59|8b12bba5ae4a33d1|-587846464|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/usersrated_workflow_2019*version=1709824573657101|t19789.6304263608s||584764992|rds|gcp|vector|||0.012|| -valid_metrics|stem|d045a3cf86ff5bc6|48430632dfb2d35d|a13e52c5152817c2|27008866||t19866.7566601964s|780d91b267ec61d3|551|qs|local|vector|||0.114|| -valid_predictions|stem|3726998820fd27a3|95c23fab2b56878a|e8a8195937883c35|675843537||t19866.7566583675s|5c670b70159fb940|5335900|qs|local|vector|||2.746|| +valid_metrics|stem|3dd5c4001e1ca927|48430632dfb2d35d|43dd038ea7063e37|27008866||t19867.6105504414s|780d91b267ec61d3|551|qs|local|vector|||0.131|| +valid_predictions|stem|a0296aaaaaf7826d|95c23fab2b56878a|a85bccc95acdf3d7|675843537||t19867.609774398s|fe9673800b77baec|5340380|qs|local|vector|||2.778|| valid_years|object|a26c7eb00fbd604a||||||||||||||| -validation_imputed|stem|3efab732bed86865|da31470742e9c38a|0dec2ed1f19085fc|-831156082||t19866.7501942285s|6d5d9157e8f84fa9|5210293|qs|local|vector|||1.374|| +validation_imputed|stem|31f6382920320693|da31470742e9c38a|a002fedee0781313|-831156082||t19867.5831423216s|536b19d8c24d5aca|5210255|qs|local|vector|||1.498|| validation_imputed_2019|stem|e251e6c1d340902e|a543f37a20a4b014|53cd84600f11e7e3|-1068696117|bucket=bgg_data*key=bgg_data/bgg_models/model/glmnet/objects/validation_imputed_2019*version=1709823039571797|t19789.6185063628s||5460245|rds|gcp|vector|||1.451|| write_tracking|function|8e3477cbfdc33911||||||||||||||| year_splits|object|e55f2adaf03ea742||||||||||||||| diff --git a/docs/index.html b/docs/index.html index 00e59fa..09c888c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10367,6 +10367,8 @@

Table of contents

  • Models
  • Predictions
  • @@ -10394,7 +10396,7 @@

    Predicting Upcoming Board Games

    Published
    -

    5/23/24

    +

    5/24/24

    @@ -10417,8 +10419,8 @@

    Pipeline

    targets::tar_visnetwork(targets_only =T)
    -
    - +
    +
    @@ -10468,7 +10470,7 @@

    Assessment

    -

    +

    @@ -10486,7 +10488,7 @@

    Assessment

    gtExtras::gt_theme_espn()
    -
    +
    @@ -10960,37 +10962,37 @@

    Assessment

    25 -glmnet +lightgbm average -0.688 -0.506 -7.625 -0.277 -0.458 +0.692 +0.509 +7.582 +0.282 +0.463 25 -glmnet +lightgbm averageweight -0.479 -0.367 -20.356 -0.630 -0.773 +0.457 +0.347 +19.210 +0.665 +0.804 25 -glmnet+glmnet +lightgbm+lightgbm bayesaverage -0.300 -0.174 -2.878 -0.412 -0.633 +0.293 +0.170 +2.823 +0.432 +0.646 25 -glmnet +lightgbm usersrated -1888.348 -468.294 -165.111 -0.145 -0.379 +1565.092 +461.642 +184.381 +0.229 +0.476 @@ -10999,32 +11001,88 @@

    Assessment

    +
    +

    Tuning

    +
    + +
    +
    +

    +
    +
    +

    +
    +
    +

    +
    +
    +
    +
    +
    +

    Features

    +
    +
    +Show the code +
    average_plot = 
    +    average_fit |> 
    +    extract_vetiver_features() |>
    +    plot_model_features()+
    +    labs(title = 'Average Rating')
    +
    +averageweight_plot = 
    +    averageweight_fit |> 
    +    extract_vetiver_features() |>
    +    plot_model_features()+
    +    labs(title = 'Average Weight')
    +
    +usersrated_plot = 
    +    usersrated_fit |> 
    +    extract_vetiver_features() |>
    +    plot_model_features()+
    +    labs(title = 'Users Rated')
    +
    +
    +
    + +
    +
    +

    +
    +
    +

    +
    +
    +

    +
    +
    +
    +

    Predictions

    Show the code -
    predictions = 
    -    upcoming_games |>
    -    impute_averageweight(
    -        model = averageweight_fit
    -    ) |>
    -    predict_bayesaverage(
    -        average_model = average_fit,
    -        usersrated_model = usersrated_fit
    -    )
    -
    -predictions |>
    -    filter(yearpublished >= 2024) |>
    -    # this goddamn bah humbug game
    -    filter(game_id != 388225) |>
    -    predictions_dt(games = games) |>
    -    add_colors()
    +
    predictions = 
    +    upcoming_games |>
    +    impute_averageweight(
    +        model = averageweight_fit
    +    ) |>
    +    predict_bayesaverage(
    +        average_model = average_fit,
    +        usersrated_model = usersrated_fit
    +    )
    +
    +predictions |>
    +    filter(yearpublished >= 2024) |>
    +    # this goddamn bah humbug game
    +    filter(game_id != 388225) |>
    +    predictions_dt(games = games) |>
    +    add_colors()
    -
    - +
    +
    diff --git a/docs/results.md b/docs/results.md index fb838db..6ce4fa2 100644 --- a/docs/results.md +++ b/docs/results.md @@ -1,6 +1,6 @@ # Model Results -2024-05-23 +2024-05-24 # pipeline @@ -15,54 +15,58 @@ graph LR end subgraph Graph direction LR - x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate - x170105733f313efa(["training_imputed"]):::uptodate --> x8b18f98f2746942f(["usersrated_tuned"]):::uptodate - x2b7f0716b8751c70(["games_raw"]):::uptodate --> xe73a0f95bb7c669b(["games_prepared"]):::uptodate - x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate - x6a9d509448f9bd3e(["split"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate - x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate - xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate - x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> xc2507955de0caf4e(["valid_metrics"]):::uptodate xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xd99568a85da9c31b(["usersrated_final"]):::uptodate xd6936e852f1fbe47(["usersrated_fit"]):::uptodate --> xd99568a85da9c31b(["usersrated_final"]):::uptodate + xe73a0f95bb7c669b(["games_prepared"]):::uptodate --> x6a9d509448f9bd3e(["split"]):::uptodate + x5a0dca7931d71121(["averageweight_final"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate + x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate + xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate + xc2507955de0caf4e(["valid_metrics"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate + x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate + xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x5a0dca7931d71121(["averageweight_final"]):::uptodate + x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate x51e4cab15d71e6a3(["average_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate x5a0dca7931d71121(["averageweight_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate x6a9d509448f9bd3e(["split"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x09d4ee9c0cdd997f(["test_predictions"]):::uptodate - x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate - x6a9d509448f9bd3e(["split"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate - xe73a0f95bb7c669b(["games_prepared"]):::uptodate --> x6a9d509448f9bd3e(["split"]):::uptodate - xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate - xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate - x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate - xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate + xd0ff2216903615b0(["average_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate + x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate + x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate x170105733f313efa(["training_imputed"]):::uptodate --> xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate x60bd72cb3684ce1d(["validation_imputed"]):::uptodate --> xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate - x170105733f313efa(["training_imputed"]):::uptodate --> xd0ff2216903615b0(["average_tuned"]):::uptodate - xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate - xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate - x45aff652992a023a(["details"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched - xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched - x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched + x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> xd6936e852f1fbe47(["usersrated_fit"]):::uptodate + xd0ff2216903615b0(["average_tuned"]):::uptodate --> xc0de7cb1ceaaae9a(["average_fit"]):::uptodate + x2b7f0716b8751c70(["games_raw"]):::uptodate --> xe73a0f95bb7c669b(["games_prepared"]):::uptodate x51e4cab15d71e6a3(["average_final"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate xd0ff2216903615b0(["average_tuned"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x1b61a8b8a8b5e6c2(["average_vetiver"]):::uptodate - xd0ff2216903615b0(["average_tuned"]):::uptodate --> xc0de7cb1ceaaae9a(["average_fit"]):::uptodate - x6a9d509448f9bd3e(["split"]):::uptodate --> x04d83687468f3fc4(["averageweight_tuned"]):::uptodate + xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate + xd99568a85da9c31b(["usersrated_final"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate + x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate + xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x156a6ed4485a6769(["usersrated_vetiver"]):::uptodate + xd0ff2216903615b0(["average_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate + x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate + x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x5009635ca9660f5b(["tuning_plots"]):::uptodate + x170105733f313efa(["training_imputed"]):::uptodate --> xd0ff2216903615b0(["average_tuned"]):::uptodate + x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate + x6a9d509448f9bd3e(["split"]):::uptodate --> x170105733f313efa(["training_imputed"]):::uptodate xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate xd6936e852f1fbe47(["usersrated_fit"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate x60bd72cb3684ce1d(["validation_imputed"]):::uptodate --> x32d592eaf8d082e6(["valid_predictions"]):::uptodate - x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> xd6936e852f1fbe47(["usersrated_fit"]):::uptodate - xd0ff2216903615b0(["average_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate - x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate - x8b18f98f2746942f(["usersrated_tuned"]):::uptodate --> x45aff652992a023a(["details"]):::uptodate + x45aff652992a023a(["details"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched + x5009635ca9660f5b(["tuning_plots"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched + xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched + x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> x3c2f8ffacaa45076(["reports"]):::dispatched + x0f34a02bd6ed632f(["averageweight_fit"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate + x6a9d509448f9bd3e(["split"]):::uptodate --> x60bd72cb3684ce1d(["validation_imputed"]):::uptodate + x32d592eaf8d082e6(["valid_predictions"]):::uptodate --> xc2507955de0caf4e(["valid_metrics"]):::uptodate + x6a9d509448f9bd3e(["split"]):::uptodate --> x04d83687468f3fc4(["averageweight_tuned"]):::uptodate + x170105733f313efa(["training_imputed"]):::uptodate --> x8b18f98f2746942f(["usersrated_tuned"]):::uptodate x45aff652992a023a(["details"]):::uptodate --> x92c9ab88ae1439d1(["tracking"]):::uptodate xc2507955de0caf4e(["valid_metrics"]):::uptodate --> x92c9ab88ae1439d1(["tracking"]):::uptodate - x5a0dca7931d71121(["averageweight_final"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate - x04d83687468f3fc4(["averageweight_tuned"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate - xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate - xc2507955de0caf4e(["valid_metrics"]):::uptodate --> xcbeb0ce109eb5b86(["averageweight_vetiver"]):::uptodate + xc0de7cb1ceaaae9a(["average_fit"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate + xe5bd2d3c6fdf29ae(["training_and_validation"]):::uptodate --> x51e4cab15d71e6a3(["average_final"]):::uptodate end classDef uptodate stroke:#000000,color:#ffffff,fill:#354823; classDef dispatched stroke:#000000,color:#000000,fill:#DC863B; @@ -77,17 +81,17 @@ workflow objects
    -
    +
    | name | time | seconds | |:--------------------|--------------------:|--------:| -| average_tuned | 2024-05-23 13:06:48 | 178.743 | -| averageweight_tuned | 2024-05-23 12:59:27 | 86.476 | -| usersrated_tuned | 2024-05-23 13:03:39 | 193.233 | -| average_fit | 2024-05-23 13:09:00 | 49.730 | -| averageweight_fit | 2024-05-23 12:59:58 | 21.004 | -| usersrated_fit | 2024-05-23 13:07:56 | 53.211 | +| average_tuned | 2024-05-24 09:34:30 | 474.570 | +| averageweight_tuned | 2024-05-24 08:58:55 | 224.914 | +| usersrated_tuned | 2024-05-24 09:26:26 | 481.283 | +| average_fit | 2024-05-24 09:36:27 | 44.877 | +| averageweight_fit | 2024-05-24 08:59:23 | 18.921 | +| usersrated_fit | 2024-05-24 09:35:28 | 43.523 |
    @@ -97,14 +101,14 @@ workflow details
    -
    +
    -| outcome | wflow_id | penalty | mixture | .config | -|:--------------|:---------|--------:|--------:|:----------------------| -| average | glmnet | 0.01 | 0.5 | Preprocessor1_Model27 | -| usersrated | glmnet | 0.01 | 0.5 | Preprocessor1_Model27 | -| averageweight | glmnet | 0.01 | 0.5 | Preprocessor1_Model27 | +| outcome | wflow_id | min_n | tree_depth | .config | +|:--------------|:---------|------:|-----------:|:----------------------| +| average | lightgbm | 15 | 7 | Preprocessor1_Model06 | +| usersrated | lightgbm | 15 | 7 | Preprocessor1_Model06 | +| averageweight | lightgbm | 15 | 7 | Preprocessor1_Model06 |
    @@ -116,7 +120,7 @@ workflow details
    -
    +
    | minratings | outcome | rmse | mae | mape | rsq | ccc | @@ -138,7 +142,7 @@ workflow details
    -
    +
    | minratings | outcome | yearpublished | rmse | mae | mape | rsq | ccc | diff --git a/index.qmd b/index.qmd index a39db2e..4a24d1c 100644 --- a/index.qmd +++ b/index.qmd @@ -45,11 +45,14 @@ tar_source("src/visualizations/tables.R") tar_load(valid_predictions) tar_load(valid_metrics) tar_load(details) +tar_load(tuning_plots) games = get_games_from_gcp( bucket = "bgg_data" ) + +theme_set(bggUtils::theme_bgg()) ``` @@ -127,6 +130,107 @@ targets_tracking_details(metrics = valid_metrics, ``` +## Tuning + +::: {.panel-tabset} + +### Average Weight + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false +tuning_plots[[1]]+ + labs(title = 'Average Weight') + +``` + +### Average Rating + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false +tuning_plots[[2]]+ + labs(title = 'Average Rating') + +``` + + +### Users Rated + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false +tuning_plots[[3]]+ + labs(title = 'Users Rated') + +``` + +::: + +## Features + +```{r} +#| message: false +#| warning: false +average_plot = + average_fit |> + extract_vetiver_features() |> + plot_model_features()+ + labs(title = 'Average Rating') + +averageweight_plot = + averageweight_fit |> + extract_vetiver_features() |> + plot_model_features()+ + labs(title = 'Average Weight') + +usersrated_plot = + usersrated_fit |> + extract_vetiver_features() |> + plot_model_features()+ + labs(title = 'Users Rated') + +``` + +::: {.panel-tabset} + +### Average Weight + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false + +averageweight_plot + +``` + +### Average + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false +average_plot + +``` + +### Users Rated + +```{r} +#| fig-height: 7 +#| results: asis +#| echo: false +usersrated_plot + +``` + +::: + +# Predictions ```{r} #| message: false @@ -148,7 +252,6 @@ upcoming_games = ``` -# Predictions ```{r} predictions = diff --git a/renv.lock b/renv.lock index 7e1bd6e..ada17bb 100644 --- a/renv.lock +++ b/renv.lock @@ -266,6 +266,13 @@ ], "Hash": "0cf10dab0d023d5b46a5a14387556891" }, + "SnowballC": { + "Package": "SnowballC", + "Version": "0.7.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "46da3912f69e3e6258a033802c4af32e" + }, "SparseM": { "Package": "SparseM", "Version": "1.81", @@ -469,6 +476,25 @@ "Repository": "CRAN", "Hash": "b7d8d8ee39869c18d8846a184dd8a1af" }, + "bonsai": { + "Package": "bonsai", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "cli", + "dials", + "dplyr", + "glue", + "parsnip", + "purrr", + "rlang", + "stats", + "tibble", + "utils" + ], + "Hash": "c086fa23dfeac2d2b95f758dfa917c3d" + }, "boot": { "Package": "boot", "Version": "1.3-30", @@ -1317,6 +1343,18 @@ ], "Hash": "16a4974681fc751a09a1250431361896" }, + "git2r": { + "Package": "git2r", + "Version": "0.33.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "utils" + ], + "Hash": "cdec9964efeda730d1b2cd3d5dd27747" + }, "globals": { "Package": "globals", "Version": "0.16.2", @@ -1703,6 +1741,16 @@ ], "Hash": "8954069286b4b2b0d023d1b288dce978" }, + "janeaustenr": { + "Package": "janeaustenr", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "26f391e42073877818f2d4f0470dca24" + }, "jquerylib": { "Package": "jquerylib", "Version": "0.1.4", @@ -3165,6 +3213,29 @@ ], "Hash": "79540e5fcd9e0435af547d885f184fd5" }, + "tidytext": { + "Package": "tidytext", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Matrix", + "R", + "cli", + "dplyr", + "generics", + "janeaustenr", + "lifecycle", + "methods", + "purrr", + "rlang", + "stringr", + "tibble", + "tokenizers", + "vctrs" + ], + "Hash": "eb8386c938a086eb3216595eba76831d" + }, "timeDate": { "Package": "timeDate", "Version": "4032.109", @@ -3200,6 +3271,19 @@ ], "Hash": "5ac22900ae0f386e54f1c307eca7d843" }, + "tokenizers": { + "Package": "tokenizers", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "SnowballC", + "stringi" + ], + "Hash": "76d35ebfaaf291e08c15696c9f2ec96d" + }, "tune": { "Package": "tune", "Version": "1.1.2", @@ -3328,6 +3412,22 @@ ], "Hash": "b9a6f592769dc2b70a0a7bd1478741f2" }, + "vip": { + "Package": "vip", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "foreach", + "ggplot2", + "stats", + "tibble", + "utils", + "yardstick" + ], + "Hash": "4bfee1f7181f71d552a4b63485f8fc25" + }, "viridisLite": { "Package": "viridisLite", "Version": "0.4.2", diff --git a/src/models/training.R b/src/models/training.R index 0ddb62c..a913e7c 100644 --- a/src/models/training.R +++ b/src/models/training.R @@ -531,6 +531,33 @@ glmnet_grid = function() { ) } +lightgbm_spec = function(trees = 500, ...) { + + + require(bonsai) + + parsnip::boost_tree( + mode = "regression", + trees = trees, + min_n = tune(), + tree_depth = tune(), + ...) |> + set_engine("lightgbm") +} + +lightgbm_grid = + function(size = 15) { + + grid_max_entropy( + x = dials::parameters( + min_n(), # 2nd important + tree_depth() # 3rd most important + ), + size = size + ) + } + + # function to build a recipe and apply series of steps given an outcome build_outcome_recipe = function(data, diff --git a/src/visualizations/models.R b/src/visualizations/models.R index 11fa63c..43b7c9f 100644 --- a/src/visualizations/models.R +++ b/src/visualizations/models.R @@ -1,40 +1,156 @@ +get_tuning_plots = function(results) { + + results |> + select(outcome, wflow_id, result) |> + mutate(plot = map2(result, outcome, ~ .x |> + autoplot() + + labs(title = .y) + + theme_bw())) |> + pull(plot) +} + +extract_vetiver_features = function(v) { + + v |> + extract_vetiver_model() |> + extract_model_features() +} + +extract_vetiver_model = function(vetiver_obj) { + + vetiver_obj |> + pluck("model") |> + bundle::unbundle() +} + +extract_model_type = function(wflow) { + + wflow |> + extract_fit_parsnip() |> + pluck("spec") |> + pluck("engine") + +} + +extract_model_features = function(wflow) { + + # check type + engine = wflow |> + extract_model_type() + + if (engine == 'lightgbm') { + + features = map_df( + c('frequency', 'cover', 'gain'), + ~ wflow|> + vip::vi_model(.x, + percentage = T) |> + mutate(type = .x) + ) + + } else if (engine == 'glmnet') { + + features = wflow |> + get_coefs.glmnet() + } else { + + features = wflow |> + vip::vi_model() + } + + features |> + add_column(engine = engine) |> + select(engine, everything()) + +} + +plot_features.lightgbm = function(features, + top_n = 25, + minlength = 50) { + + features |> + group_by(type) |> + slice_max(Importance, n = top_n) |> + mutate(Variable = bggUtils::present_bgg_text(Variable, minlength = minlength)) |> + ggplot(aes(x=Importance, + y = reorder(Variable, Importance)))+ + geom_col()+ + facet_wrap(~type, scales = "free_x", ncol = 3) + + tidytext::scale_y_reordered()+ + ylab("Feature") +} + +plot_features.default = function(features, + top_n = 25, + minlength = 50) { + features |> + slice_max(Importance, n = top_n) |> + mutate(Variable = bggUtils::present_bgg_text(Variable, minlength = minlength)) |> + ggplot(aes(x=Importance, + y = reorder(Variable, Importance)))+ + geom_col()+ + tidytext::scale_y_reordered()+ + ylab("Feature") + +} + +plot_model_features = function(features, + ...) { + + engine = unique(features$engine) + + # check engine + if (engine == 'lightgbm') { + features |> + plot_features.lightgbm(...) + } else if (engine == 'glmnet') { + + features |> + coef_plot.glmnet(...) + } else { + + features |> + plot_features.default() + } +} + get_glmnet_objs = - function(glmnet_wflow) { - - # extract engine - glmnet_engine = - glmnet_wflow %>% - extract_fit_engine() - - # extract parsnip - glmnet_parsnip = - glmnet_wflow %>% - extract_fit_parsnip() - - - # return both - list("engine" = glmnet_engine, - "parsnip" = glmnet_parsnip) - - } + function(glmnet_wflow) { + + # extract engine + glmnet_engine = + glmnet_wflow %>% + extract_fit_engine() + + # extract parsnip + glmnet_parsnip = + glmnet_wflow %>% + extract_fit_parsnip() + + + # return both + list("engine" = glmnet_engine, + "parsnip" = glmnet_parsnip) + + } get_glmnet_coefs = function(glmnet_objs, type = 'parsnip', remove_intercept = T, return_zeroes = T) { - - coefs = - glmnet_objs |> - pluck(type) |> - tidy(return_zeroes = return_zeroes) - - if (remove_intercept == T) { - coefs = - coefs |> - filter(term != "(Intercept)") - } - - coefs + + coefs = + glmnet_objs |> + pluck(type) |> + tidy(return_zeroes = return_zeroes) + + if (remove_intercept == T) { + coefs = + coefs |> + filter(term != "(Intercept)") + } + + coefs } color_fill_gradient = function(plot, @@ -44,72 +160,72 @@ color_fill_gradient = function(plot, mid_color = "grey80", high_color = "deepskyblue1", oob = scales::squish) { - - plot + - scale_fill_gradient2(low = low_color, - mid = mid_color, - high = high_color, - midpoint = 0, - limits = limits, - oob = oob) + - scale_fill_gradient2(low = low_color, - mid = mid_color, - high = high_color, - midpoint = 0, - limits = limits, - oob = oob) + + plot + + scale_fill_gradient2(low = low_color, + mid = mid_color, + high = high_color, + midpoint = 0, + limits = limits, + oob = oob) + + scale_fill_gradient2(low = low_color, + mid = mid_color, + high = high_color, + midpoint = 0, + limits = limits, + oob = oob) } fill_colorbar = function(plot) { - - plot + - guides(fill = guide_colorbar(barheight = 0.5, - barwidth = 15, - title.position = 'top') - ) - + + plot + + guides(fill = guide_colorbar(barheight = 0.5, + barwidth = 15, + title.position = 'top') + ) + } color_colorbar = function(plot) { - - plot + - guides(color = guide_colorbar(barheight = 0.5, - barwidth = 15, - title.position = 'top') - ) - + + plot + + guides(color = guide_colorbar(barheight = 0.5, + barwidth = 15, + title.position = 'top') + ) + } get_coefs.glmnet = function(workflow, remove_intercept = T) { - - glmnet_objs = get_glmnet_objs(workflow) - - coefs = - get_glmnet_coefs(glmnet_objs, - type = 'parsnip', - remove_intercept = remove_intercept) - - if (remove_intercept == T) { - coefs = - coefs |> - filter(term != "(Intercept)") - } - - coefs + + glmnet_objs = get_glmnet_objs(workflow) + + coefs = + get_glmnet_coefs(glmnet_objs, + type = 'parsnip', + remove_intercept = remove_intercept) + + if (remove_intercept == T) { + coefs = + coefs |> + filter(term != "(Intercept)") + } + + coefs } top_coefs_by_sign = function(coefs, n = 25) { - - coefs |> - mutate(sign = case_when(estimate > 0 ~ 'increases probability', - estimate < 0 ~ 'decreases probability')) |> - mutate(sign = factor(sign, levels = c("increases probability", "decreases probability"))) |> - group_by(sign) |> - slice_max(abs(estimate), - n =n) + + coefs |> + mutate(sign = case_when(estimate > 0 ~ 'increases probability', + estimate < 0 ~ 'decreases probability')) |> + mutate(sign = factor(sign, levels = c("increases probability", "decreases probability"))) |> + group_by(sign) |> + slice_max(abs(estimate), + n =n) } coef_plot.glmnet = function(coefs, @@ -118,39 +234,39 @@ coef_plot.glmnet = function(coefs, midpoint = 0, facet_by_sign = F, ...) { + + present_coefs = + coefs |> + mutate(tidy_term = bggUtils::present_bgg_text(term, minlength = minlength)) + + plot = present_coefs |> + ggplot(aes(x=estimate, + fill = estimate, + y= reorder(tidy_term, estimate))) + + geom_col(color = 'white')+ + theme_bgg()+ + labs(y = "Feature", + x = "Effect on Outcome") + + suppressMessages({ + p = + plot |> + color_fill_gradient(midpoint = midpoint, + limits = limits, + oob = scales::squish) + }) + + if (facet_by_sign == T) { - present_coefs = - coefs |> - mutate(tidy_term = bggUtils::present_bgg_text(term, minlength = minlength)) - - plot = present_coefs |> - ggplot(aes(x=estimate, - fill = estimate, - y= reorder(tidy_term, estimate))) + - geom_col(color = 'white')+ - theme_bgg()+ - labs(y = "Feature", - x = "Effect on Outcome") - - suppressMessages({ - p = - plot |> - color_fill_gradient(midpoint = midpoint, - limits = limits, - oob = scales::squish) - }) - - if (facet_by_sign == T) { - - p = plot + - facet_wrap(sign ~., - scales = "free")+ - theme(strip.text.x = element_text(size = 10)) - } - - p |> - fill_colorbar() - + p = plot + + facet_wrap(sign ~., + scales = "free")+ + theme(strip.text.x = element_text(size = 10)) + } + + p |> + fill_colorbar() + } trace_plot.glmnet = function(workflow, @@ -158,68 +274,68 @@ trace_plot.glmnet = function(workflow, lower_estimate = -0.5, minlength = 50, max.overlaps = 25) { - - - glmnet_objs = - workflow |> - get_glmnet_objs() - - # get lambda - lambda = - glmnet_objs|> - pluck("parsnip") |> - tidy() |> - pull(penalty) |> - unique() - - coefs = - glmnet_objs |> - get_glmnet_coefs( - type = 'engine' - ) - - plot_coefs = - coefs |> - mutate(label_left = - case_when( - lambda == min(lambda) & abs(estimate) > upper_estimate ~ - bggUtils::present_bgg_text(term, minlength = minlength))) %>% - group_by(term) %>% - mutate(label_right = - case_when( - lambda == max(lambda) & estimate > lower_estimate ~ - bggUtils::present_bgg_text(term, minlength = minlength))) %>% - ungroup() - - plot_coefs |> - ggplot(aes(x=log(lambda), - y=estimate, - group = term))+ - geom_line(alpha = 0.5, - color = 'grey60')+ - geom_vline(xintercept = log(lambda), - linetype = 'dotted', - alpha = 0.5)+ - guides(color = 'none')+ - theme_minimal()+ - ggrepel::geom_text_repel( - aes(label = label_left), - max.overlaps = max.overlaps, - size = 2, - direction = "y", - hjust =1.5, - segment.size = .5, - segment.alpha = .5, - segment.linetype = "dashed", - box.padding = .5, - segment.curvature = 0.2, - segment.ncp = 3, - segment.angle = 20)+ - coord_cartesian(xlim = c(min(log(glmnet_objs$engine$lambda)-2), 0))+ - theme(panel.grid.major = element_blank())+ - geom_hline(yintercept = 0, - linetype = 'dotted', - alpha = 0.5) + + + glmnet_objs = + workflow |> + get_glmnet_objs() + + # get lambda + lambda = + glmnet_objs|> + pluck("parsnip") |> + tidy() |> + pull(penalty) |> + unique() + + coefs = + glmnet_objs |> + get_glmnet_coefs( + type = 'engine' + ) + + plot_coefs = + coefs |> + mutate(label_left = + case_when( + lambda == min(lambda) & abs(estimate) > upper_estimate ~ + bggUtils::present_bgg_text(term, minlength = minlength))) %>% + group_by(term) %>% + mutate(label_right = + case_when( + lambda == max(lambda) & estimate > lower_estimate ~ + bggUtils::present_bgg_text(term, minlength = minlength))) %>% + ungroup() + + plot_coefs |> + ggplot(aes(x=log(lambda), + y=estimate, + group = term))+ + geom_line(alpha = 0.5, + color = 'grey60')+ + geom_vline(xintercept = log(lambda), + linetype = 'dotted', + alpha = 0.5)+ + guides(color = 'none')+ + theme_minimal()+ + ggrepel::geom_text_repel( + aes(label = label_left), + max.overlaps = max.overlaps, + size = 2, + direction = "y", + hjust =1.5, + segment.size = .5, + segment.alpha = .5, + segment.linetype = "dashed", + box.padding = .5, + segment.curvature = 0.2, + segment.ncp = 3, + segment.angle = 20)+ + coord_cartesian(xlim = c(min(log(glmnet_objs$engine$lambda)-2), 0))+ + theme(panel.grid.major = element_blank())+ + geom_hline(yintercept = 0, + linetype = 'dotted', + alpha = 0.5) } @@ -229,24 +345,24 @@ coef_plot_by_group = function(coefs, scales = "free", shrink = T, ...) { - - tmp = - paste0('^', group) - - coefs |> - filter(grepl(tmp, term)) |> - top_coefs_by_sign() |> - mutate(term = gsub(tmp, "", term)) |> - coef_plot.glmnet(...) + - facet_wrap(sign ~., - scales = scales, - shrink = shrink) + - theme(strip.text.x = element_text(size = 10), - axis.text.y = element_text(hjust = 1))+ - scale_x_continuous(breaks = scales::pretty_breaks(n=3))+ - guides(fill = 'none') + - labs(title = stringr::str_to_title(group))+ - scale_y_discrete(label=function(x) stringr::str_trunc(x, width = width)) - - + + tmp = + paste0('^', group) + + coefs |> + filter(grepl(tmp, term)) |> + top_coefs_by_sign() |> + mutate(term = gsub(tmp, "", term)) |> + coef_plot.glmnet(...) + + facet_wrap(sign ~., + scales = scales, + shrink = shrink) + + theme(strip.text.x = element_text(size = 10), + axis.text.y = element_text(hjust = 1))+ + scale_x_continuous(breaks = scales::pretty_breaks(n=3))+ + guides(fill = 'none') + + labs(title = stringr::str_to_title(group))+ + scale_y_discrete(label=function(x) stringr::str_trunc(x, width = width)) + + } \ No newline at end of file diff --git a/targets-runs/tracking.csv b/targets-runs/tracking.csv index d78d142..d3e9d8a 100644 --- a/targets-runs/tracking.csv +++ b/targets-runs/tracking.csv @@ -1,9 +1,9 @@ -"","time","user","model","penalty","mixture",".config","minratings","outcome","rmse","mae","mape","rsq","ccc" -"1",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",0,"average",1.303,0.923,16.481,0.113,0.204 -"2",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",0,"averageweight",0.548,0.42,24.993,0.529,0.701 -"3",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet+glmnet",NA,NA,NA,0,"bayesaverage",0.3,0.174,2.878,0.412,0.633 -"4",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",0,"usersrated",994.732,205.786,Inf,0.163,0.401 -"5",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",25,"average",0.688,0.506,7.625,0.277,0.458 -"6",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",25,"averageweight",0.479,0.367,20.356,0.63,0.773 -"7",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet+glmnet",NA,NA,NA,25,"bayesaverage",0.3,0.174,2.878,0.412,0.633 -"8",2024-05-23 13:13:49.184795,"phil.henrickson@github.com","glmnet",0.01,0.5,"Preprocessor1_Model27",25,"usersrated",1888.348,468.294,165.111,0.145,0.379 +"","time","user","model","min_n","tree_depth",".config","minratings","outcome","rmse","mae","mape","rsq","ccc" +"1",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",0,"average",1.317,0.935,16.324,0.113,0.217 +"2",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",0,"averageweight",0.532,0.403,23.69,0.56,0.733 +"3",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm+lightgbm",NA,NA,NA,0,"bayesaverage",0.293,0.17,2.823,0.432,0.646 +"4",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",0,"usersrated",829.856,211.708,Inf,0.253,0.498 +"5",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",25,"average",0.692,0.509,7.582,0.282,0.463 +"6",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",25,"averageweight",0.457,0.347,19.21,0.665,0.804 +"7",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm+lightgbm",NA,NA,NA,25,"bayesaverage",0.293,0.17,2.823,0.432,0.646 +"8",2024-05-24 09:41:20.367494,"phil.henrickson@github.com","lightgbm",15,7,"Preprocessor1_Model06",25,"usersrated",1565.092,461.642,184.381,0.229,0.476