1 Load packages
library(tidyverse) # data wrangling
library(tidymodels)
2 Data
# Load the `penguins` data set that ships with the modeldata package.
data("penguins", package = "modeldata")
3 A bit more than minimal
# Preprocessing recipe: model body mass as a function of all other columns.
rec1 <-
  recipe(body_mass_g ~ ., data = penguins) %>%
  # Dummy-code nominal *predictors* only; the role-agnostic `all_nominal()`
  # would also pick up a nominal outcome if the formula ever changed.
  step_dummy(all_nominal_predictors()) %>%
  # Runs after step_dummy(), so the 0/1 dummy columns are standardised as well.
  step_normalize(all_numeric_predictors()) %>%
  # Drop predictors with (near-)zero variance.
  step_nzv(all_numeric_predictors()) %>%
  # Remove rows with a missing value in any predictor.
  step_naomit(all_predictors())
Checks:
# List the variables known to the recipe together with their type and role.
summary(rec1)
#> # A tibble: 7 × 4
#> variable type role source
#> <chr> <chr> <chr> <chr>
#> 1 species nominal predictor original
#> 2 island nominal predictor original
#> 3 bill_length_mm numeric predictor original
#> 4 bill_depth_mm numeric predictor original
#> 5 flipper_length_mm numeric predictor original
#> 6 sex nominal predictor original
#> 7 body_mass_g numeric outcome original
# List the recipe's steps and whether they have been trained yet.
# NOTE(review): the output printed below shows five steps (including `novel`),
# whereas rec1 as defined above has four; the output is presumably from an
# earlier version of the recipe, so re-render to confirm.
tidy(rec1)
#> # A tibble: 5 × 6
#> number operation type trained skip id
#> <int> <chr> <chr> <lgl> <lgl> <chr>
#> 1 1 step dummy FALSE FALSE dummy_rc5a2
#> 2 2 step normalize FALSE FALSE normalize_U3yg4
#> 3 3 step nzv FALSE FALSE nzv_vruQ8
#> 4 4 step naomit FALSE TRUE naomit_PqP3J
#> 5 5 step novel FALSE FALSE novel_6pjBL
# Train the recipe, apply it to the data it was trained on
# (`new_data = NULL`), and preview the first rows of the result.
head(bake(prep(rec1), new_data = NULL))
#> # A tibble: 6 × 9
#> bill_length_mm bill_depth_mm flipper_length_mm body_mass_g species_Chinstrap
#> <dbl> <dbl> <dbl> <int> <dbl>
#> 1 -0.883 0.784 -1.42 3750 -0.496
#> 2 -0.810 0.126 -1.06 3800 -0.496
#> 3 -0.663 0.430 -0.421 3250 -0.496
#> 4 -1.32 1.09 -0.563 3450 -0.496
#> 5 -0.847 1.75 -0.776 3650 -0.496
#> 6 -0.920 0.329 -1.42 3625 -0.496
#> # … with 4 more variables: species_Gentoo <dbl>, island_Dream <dbl>,
#> # island_Torgersen <dbl>, sex_male <dbl>
# Workflow = preprocessing recipe + lasso regression
# (glmnet engine, mixture = 1, fixed penalty of 1).
wf1 <- workflow() %>%
  add_recipe(rec1) %>%
  add_model(linear_reg(engine = "glmnet", mixture = 1, penalty = 1))

# Fit the complete workflow on the penguins data.
m2 <- fit(wf1, data = penguins)
4 Results
# Coefficient table of the fitted workflow: one row per term, with its
# estimate and the penalty value.
tidy(m2)
#> # A tibble: 9 × 3
#> term estimate penalty
#> <chr> <dbl> <dbl>
#> 1 (Intercept) 4208. 1
#> 2 bill_length_mm 96.0 1
#> 3 bill_depth_mm 123. 1
#> 4 flipper_length_mm 232. 1
#> 5 species_Chinstrap -101. 1
#> 6 species_Gentoo 465. 1
#> 7 island_Dream -5.44 1
#> 8 island_Torgersen -15.3 1
#> 9 sex_male 197. 1
If the lasso penalty had shrunk any predictor's coefficient to exactly zero, this would be visible in the table above.
5 Extract fit
Less informative …
# Pull the parsnip model object out of the fitted workflow.
extract_fit_parsnip(m2)
#> parsnip model object
#>
#>
#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~1)
#>
#> Df %Dev Lambda
#> 1 0 0.00 701.90
#> 2 1 12.94 639.50
#> 3 1 23.68 582.70
#> 4 1 32.60 530.90
#> 5 1 40.00 483.80
#> 6 1 46.15 440.80
#> 7 1 51.25 401.60
#> 8 2 55.69 366.00
#> 9 2 59.46 333.40
#> 10 2 62.59 303.80
#> 11 2 65.18 276.80
#> 12 2 67.34 252.20
#> 13 3 69.88 229.80
#> 14 3 72.71 209.40
#> 15 3 75.06 190.80
#> 16 3 77.01 173.90
#> 17 3 78.62 158.40
#> 18 3 79.97 144.30
#> 19 3 81.09 131.50
#> 20 3 82.01 119.80
#> 21 3 82.78 109.20
#> 22 3 83.42 99.49
#> 23 3 83.95 90.65
#> 24 3 84.39 82.60
#> 25 3 84.76 75.26
#> 26 3 85.06 68.57
#> 27 3 85.31 62.48
#> 28 3 85.52 56.93
#> 29 3 85.69 51.87
#> 30 4 85.85 47.27
#> 31 4 85.99 43.07
#> 32 4 86.11 39.24
#> 33 4 86.21 35.75
#> 34 4 86.29 32.58
#> 35 5 86.38 29.68
#> 36 5 86.50 27.05
#> 37 5 86.59 24.64
#> 38 5 86.67 22.46
#> 39 5 86.73 20.46
#> 40 5 86.79 18.64
#> 41 5 86.83 16.99
#> 42 6 86.89 15.48
#> 43 6 86.99 14.10
#> 44 6 87.08 12.85
#> 45 6 87.15 11.71
#> 46 6 87.21 10.67
#> 47 7 87.26 9.72
#> 48 7 87.30 8.86
#> 49 7 87.34 8.07
#> 50 7 87.37 7.35
#> 51 7 87.40 6.70
#> 52 8 87.42 6.11
#> 53 8 87.43 5.56
#> 54 8 87.45 5.07
#> 55 8 87.46 4.62
#> 56 8 87.47 4.21
#> 57 8 87.48 3.83
#> 58 8 87.49 3.49
#> 59 8 87.49 3.18
#> 60 8 87.50 2.90
#> 61 8 87.50 2.64
#> 62 8 87.50 2.41
#> 63 8 87.51 2.19
#> 64 8 87.51 2.00
#> 65 8 87.51 1.82
#> 66 8 87.51 1.66
#> 67 8 87.52 1.51
#> 68 8 87.52 1.38
#> 69 8 87.52 1.25
#> 70 8 87.52 1.14
Similar to:
# Pull the underlying engine (glmnet) fit out of the fitted workflow.
extract_fit_engine(m2)
#>
#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~1)
#>
#> Df %Dev Lambda
#> 1 0 0.00 701.90
#> 2 1 12.94 639.50
#> 3 1 23.68 582.70
#> 4 1 32.60 530.90
#> 5 1 40.00 483.80
#> 6 1 46.15 440.80
#> 7 1 51.25 401.60
#> 8 2 55.69 366.00
#> 9 2 59.46 333.40
#> 10 2 62.59 303.80
#> 11 2 65.18 276.80
#> 12 2 67.34 252.20
#> 13 3 69.88 229.80
#> 14 3 72.71 209.40
#> 15 3 75.06 190.80
#> 16 3 77.01 173.90
#> 17 3 78.62 158.40
#> 18 3 79.97 144.30
#> 19 3 81.09 131.50
#> 20 3 82.01 119.80
#> 21 3 82.78 109.20
#> 22 3 83.42 99.49
#> 23 3 83.95 90.65
#> 24 3 84.39 82.60
#> 25 3 84.76 75.26
#> 26 3 85.06 68.57
#> 27 3 85.31 62.48
#> 28 3 85.52 56.93
#> 29 3 85.69 51.87
#> 30 4 85.85 47.27
#> 31 4 85.99 43.07
#> 32 4 86.11 39.24
#> 33 4 86.21 35.75
#> 34 4 86.29 32.58
#> 35 5 86.38 29.68
#> 36 5 86.50 27.05
#> 37 5 86.59 24.64
#> 38 5 86.67 22.46
#> 39 5 86.73 20.46
#> 40 5 86.79 18.64
#> 41 5 86.83 16.99
#> 42 6 86.89 15.48
#> 43 6 86.99 14.10
#> 44 6 87.08 12.85
#> 45 6 87.15 11.71
#> 46 6 87.21 10.67
#> 47 7 87.26 9.72
#> 48 7 87.30 8.86
#> 49 7 87.34 8.07
#> 50 7 87.37 7.35
#> 51 7 87.40 6.70
#> 52 8 87.42 6.11
#> 53 8 87.43 5.56
#> 54 8 87.45 5.07
#> 55 8 87.46 4.62
#> 56 8 87.47 4.21
#> 57 8 87.48 3.83
#> 58 8 87.49 3.49
#> 59 8 87.49 3.18
#> 60 8 87.50 2.90
#> 61 8 87.50 2.64
#> 62 8 87.50 2.41
#> 63 8 87.51 2.19
#> 64 8 87.51 2.00
#> 65 8 87.51 1.82
#> 66 8 87.51 1.66
#> 67 8 87.52 1.51
#> 68 8 87.52 1.38
#> 69 8 87.52 1.25
#> 70 8 87.52 1.14
More information, such as the difference between the last two functions, can be found in the tidymodels article "Working with model coefficients".
6 Reproducibility
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.0 (2022-04-22)
#> os macOS Big Sur/Monterey 10.16
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Berlin
#> date 2022-07-24
#> pandoc 2.18 @ /usr/local/bin/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> blogdown 1.10 2022-05-10 [1] CRAN (R 4.2.0)
#> bookdown 0.27 2022-06-14 [1] CRAN (R 4.2.0)
#> brio 1.1.3 2021-11-30 [1] CRAN (R 4.2.0)
#> broom 1.0.0 2022-07-01 [1] CRAN (R 4.2.0)
#> bslib 0.3.1 2021-10-06 [1] CRAN (R 4.2.0)
#> cachem 1.0.6 2021-08-19 [1] CRAN (R 4.2.0)
#> callr 3.7.0 2021-04-20 [1] CRAN (R 4.2.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0)
#> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.0)
#> colorout * 1.2-2 2022-06-13 [1] local
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0)
#> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.2.0)
#> dbplyr 2.2.0 2022-06-05 [1] CRAN (R 4.2.0)
#> desc 1.4.1 2022-03-06 [1] CRAN (R 4.2.0)
#> devtools 2.4.3 2021-11-30 [1] CRAN (R 4.2.0)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0)
#> forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0)
#> haven 2.5.0 2022-04-15 [1] CRAN (R 4.2.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.2.0)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.2.0)
#> httr 1.4.3 2022-05-04 [1] CRAN (R 4.2.0)
#> jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.0)
#> modelr 0.1.8 2020-05-19 [1] CRAN (R 4.2.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.2.0)
#> pkgbuild 1.3.1 2021-12-20 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> pkgload 1.2.4 2021-11-30 [1] CRAN (R 4.2.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.2.0)
#> processx 3.6.1 2022-06-17 [1] CRAN (R 4.2.0)
#> ps 1.7.1 2022-06-18 [1] CRAN (R 4.2.0)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0)
#> readxl 1.4.0 2022-03-28 [1] CRAN (R 4.2.0)
#> remotes 2.4.2 2021-11-30 [1] CRAN (R 4.2.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.2.0)
#> rlang 1.0.3 2022-06-27 [1] CRAN (R 4.2.0)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.2.0)
#> rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.2.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.2.0)
#> rvest 1.0.2 2021-10-16 [1] CRAN (R 4.2.0)
#> sass 0.4.1 2022-03-23 [1] CRAN (R 4.2.0)
#> scales 1.2.0 2022-04-13 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.2.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.2.0)
#> testthat 3.1.4 2022-04-26 [1] CRAN (R 4.2.0)
#> tibble * 3.1.7 2022-05-03 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0)
#> tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.2.0)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0)
#> usethis 2.1.6 2022-05-25 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> xfun 0.31 2022-05-10 [1] CRAN (R 4.2.0)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0)
#>
#> [1] /Users/sebastiansaueruser/Rlibs
#> [2] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────