Penguins Lasso with Tidymodels

1 Load packages

library(tidyverse)  # data wrangling
library(tidymodels)

2 Data

# Load the `penguins` data set that ships with the modeldata package.
data("penguins", package = "modeldata")

3 A bit more than minimal

# Preprocessing recipe: predict body mass from all other columns.
# Role-aware selectors (`*_predictors()`) are used throughout so that no step
# can ever touch the outcome column.
rec1 <-
  recipe(body_mass_g ~ ., data = penguins) %>%
  # Turn factor predictors (species, island, sex) into dummy columns.
  step_dummy(all_nominal_predictors()) %>%
  # Center and scale every numeric predictor, including the dummies created
  # above, so the lasso penalty treats all predictors on a common scale.
  step_normalize(all_numeric_predictors()) %>%
  # Drop predictors with (near-)zero variance.
  step_nzv(all_numeric_predictors()) %>%
  # Remove rows with missing predictor values.
  step_naomit(all_predictors())

Checks:

# Overview of the variables known to the recipe, with their type and role.
summary(rec1)
#> # A tibble: 7 × 4
#>   variable          type    role      source  
#>   <chr>             <chr>   <chr>     <chr>   
#> 1 species           nominal predictor original
#> 2 island            nominal predictor original
#> 3 bill_length_mm    numeric predictor original
#> 4 bill_depth_mm     numeric predictor original
#> 5 flipper_length_mm numeric predictor original
#> 6 sex               nominal predictor original
#> 7 body_mass_g       numeric outcome   original
# List the recipe's steps; `trained` is FALSE because the recipe has not been
# prepped at this point.
tidy(rec1)
#> # A tibble: 5 × 6
#>   number operation type      trained skip  id             
#>    <int> <chr>     <chr>     <lgl>   <lgl> <chr>          
#> 1      1 step      dummy     FALSE   FALSE dummy_rc5a2    
#> 2      2 step      normalize FALSE   FALSE normalize_U3yg4
#> 3      3 step      nzv       FALSE   FALSE nzv_vruQ8      
#> 4      4 step      naomit    FALSE   TRUE  naomit_PqP3J   
#> 5      5 step      novel     FALSE   FALSE novel_6pjBL
# Estimate the recipe, apply it to its own training data (`new_data = NULL`),
# and preview the first six processed rows.
prep(rec1) %>%
  bake(new_data = NULL) %>%
  head(n = 6)
#> # A tibble: 6 × 9
#>   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g species_Chinstrap
#>            <dbl>         <dbl>             <dbl>       <int>             <dbl>
#> 1         -0.883         0.784            -1.42         3750            -0.496
#> 2         -0.810         0.126            -1.06         3800            -0.496
#> 3         -0.663         0.430            -0.421        3250            -0.496
#> 4         -1.32          1.09             -0.563        3450            -0.496
#> 5         -0.847         1.75             -0.776        3650            -0.496
#> 6         -0.920         0.329            -1.42         3625            -0.496
#> # … with 4 more variables: species_Gentoo <dbl>, island_Dream <dbl>,
#> #   island_Torgersen <dbl>, sex_male <dbl>
# Workflow = preprocessing recipe + model specification.
# The model is a glmnet linear regression with `mixture = 1` (lasso) and a
# fixed `penalty` of 1.
wf1 <-
  workflow() %>%
  add_recipe(rec1) %>%
  add_model(
    linear_reg(penalty = 1, mixture = 1) %>%
      set_engine("glmnet")
  )

# Fit the whole workflow (recipe and model) on the penguins data.
m2 <- fit(wf1, data = penguins)

4 Results

# Coefficient table of the fitted workflow; the `penalty` column reports the
# penalty value the estimates refer to.
tidy(m2)
#> # A tibble: 9 × 3
#>   term              estimate penalty
#>   <chr>                <dbl>   <dbl>
#> 1 (Intercept)        4208.         1
#> 2 bill_length_mm       96.0        1
#> 3 bill_depth_mm       123.         1
#> 4 flipper_length_mm   232.         1
#> 5 species_Chinstrap  -101.         1
#> 6 species_Gentoo      465.         1
#> 7 island_Dream         -5.44       1
#> 8 island_Torgersen    -15.3        1
#> 9 sex_male            197.         1

If the lasso had shrunk any predictor's coefficient to zero, it would show up in this table.

5 Extract fit

Less informative …

# Pull the parsnip model object out of the fitted workflow.
extract_fit_parsnip(m2)
#> parsnip model object
#> 
#> 
#> Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~1) 
#> 
#>    Df  %Dev Lambda
#> 1   0  0.00 701.90
#> 2   1 12.94 639.50
#> 3   1 23.68 582.70
#> 4   1 32.60 530.90
#> 5   1 40.00 483.80
#> 6   1 46.15 440.80
#> 7   1 51.25 401.60
#> 8   2 55.69 366.00
#> 9   2 59.46 333.40
#> 10  2 62.59 303.80
#> 11  2 65.18 276.80
#> 12  2 67.34 252.20
#> 13  3 69.88 229.80
#> 14  3 72.71 209.40
#> 15  3 75.06 190.80
#> 16  3 77.01 173.90
#> 17  3 78.62 158.40
#> 18  3 79.97 144.30
#> 19  3 81.09 131.50
#> 20  3 82.01 119.80
#> 21  3 82.78 109.20
#> 22  3 83.42  99.49
#> 23  3 83.95  90.65
#> 24  3 84.39  82.60
#> 25  3 84.76  75.26
#> 26  3 85.06  68.57
#> 27  3 85.31  62.48
#> 28  3 85.52  56.93
#> 29  3 85.69  51.87
#> 30  4 85.85  47.27
#> 31  4 85.99  43.07
#> 32  4 86.11  39.24
#> 33  4 86.21  35.75
#> 34  4 86.29  32.58
#> 35  5 86.38  29.68
#> 36  5 86.50  27.05
#> 37  5 86.59  24.64
#> 38  5 86.67  22.46
#> 39  5 86.73  20.46
#> 40  5 86.79  18.64
#> 41  5 86.83  16.99
#> 42  6 86.89  15.48
#> 43  6 86.99  14.10
#> 44  6 87.08  12.85
#> 45  6 87.15  11.71
#> 46  6 87.21  10.67
#> 47  7 87.26   9.72
#> 48  7 87.30   8.86
#> 49  7 87.34   8.07
#> 50  7 87.37   7.35
#> 51  7 87.40   6.70
#> 52  8 87.42   6.11
#> 53  8 87.43   5.56
#> 54  8 87.45   5.07
#> 55  8 87.46   4.62
#> 56  8 87.47   4.21
#> 57  8 87.48   3.83
#> 58  8 87.49   3.49
#> 59  8 87.49   3.18
#> 60  8 87.50   2.90
#> 61  8 87.50   2.64
#> 62  8 87.50   2.41
#> 63  8 87.51   2.19
#> 64  8 87.51   2.00
#> 65  8 87.51   1.82
#> 66  8 87.51   1.66
#> 67  8 87.52   1.51
#> 68  8 87.52   1.38
#> 69  8 87.52   1.25
#> 70  8 87.52   1.14

Similar to:

# Pull the underlying engine fit (the raw glmnet object) out of the workflow.
extract_fit_engine(m2)
#> 
#> Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~1) 
#> 
#>    Df  %Dev Lambda
#> 1   0  0.00 701.90
#> 2   1 12.94 639.50
#> 3   1 23.68 582.70
#> 4   1 32.60 530.90
#> 5   1 40.00 483.80
#> 6   1 46.15 440.80
#> 7   1 51.25 401.60
#> 8   2 55.69 366.00
#> 9   2 59.46 333.40
#> 10  2 62.59 303.80
#> 11  2 65.18 276.80
#> 12  2 67.34 252.20
#> 13  3 69.88 229.80
#> 14  3 72.71 209.40
#> 15  3 75.06 190.80
#> 16  3 77.01 173.90
#> 17  3 78.62 158.40
#> 18  3 79.97 144.30
#> 19  3 81.09 131.50
#> 20  3 82.01 119.80
#> 21  3 82.78 109.20
#> 22  3 83.42  99.49
#> 23  3 83.95  90.65
#> 24  3 84.39  82.60
#> 25  3 84.76  75.26
#> 26  3 85.06  68.57
#> 27  3 85.31  62.48
#> 28  3 85.52  56.93
#> 29  3 85.69  51.87
#> 30  4 85.85  47.27
#> 31  4 85.99  43.07
#> 32  4 86.11  39.24
#> 33  4 86.21  35.75
#> 34  4 86.29  32.58
#> 35  5 86.38  29.68
#> 36  5 86.50  27.05
#> 37  5 86.59  24.64
#> 38  5 86.67  22.46
#> 39  5 86.73  20.46
#> 40  5 86.79  18.64
#> 41  5 86.83  16.99
#> 42  6 86.89  15.48
#> 43  6 86.99  14.10
#> 44  6 87.08  12.85
#> 45  6 87.15  11.71
#> 46  6 87.21  10.67
#> 47  7 87.26   9.72
#> 48  7 87.30   8.86
#> 49  7 87.34   8.07
#> 50  7 87.37   7.35
#> 51  7 87.40   6.70
#> 52  8 87.42   6.11
#> 53  8 87.43   5.56
#> 54  8 87.45   5.07
#> 55  8 87.46   4.62
#> 56  8 87.47   4.21
#> 57  8 87.48   3.83
#> 58  8 87.49   3.49
#> 59  8 87.49   3.18
#> 60  8 87.50   2.90
#> 61  8 87.50   2.64
#> 62  8 87.50   2.41
#> 63  8 87.51   2.19
#> 64  8 87.51   2.00
#> 65  8 87.51   1.82
#> 66  8 87.51   1.66
#> 67  8 87.52   1.51
#> 68  8 87.52   1.38
#> 69  8 87.52   1.25
#> 70  8 87.52   1.14

More information, such as the difference between the last two functions, can be found on the tidymodels page on model coefficients.

6 Reproducibility

#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.0 (2022-04-22)
#>  os       macOS Big Sur/Monterey 10.16
#>  system   x86_64, darwin17.0
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Berlin
#>  date     2022-07-24
#>  pandoc   2.18 @ /usr/local/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.2.0)
#>  backports     1.4.1   2021-12-13 [1] CRAN (R 4.2.0)
#>  blogdown      1.10    2022-05-10 [1] CRAN (R 4.2.0)
#>  bookdown      0.27    2022-06-14 [1] CRAN (R 4.2.0)
#>  brio          1.1.3   2021-11-30 [1] CRAN (R 4.2.0)
#>  broom         1.0.0   2022-07-01 [1] CRAN (R 4.2.0)
#>  bslib         0.3.1   2021-10-06 [1] CRAN (R 4.2.0)
#>  cachem        1.0.6   2021-08-19 [1] CRAN (R 4.2.0)
#>  callr         3.7.0   2021-04-20 [1] CRAN (R 4.2.0)
#>  cellranger    1.1.0   2016-07-27 [1] CRAN (R 4.2.0)
#>  cli           3.3.0   2022-04-25 [1] CRAN (R 4.2.0)
#>  codetools     0.2-18  2020-11-04 [2] CRAN (R 4.2.0)
#>  colorout    * 1.2-2   2022-06-13 [1] local
#>  colorspace    2.0-3   2022-02-21 [1] CRAN (R 4.2.0)
#>  crayon        1.5.1   2022-03-26 [1] CRAN (R 4.2.0)
#>  DBI           1.1.2   2021-12-20 [1] CRAN (R 4.2.0)
#>  dbplyr        2.2.0   2022-06-05 [1] CRAN (R 4.2.0)
#>  desc          1.4.1   2022-03-06 [1] CRAN (R 4.2.0)
#>  devtools      2.4.3   2021-11-30 [1] CRAN (R 4.2.0)
#>  digest        0.6.29  2021-12-01 [1] CRAN (R 4.2.0)
#>  dplyr       * 1.0.9   2022-04-28 [1] CRAN (R 4.2.0)
#>  ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.2.0)
#>  evaluate      0.15    2022-02-18 [1] CRAN (R 4.2.0)
#>  fansi         1.0.3   2022-03-24 [1] CRAN (R 4.2.0)
#>  fastmap       1.1.0   2021-01-25 [1] CRAN (R 4.2.0)
#>  forcats     * 0.5.1   2021-01-27 [1] CRAN (R 4.2.0)
#>  fs            1.5.2   2021-12-08 [1] CRAN (R 4.2.0)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2     * 3.3.6   2022-05-03 [1] CRAN (R 4.2.0)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.2.0)
#>  gtable        0.3.0   2019-03-25 [1] CRAN (R 4.2.0)
#>  haven         2.5.0   2022-04-15 [1] CRAN (R 4.2.0)
#>  hms           1.1.1   2021-09-26 [1] CRAN (R 4.2.0)
#>  htmltools     0.5.2   2021-08-25 [1] CRAN (R 4.2.0)
#>  httr          1.4.3   2022-05-04 [1] CRAN (R 4.2.0)
#>  jquerylib     0.1.4   2021-04-26 [1] CRAN (R 4.2.0)
#>  jsonlite      1.8.0   2022-02-22 [1] CRAN (R 4.2.0)
#>  knitr         1.39    2022-04-26 [1] CRAN (R 4.2.0)
#>  lifecycle     1.0.1   2021-09-24 [1] CRAN (R 4.2.0)
#>  lubridate     1.8.0   2021-10-07 [1] CRAN (R 4.2.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.2.0)
#>  memoise       2.0.1   2021-11-26 [1] CRAN (R 4.2.0)
#>  modelr        0.1.8   2020-05-19 [1] CRAN (R 4.2.0)
#>  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.2.0)
#>  pillar        1.7.0   2022-02-01 [1] CRAN (R 4.2.0)
#>  pkgbuild      1.3.1   2021-12-20 [1] CRAN (R 4.2.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.2.0)
#>  pkgload       1.2.4   2021-11-30 [1] CRAN (R 4.2.0)
#>  prettyunits   1.1.1   2020-01-24 [1] CRAN (R 4.2.0)
#>  processx      3.6.1   2022-06-17 [1] CRAN (R 4.2.0)
#>  ps            1.7.1   2022-06-18 [1] CRAN (R 4.2.0)
#>  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 4.2.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.2.0)
#>  readr       * 2.1.2   2022-01-30 [1] CRAN (R 4.2.0)
#>  readxl        1.4.0   2022-03-28 [1] CRAN (R 4.2.0)
#>  remotes       2.4.2   2021-11-30 [1] CRAN (R 4.2.0)
#>  reprex        2.0.1   2021-08-05 [1] CRAN (R 4.2.0)
#>  rlang         1.0.3   2022-06-27 [1] CRAN (R 4.2.0)
#>  rmarkdown     2.14    2022-04-25 [1] CRAN (R 4.2.0)
#>  rprojroot     2.0.3   2022-04-02 [1] CRAN (R 4.2.0)
#>  rstudioapi    0.13    2020-11-12 [1] CRAN (R 4.2.0)
#>  rvest         1.0.2   2021-10-16 [1] CRAN (R 4.2.0)
#>  sass          0.4.1   2022-03-23 [1] CRAN (R 4.2.0)
#>  scales        1.2.0   2022-04-13 [1] CRAN (R 4.2.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.2.0)
#>  stringi       1.7.6   2021-11-29 [1] CRAN (R 4.2.0)
#>  stringr     * 1.4.0   2019-02-10 [1] CRAN (R 4.2.0)
#>  testthat      3.1.4   2022-04-26 [1] CRAN (R 4.2.0)
#>  tibble      * 3.1.7   2022-05-03 [1] CRAN (R 4.2.0)
#>  tidyr       * 1.2.0   2022-02-01 [1] CRAN (R 4.2.0)
#>  tidyselect    1.1.2   2022-02-21 [1] CRAN (R 4.2.0)
#>  tidyverse   * 1.3.1   2021-04-15 [1] CRAN (R 4.2.0)
#>  tzdb          0.3.0   2022-03-28 [1] CRAN (R 4.2.0)
#>  usethis       2.1.6   2022-05-25 [1] CRAN (R 4.2.0)
#>  utf8          1.2.2   2021-07-24 [1] CRAN (R 4.2.0)
#>  vctrs         0.4.1   2022-04-13 [1] CRAN (R 4.2.0)
#>  withr         2.5.0   2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun          0.31    2022-05-10 [1] CRAN (R 4.2.0)
#>  xml2          1.3.3   2021-11-30 [1] CRAN (R 4.2.0)
#>  yaml          2.3.5   2022-02-21 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Users/sebastiansaueruser/Rlibs
#>  [2] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────