Some ways to plot the distribution of each variable of a data frame

1 Motivation

Oftentimes, in exploratory data analysis, one would like to plot the distribution of the relevant variables. Whereas ggplot provides handy tools to plot one variable after another, it would come in handy to plot ’em all in one go.

Of course, there are many ways to achieve this comfort. This post presents some of them briefly.

2 Load packages

library(tidyverse)  # data wrangling

3 Load data

Demo data.

# Location of the demo data set (a CSV file hosted on GitHub).
d_path <- "https://raw.githubusercontent.com/sebastiansauer/pradadata/master/data-raw/extra.csv"

# Import the CSV as a tibble, then preview its first rows.
d <- d_path %>% read_csv()
d %>% head()
#> # A tibble: 6 × 34
#>   timestamp    code    i01  i02r   i03   i04   i05  i06r   i07   i08   i09   i10
#>   <chr>        <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 11.03.2015 … HSC       3     3     3     3     4     4     3     2     3     1
#> 2 11.03.2015 … ERB       2     2     1     2     3     2     2     3     3     1
#> 3 11.03.2015 … ADP       3     4     1     4     4     1     3     2     3     1
#> 4 11.03.2015 … KHB       3     3     2     4     3     3     3     3     3     2
#> 5 11.03.2015 … PTG       4     3     1     4     4     3     4     2     3     4
#> 6 11.03.2015 … ABL       3     2     1     4     2     3     4     3     3     3
#> # … with 22 more variables: n_facebook_friends <dbl>, n_hangover <dbl>,
#> #   age <dbl>, sex <chr>, extra_single_item <dbl>, time_conversation <dbl>,
#> #   presentation <chr>, n_party <dbl>, clients <chr>, extra_vignette <chr>,
#> #   i21 <lgl>, extra_vignette2 <dbl>, major <chr>, smoker <chr>,
#> #   sleep_week <dbl>, sleep_wend <dbl>, clients_freq <dbl>, extra_mean <dbl>,
#> #   extra_md <dbl>, extra_aad <dbl>, extra_mode <dbl>, extra_iqr <dbl>

Select the items (columns) we would like to plot the distribution for:

# Keep the columns whose names contain "i" followed by two digits
# (e.g. i01, i02r) and drop i21, all in a single select() call.
d_items <- select(d, matches("i\\d\\d"), -i21)

# Names of the selected item columns, in column order.
d_item_names <- colnames(d_items)

d_item_names
#>  [1] "i01"  "i02r" "i03"  "i04"  "i05"  "i06r" "i07"  "i08"  "i09"  "i10"

4 Let’s plot

4.1 Way 1

# Way 1: iterate over the item names and build one bar chart per item.
# Yields an unnamed list of ggplot objects, in the order of `d_item_names`.
map(d_item_names, function(item_name) {
  ggplot(data = d_items) +
    # `.data[[item_name]]` looks the column up by its name given as a string.
    geom_bar(aes(x = .data[[item_name]])) +
    # geom_bar() puts the item's values on x and the counts on y, so the
    # item name belongs on the x axis; y is labelled as what it is, a count.
    labs(x = item_name, y = "count")
})
#> [[1]]
#> 
#> [[2]]
#> 
#> [[3]]
#> 
#> [[4]]
#> 
#> [[5]]
#> 
#> [[6]]
#> 
#> [[7]]
#> 
#> [[8]]
#> 
#> [[9]]
#> 
#> [[10]]

4.2 Way 2

# Way 2: reshape to long format (one row per item/value pair), split the
# long table into one tibble per item, and draw a bar chart for each piece.
d_items %>%
  pivot_longer(cols = everything(), names_to = "item") %>%
  group_split(item) %>%
  map(function(item_data) {
    ggplot(data = item_data, mapping = aes(x = value)) +
      geom_bar()
  })
#> [[1]]
#> 
#> [[2]]
#> 
#> [[3]]
#> 
#> [[4]]
#> 
#> [[5]]
#> 
#> [[6]]
#> 
#> [[7]]
#> 
#> [[8]]
#> 
#> [[9]]
#> 
#> [[10]]

4.3 Way 3

# Way 3: a single plot with one facet per item.
ggplot(
  data = d_items %>%
    pivot_longer(cols = everything(), names_to = "item") %>%
    # Turning `item` into a factor with explicit levels fixes the facet
    # order to the order of `d_item_names`.
    mutate(item = factor(item, levels = d_item_names)),
  mapping = aes(x = value)
) +
  geom_bar() +
  facet_wrap(vars(item))

4.4 Way 4

library(DataExplorer)  # provides plot_histogram()

# Way 4: hand the item columns to DataExplorer in one call.
plot_histogram(data = d_items)

5 Reproducibility

#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.1 (2022-06-23)
#>  os       macOS Big Sur ... 10.16
#>  system   x86_64, darwin17.0
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Berlin
#>  date     2022-09-26
#>  pandoc   2.19.2 @ /usr/local/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  package       * version    date (UTC) lib source
#>  assertthat      0.2.1      2019-03-21 [1] CRAN (R 4.2.0)
#>  backports       1.4.1      2021-12-13 [1] CRAN (R 4.2.0)
#>  bit             4.0.4      2020-08-04 [1] CRAN (R 4.2.0)
#>  bit64           4.0.5      2020-08-30 [1] CRAN (R 4.2.0)
#>  blogdown        1.12       2022-09-15 [1] CRAN (R 4.2.1)
#>  bookdown        0.29       2022-09-12 [1] CRAN (R 4.2.0)
#>  broom           1.0.1      2022-08-29 [1] CRAN (R 4.2.0)
#>  bslib           0.4.0      2022-07-16 [1] CRAN (R 4.2.0)
#>  cachem          1.0.6      2021-08-19 [1] CRAN (R 4.2.0)
#>  callr           3.7.2      2022-08-22 [1] CRAN (R 4.2.0)
#>  cellranger      1.1.0      2016-07-27 [1] CRAN (R 4.2.0)
#>  cli             3.4.0      2022-09-08 [1] CRAN (R 4.2.0)
#>  codetools       0.2-18     2020-11-04 [2] CRAN (R 4.2.1)
#>  colorout      * 1.2-2      2022-06-13 [1] local
#>  colorspace      2.0-3      2022-02-21 [1] CRAN (R 4.2.0)
#>  crayon          1.5.1      2022-03-26 [1] CRAN (R 4.2.0)
#>  curl            4.3.2      2021-06-23 [1] CRAN (R 4.2.0)
#>  data.table      1.14.2     2021-09-27 [1] CRAN (R 4.2.0)
#>  DataExplorer  * 0.8.2      2020-12-15 [1] CRAN (R 4.2.0)
#>  DBI             1.1.3      2022-06-18 [1] CRAN (R 4.2.0)
#>  dbplyr          2.2.1      2022-06-27 [1] CRAN (R 4.2.0)
#>  devtools        2.4.4      2022-07-20 [1] CRAN (R 4.2.0)
#>  digest          0.6.29     2021-12-01 [1] CRAN (R 4.2.0)
#>  dplyr         * 1.0.10     2022-09-01 [1] CRAN (R 4.2.0)
#>  ellipsis        0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
#>  evaluate        0.16       2022-08-09 [1] CRAN (R 4.2.0)
#>  fansi           1.0.3      2022-03-24 [1] CRAN (R 4.2.0)
#>  farver          2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
#>  fastmap         1.1.0      2021-01-25 [1] CRAN (R 4.2.0)
#>  forcats       * 0.5.2      2022-08-19 [1] CRAN (R 4.2.0)
#>  fs              1.5.2      2021-12-08 [1] CRAN (R 4.2.0)
#>  gargle          1.2.1      2022-09-08 [1] CRAN (R 4.2.0)
#>  generics        0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2       * 3.3.6.9000 2022-09-05 [1] Github (tidyverse/ggplot2@a58b48c)
#>  glue            1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
#>  googledrive     2.0.0      2021-07-08 [1] CRAN (R 4.2.0)
#>  googlesheets4   1.0.1      2022-08-13 [1] CRAN (R 4.2.0)
#>  gridExtra       2.3        2017-09-09 [1] CRAN (R 4.2.0)
#>  gtable          0.3.1      2022-09-01 [1] CRAN (R 4.2.0)
#>  haven           2.5.1      2022-08-22 [1] CRAN (R 4.2.0)
#>  highr           0.9        2021-04-16 [1] CRAN (R 4.2.0)
#>  hms             1.1.2      2022-08-19 [1] CRAN (R 4.2.0)
#>  htmltools       0.5.3      2022-07-18 [1] CRAN (R 4.2.0)
#>  htmlwidgets     1.5.4      2021-09-08 [1] CRAN (R 4.2.0)
#>  httpuv          1.6.6      2022-09-08 [1] CRAN (R 4.2.0)
#>  httr            1.4.4      2022-08-17 [1] CRAN (R 4.2.0)
#>  igraph          1.3.4      2022-07-19 [1] CRAN (R 4.2.0)
#>  jquerylib       0.1.4      2021-04-26 [1] CRAN (R 4.2.0)
#>  jsonlite        1.8.0      2022-02-22 [1] CRAN (R 4.2.0)
#>  knitr           1.40       2022-08-24 [1] CRAN (R 4.2.0)
#>  labeling        0.4.2      2020-10-20 [1] CRAN (R 4.2.0)
#>  later           1.3.0      2021-08-18 [1] CRAN (R 4.2.0)
#>  lifecycle       1.0.2      2022-09-05 [1] Github (r-lib/lifecycle@f92faf7)
#>  lubridate       1.8.0      2021-10-07 [1] CRAN (R 4.2.0)
#>  magrittr        2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
#>  memoise         2.0.1      2021-11-26 [1] CRAN (R 4.2.0)
#>  mime            0.12       2021-09-28 [1] CRAN (R 4.2.0)
#>  miniUI          0.1.1.1    2018-05-18 [1] CRAN (R 4.2.0)
#>  modelr          0.1.9      2022-08-19 [1] CRAN (R 4.2.0)
#>  munsell         0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
#>  networkD3       0.4        2017-03-18 [1] CRAN (R 4.2.0)
#>  pillar          1.8.1      2022-08-19 [1] CRAN (R 4.2.0)
#>  pkgbuild        1.3.1      2021-12-20 [1] CRAN (R 4.2.0)
#>  pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
#>  pkgload         1.3.0      2022-06-27 [1] CRAN (R 4.2.0)
#>  prettyunits     1.1.1      2020-01-24 [1] CRAN (R 4.2.0)
#>  processx        3.7.0      2022-07-07 [1] CRAN (R 4.2.0)
#>  profvis         0.3.7      2020-11-02 [1] CRAN (R 4.2.0)
#>  promises        1.2.0.1    2021-02-11 [1] CRAN (R 4.2.0)
#>  ps              1.7.1      2022-06-18 [1] CRAN (R 4.2.0)
#>  purrr         * 0.3.4      2020-04-17 [1] CRAN (R 4.2.0)
#>  R6              2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
#>  Rcpp            1.0.9      2022-07-08 [1] CRAN (R 4.2.0)
#>  readr         * 2.1.2      2022-01-30 [1] CRAN (R 4.2.0)
#>  readxl          1.4.1      2022-08-17 [1] CRAN (R 4.2.0)
#>  remotes         2.4.2      2021-11-30 [1] CRAN (R 4.2.0)
#>  reprex          2.0.2      2022-08-17 [1] CRAN (R 4.2.0)
#>  rlang           1.0.5      2022-08-31 [1] CRAN (R 4.2.0)
#>  rmarkdown       2.16       2022-08-24 [1] CRAN (R 4.2.0)
#>  rstudioapi      0.14       2022-08-22 [1] CRAN (R 4.2.0)
#>  rvest           1.0.3      2022-08-19 [1] CRAN (R 4.2.0)
#>  sass            0.4.2      2022-07-16 [1] CRAN (R 4.2.0)
#>  scales          1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
#>  sessioninfo     1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
#>  shiny           1.7.2      2022-07-19 [1] CRAN (R 4.2.0)
#>  stringi         1.7.8      2022-07-11 [1] CRAN (R 4.2.0)
#>  stringr       * 1.4.1      2022-08-20 [1] CRAN (R 4.2.0)
#>  tibble        * 3.1.8      2022-07-22 [1] CRAN (R 4.2.0)
#>  tidyr         * 1.2.1      2022-09-08 [1] CRAN (R 4.2.0)
#>  tidyselect      1.1.2      2022-02-21 [1] CRAN (R 4.2.0)
#>  tidyverse     * 1.3.2      2022-07-18 [1] CRAN (R 4.2.0)
#>  tzdb            0.3.0      2022-03-28 [1] CRAN (R 4.2.0)
#>  urlchecker      1.0.1      2021-11-30 [1] CRAN (R 4.2.0)
#>  usethis         2.1.6      2022-05-25 [1] CRAN (R 4.2.0)
#>  utf8            1.2.2      2021-07-24 [1] CRAN (R 4.2.0)
#>  vctrs           0.4.1      2022-04-13 [1] CRAN (R 4.2.0)
#>  vroom           1.5.7      2021-11-30 [1] CRAN (R 4.2.0)
#>  withr           2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun            0.33       2022-09-12 [1] CRAN (R 4.2.0)
#>  xml2            1.3.3      2021-11-30 [1] CRAN (R 4.2.0)
#>  xtable          1.8-4      2019-04-21 [1] CRAN (R 4.2.0)
#>  yaml            2.3.5      2022-02-21 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Users/sebastiansaueruser/Rlibs
#>  [2] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────