1 Load packages
library(tidyverse) # data wrangling
2 Sample data
# Load the mtcars example data set into the workspace so it can be modified.
data("mtcars")
Create some NA:
# Blank out mpg and hp in the first three rows so that there is
# something to count later on.
mtcars[1:3, c("mpg", "hp")] <- NA
3 Count NA rowwise
What we would like to achieve is a convenient way to count the missing values per row.
Define helper function:
# Helper: number of missing (NA) entries in `x`.
# is.na() flags each entry; summing the logical flags counts the TRUEs.
sum_isna <- function(x) {
  na_flags <- is.na(x)
  sum(na_flags)
}
4 Way 1: rowwise sum with mutate and c_across
# Way 1: make every row its own group, collect all columns of that row
# with c_across(), and count the NAs among them.
mtcars %>%
  rowwise() %>%
  mutate(
    Na_n = sum_isna(c_across(everything()))
  ) %>%
  ungroup() # drop the rowwise grouping again
#> # A tibble: 32 × 12
#> mpg cyl disp hp drat wt qsec vs am gear carb Na_n
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 NA 6 160 NA 3.9 2.62 16.5 0 1 4 4 2
#> 2 NA 6 160 NA 3.9 2.88 17.0 0 1 4 4 2
#> 3 NA 4 108 NA 3.85 2.32 18.6 1 1 4 1 2
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 0
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 0
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 0
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 0
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 0
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 0
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 0
#> # … with 22 more rows
A more in-depth treatment can be found here.
5 Way 2: apply()
with margin 1
Margin 1 means rowwise:
# Way 2: apply() with MARGIN = 1 calls sum_isna() once per row.
# `.` is magrittr's placeholder for the data frame arriving through the pipe,
# so the count is computed on the piped data itself instead of reaching back
# to the global `mtcars` (which would give wrong counts or a length error as
# soon as a step such as filter() is placed before this mutate()).
mtcars %>%
  mutate(Na_n = apply(., 1, sum_isna))
#> mpg cyl disp hp drat wt qsec vs am gear carb Na_n
#> Mazda RX4 NA 6 160.0 NA 3.90 2.620 16.46 0 1 4 4 2
#> Mazda RX4 Wag NA 6 160.0 NA 3.90 2.875 17.02 0 1 4 4 2
#> Datsun 710 NA 4 108.0 NA 3.85 2.320 18.61 1 1 4 1 2
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 0
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 0
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 0
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 0
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 0
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 0
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 0
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 0
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 0
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 0
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 0
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 0
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 0
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 0
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 0
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 0
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 0
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 0
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 0
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 0
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 0
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 0
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 0
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 0
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 0
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 0
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 0
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 0
6 Way 3: rowSums
# Way 3: is.na() turns the data frame into a logical matrix and rowSums()
# adds up the TRUEs per row.
# `.` is magrittr's placeholder for the data frame arriving through the pipe,
# so the count is computed on the piped data itself instead of reaching back
# to the global `mtcars` (which would give wrong counts or a length error as
# soon as a step such as filter() is placed before this mutate()).
mtcars %>%
  mutate(Na_n = rowSums(is.na(.)))
#> mpg cyl disp hp drat wt qsec vs am gear carb Na_n
#> Mazda RX4 NA 6 160.0 NA 3.90 2.620 16.46 0 1 4 4 2
#> Mazda RX4 Wag NA 6 160.0 NA 3.90 2.875 17.02 0 1 4 4 2
#> Datsun 710 NA 4 108.0 NA 3.85 2.320 18.61 1 1 4 1 2
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 0
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 0
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 0
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 0
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 0
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 0
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 0
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 0
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 0
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 0
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 0
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 0
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 0
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 0
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 0
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 0
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 0
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 0
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 0
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 0
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 0
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 0
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 0
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 0
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 0
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 0
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 0
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 0
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 0
7 Way 4: cur_data()
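cur_data() refers to the data in the current group, which, in the case of rowwise(), is the current row.
(Note: as of dplyr 1.1.0, cur_data() is deprecated in favor of pick(); the code below was run with dplyr 1.0.9.)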
# Way 4: in a rowwise data frame each row is its own group, so cur_data()
# passes the current row to sum_isna().
# There is no ungroup() here, so the result is still a rowwise data frame.
mtcars %>%
  rowwise() %>%
  mutate(
    NA_n = sum_isna(cur_data())
  )
#> # A tibble: 32 × 12
#> # Rowwise:
#> mpg cyl disp hp drat wt qsec vs am gear carb NA_n
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 NA 6 160 NA 3.9 2.62 16.5 0 1 4 4 2
#> 2 NA 6 160 NA 3.9 2.88 17.0 0 1 4 4 2
#> 3 NA 4 108 NA 3.85 2.32 18.6 1 1 4 1 2
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 0
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 0
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 0
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 0
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 0
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 0
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 0
#> # … with 22 more rows
8 Why not map()?
When using map() in combination with mutate(), we map a column of the data frame to some function.
However, when counting missing values per row, we would like to map a row to a function,
which map() cannot do, because it iterates over the columns of a data frame (its list elements), not over its rows.
9 Reproducibility
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.1.3 (2022-03-10)
#> os macOS Big Sur/Monterey 10.16
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Berlin
#> date 2022-05-09
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.0)
#> blogdown 1.8 2022-02-16 [2] CRAN (R 4.1.2)
#> bookdown 0.26.2 2022-05-02 [1] Github (rstudio/bookdown@6adacc3)
#> brio 1.1.3 2021-11-30 [1] CRAN (R 4.1.0)
#> broom 0.8.0 2022-04-13 [1] CRAN (R 4.1.2)
#> bslib 0.3.1 2021-10-06 [1] CRAN (R 4.1.0)
#> cachem 1.0.6 2021-08-19 [1] CRAN (R 4.1.0)
#> callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
#> cli 3.3.0 2022-04-25 [1] CRAN (R 4.1.2)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.1.3)
#> colorout * 1.2-2 2022-01-04 [1] Github (jalvesaq/colorout@79931fd)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.2)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.2)
#> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.0)
#> dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.0)
#> desc 1.4.0 2021-09-28 [1] CRAN (R 4.1.0)
#> devtools 2.4.3 2021-11-30 [1] CRAN (R 4.1.0)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.0)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.1.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.2)
#> fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.1.0)
#> forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.1.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.0)
#> generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.2)
#> ggplot2 * 3.3.5 2021-06-25 [2] CRAN (R 4.1.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
#> haven 2.4.3 2021-08-04 [1] CRAN (R 4.1.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.0)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.0)
#> httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.0)
#> jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.2)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.1.2)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.0)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.1.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.1.2)
#> memoise 2.0.0 2021-01-26 [2] CRAN (R 4.1.0)
#> modelr 0.1.8 2020-05-19 [1] CRAN (R 4.1.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2)
#> pkgbuild 1.2.0 2020-12-15 [2] CRAN (R 4.1.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
#> pkgload 1.2.4 2021-11-30 [1] CRAN (R 4.1.0)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
#> processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
#> ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
#> Rcpp 1.0.8.3 2022-03-17 [1] CRAN (R 4.1.2)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.1.2)
#> readxl 1.3.1 2019-03-13 [1] CRAN (R 4.1.0)
#> remotes 2.4.0 2021-06-02 [2] CRAN (R 4.1.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.0)
#> rlang 1.0.2 2022-03-04 [1] CRAN (R 4.1.2)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.1.2)
#> rprojroot 2.0.2 2020-11-15 [2] CRAN (R 4.1.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
#> rvest 1.0.2 2021-10-16 [1] CRAN (R 4.1.0)
#> sass 0.4.1 2022-03-23 [1] CRAN (R 4.1.2)
#> scales 1.2.0 2022-04-13 [1] CRAN (R 4.1.3)
#> sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.1.0)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
#> testthat 3.1.2 2022-01-20 [1] CRAN (R 4.1.2)
#> tibble * 3.1.6 2021-11-07 [1] CRAN (R 4.1.0)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.2)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.1.2)
#> tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.1.0)
#> tzdb 0.1.2 2021-07-20 [2] CRAN (R 4.1.0)
#> usethis 2.0.1 2021-02-10 [2] CRAN (R 4.1.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.1.2)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
#> xfun 0.30 2022-03-02 [1] CRAN (R 4.1.2)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.1.0)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
#>
#> [1] /Users/sebastiansaueruser/Library/R/x86_64/4.1/library
#> [2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library