40 Reshaping (tidyr)
40.1 Long to wide with pivot_wider()
Using the same example seen in Chapter 26:
tb_long <- tibble(
Account_ID = c(8001, 8002, 8003, 8004, 8001, 8002, 8003, 8004,
8001, 8002, 8003, 8004, 8001, 8002, 8003, 8004),
Age = c(67.8017038366664, 42.9198507293701, 46.2301756642422,
39.665983196671, 67.8017038366664, 42.9198507293701,
46.2301756642422, 39.665983196671, 67.8017038366664,
42.9198507293701, 46.2301756642422, 39.665983196671,
67.8017038366664, 42.9198507293701, 46.2301756642422,
39.665983196671),
Admission = c("ED", "Planned", "Planned", "ED", "ED", "Planned",
"Planned", "ED", "ED", "Planned", "Planned", "ED", "ED", "Planned",
"Planned", "ED"),
Lab_key = c("RBC", "RBC", "RBC", "RBC", "WBC", "WBC", "WBC", "WBC",
"Hematocrit", "Hematocrit", "Hematocrit", "Hematocrit",
"Hemoglobin", "Hemoglobin", "Hemoglobin", "Hemoglobin"),
Lab_value = c(4.63449321082268, 3.34968550627897, 4.27037213597765,
4.93897736897793, 8374.22887757195, 7612.37380499927,
8759.27855519425, 6972.28096216548, 36.272693147236,
40.5716317809522, 39.9888624177955, 39.8786884058422,
12.6188444991545, 12.1739747363806, 15.1293426442183,
14.8885696185238)
)
tb_long# A tibble: 16 × 5
Account_ID Age Admission Lab_key Lab_value
<dbl> <dbl> <chr> <chr> <dbl>
1 8001 67.8 ED RBC 4.63
2 8002 42.9 Planned RBC 3.35
3 8003 46.2 Planned RBC 4.27
4 8004 39.7 ED RBC 4.94
5 8001 67.8 ED WBC 8374.
6 8002 42.9 Planned WBC 7612.
7 8003 46.2 Planned WBC 8759.
8 8004 39.7 ED WBC 6972.
9 8001 67.8 ED Hematocrit 36.3
10 8002 42.9 Planned Hematocrit 40.6
11 8003 46.2 Planned Hematocrit 40.0
12 8004 39.7 ED Hematocrit 39.9
13 8001 67.8 ED Hemoglobin 12.6
14 8002 42.9 Planned Hemoglobin 12.2
15 8003 46.2 Planned Hemoglobin 15.1
16 8004 39.7 ED Hemoglobin 14.9
tb_wide <- pivot_wider(
tb_long,
id_cols = c("Account_ID", "Age", "Admission"),
names_from = "Lab_key",
values_from = "Lab_value")
tb_wide# A tibble: 4 × 7
Account_ID Age Admission RBC WBC Hematocrit Hemoglobin
<dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 8001 67.8 ED 4.63 8374. 36.3 12.6
2 8002 42.9 Planned 3.35 7612. 40.6 12.2
3 8003 46.2 Planned 4.27 8759. 40.0 15.1
4 8004 39.7 ED 4.94 6972. 39.9 14.9
40.2 Wide to long with pivot_longer()
40.2.1 Example 1: Timepoints
tb_wide <- tibble(
ID = 1:4,
Timepoint_A = 11:14,
Timepoint_B = 21:24,
Timepoint_C = 51:54)
tb_wide# A tibble: 4 × 4
ID Timepoint_A Timepoint_B Timepoint_C
<int> <int> <int> <int>
1 1 11 21 51
2 2 12 22 52
3 3 13 23 53
4 4 14 24 54
tb_long <- pivot_longer(tb_wide,
cols = starts_with("Timepoint_"),
names_to = "Timepoint",
values_to = "Score")
tb_long# A tibble: 12 × 3
ID Timepoint Score
<int> <chr> <int>
1 1 Timepoint_A 11
2 1 Timepoint_B 21
3 1 Timepoint_C 51
4 2 Timepoint_A 12
5 2 Timepoint_B 22
6 2 Timepoint_C 52
7 3 Timepoint_A 13
8 3 Timepoint_B 23
9 3 Timepoint_C 53
10 4 Timepoint_A 14
11 4 Timepoint_B 24
12 4 Timepoint_C 54
40.2.2 Example 2: Key-value pairs
set.seed(2022)
tb_wide <- tibble(
Account_ID = c(8001, 8002, 8003, 8004),
Age = rnorm(4, mean = 57, sd = 12),
RBC = rnorm(4, mean = 4.8, sd = 0.5),
WBC = rnorm(4, mean = 7250, sd = 1500),
Hematocrit = rnorm(4, mean = 40.2, sd = 4),
Hemoglobin = rnorm(4, mean = 13.6, sd = 1.5),
Admission = sample(c("ED", "Planned"), size = 4, replace = TRUE)
)
tb_wide# A tibble: 4 × 7
Account_ID Age RBC WBC Hematocrit Hemoglobin Admission
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 8001 67.8 4.63 8374. 36.3 12.6 ED
2 8002 42.9 3.35 7612. 40.6 12.2 Planned
3 8003 46.2 4.27 8759. 40.0 15.1 Planned
4 8004 39.7 4.94 6972. 39.9 14.9 Planned
tb_long <- pivot_longer(tb_wide,
cols = 3:6,
names_to = "Lab_key",
values_to = "Lab_value")
tb_long# A tibble: 16 × 5
Account_ID Age Admission Lab_key Lab_value
<dbl> <dbl> <chr> <chr> <dbl>
1 8001 67.8 ED RBC 4.63
2 8001 67.8 ED WBC 8374.
3 8001 67.8 ED Hematocrit 36.3
4 8001 67.8 ED Hemoglobin 12.6
5 8002 42.9 Planned RBC 3.35
6 8002 42.9 Planned WBC 7612.
7 8002 42.9 Planned Hematocrit 40.6
8 8002 42.9 Planned Hemoglobin 12.2
9 8003 46.2 Planned RBC 4.27
10 8003 46.2 Planned WBC 8759.
11 8003 46.2 Planned Hematocrit 40.0
12 8003 46.2 Planned Hemoglobin 15.1
13 8004 39.7 Planned RBC 4.94
14 8004 39.7 Planned WBC 6972.
15 8004 39.7 Planned Hematocrit 39.9
16 8004 39.7 Planned Hemoglobin 14.9
or using column names:
tb_long <- pivot_longer(tb_wide,
cols = c("RBC", "WBC", "Hematocrit", "Hemoglobin"),
names_to = "Lab_key",
values_to = "Lab_value")
tb_long# A tibble: 16 × 5
Account_ID Age Admission Lab_key Lab_value
<dbl> <dbl> <chr> <chr> <dbl>
1 8001 67.8 ED RBC 4.63
2 8001 67.8 ED WBC 8374.
3 8001 67.8 ED Hematocrit 36.3
4 8001 67.8 ED Hemoglobin 12.6
5 8002 42.9 Planned RBC 3.35
6 8002 42.9 Planned WBC 7612.
7 8002 42.9 Planned Hematocrit 40.6
8 8002 42.9 Planned Hemoglobin 12.2
9 8003 46.2 Planned RBC 4.27
10 8003 46.2 Planned WBC 8759.
11 8003 46.2 Planned Hematocrit 40.0
12 8003 46.2 Planned Hemoglobin 15.1
13 8004 39.7 Planned RBC 4.94
14 8004 39.7 Planned WBC 6972.
15 8004 39.7 Planned Hematocrit 39.9
16 8004 39.7 Planned Hemoglobin 14.9
Note that the “Age” and “Admission” columns were also reshaped correctly.