Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.
library(gtsummary); library(gt); library(survival) library(dplyr); library(stringr); library(purrr); library(forcats)
Add a spanning header over the group columns for increased clarity, and modifying column headers.
trial %>% select(trt, age, grade) %>% tbl_summary( by = trt, missing = "no", statistic = all_continuous() ~ "{median} ({p25}, {p75})" ) %>% modify_header(stat_by = md("**{level}**<br>N = {n} ({style_percent(p)}%)")) %>% add_n() %>% bold_labels() %>% modify_spanning_header(starts_with("stat_") ~ "**Chemotherapy Treatment**")
| Characteristic | N | Chemotherapy Treatment | |
|---|---|---|---|
|
Drug A N = 98 (49%)1 |
Drug B N = 102 (51%)1 |
||
| Age | 189 | 46 (37, 59) | 48 (39, 56) |
| Grade | 200 | ||
| I | 35 (36%) | 33 (32%) | |
| II | 32 (33%) | 36 (35%) | |
| III | 31 (32%) | 33 (32%) | |
|
1
Statistics presented: Median (IQR); n (%)
|
|||
Show continuous summary statistics on multiple lines.
trial %>% select(trt, age, marker) %>% tbl_summary( by = trt, type = all_continuous() ~ "continuous2", statistic = all_continuous() ~ c("{N_nonmiss}", "{mean} ({sd})", "{median} ({p25}, {p75})", "{min}, {max}"), missing = "no" ) %>% italicize_levels()
| Characteristic | Drug A, N = 98 | Drug B, N = 102 |
|---|---|---|
| Age | ||
| N | 91 | 98 |
| Mean (SD) | 47 (15) | 47 (14) |
| Median (IQR) | 46 (37, 59) | 48 (39, 56) |
| Range | 6, 78 | 9, 83 |
| Marker Level (ng/mL) | ||
| N | 92 | 98 |
| Mean (SD) | 1.02 (0.89) | 0.82 (0.83) |
| Median (IQR) | 0.84 (0.24, 1.57) | 0.52 (0.19, 1.20) |
| Range | 0.00, 3.87 | 0.00, 3.64 |
Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.
trial %>% select(response, age, grade) %>% mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>% tbl_summary( by = response, missing = "no", label = list(age ~ "Patient Age", grade ~ "Tumor Grade") ) %>% add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>% add_q()
| Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | p-value2 | q-value3 |
|---|---|---|---|---|
| Patient Age | 46 (36, 55) | 49 (43, 59) | 0.091 | 0.18 |
| Tumor Grade | 0.93 | 0.93 | ||
| I | 46 (35%) | 21 (34%) | ||
| II | 44 (33%) | 19 (31%) | ||
| III | 42 (32%) | 21 (34%) | ||
|
1
Statistics presented: Median (IQR); n (%)
2
Statistical tests performed: Wilcoxon rank-sum test; chi-square test of independence
3
False discovery rate correction for multiple testing
|
||||
Include missing tumor response as column using fct_explicit_na().
trial %>% select(response, age, grade) %>% mutate( response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>% fct_explicit_na(na_level = "Missing Response Status") ) %>% tbl_summary( by = response, label = list(age ~ "Patient Age", grade ~ "Tumor Grade") )
| Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | Missing Response Status, N = 71 |
|---|---|---|---|
| Patient Age | 46 (36, 55) | 49 (43, 59) | 52 (44, 57) |
| Unknown | 7 | 3 | 1 |
| Tumor Grade | |||
| I | 46 (35%) | 21 (34%) | 1 (14%) |
| II | 44 (33%) | 19 (31%) | 5 (71%) |
| III | 42 (32%) | 21 (34%) | 1 (14%) |
|
1
Statistics presented: Median (IQR); n (%)
|
|||
Include p-values comparing all groups to a single reference group.
# table summarizing data with no p-values t0 <- trial %>% select(grade, age, response) %>% tbl_summary(by = grade, missing = "no") %>% modify_header(stat_by = md("**{level}**")) # table comparing grade I and II t1 <- trial %>% select(grade, age, response) %>% filter(grade %in% c("I", "II")) %>% tbl_summary(by = grade, missing = "no") %>% add_p() %>% modify_header(p.value ~ md("**I vs. II**")) %>% # hide summary stat columns modify_table_header(starts_with("stat_"), hide = TRUE) # table comparing grade I and II t2 <- trial %>% select(grade, age, response) %>% filter(grade %in% c("I", "III")) %>% tbl_summary(by = grade, missing = "no") %>% add_p() %>% modify_header(p.value ~ md("**I vs. III**")) %>% # hide summary stat columns modify_table_header(starts_with("stat_"), hide = TRUE) # merging the 3 tables together, and adding additional gt formatting tbl_merge(list(t0, t1, t2)) %>% modify_spanning_header( list( starts_with("stat_") ~ "**Tumor Grade**", starts_with("p.value") ~ "**p-value**" ) ) %>% # remove default spanning headers modify_spanning_header(everything() ~ NA_character_)
| Characteristic | I1 | II1 | III1 | I vs. II2 | I vs. III2 |
|---|---|---|---|---|---|
| Age | 47 (37, 56) | 48 (37, 57) | 47 (38, 58) | 0.7 | 0.5 |
| Tumor Response | 21 (31%) | 19 (30%) | 21 (33%) | >0.9 | 0.9 |
|
1
Statistics presented: Median (IQR); n (%)
2
Statistical tests performed: Wilcoxon rank-sum test; Fisher's exact test
|
|||||
Add additional statistics as additional columns.
# define function for lower and upper bounds of the mean CI ll <- function(x) t.test(x)$conf.int[1] ul <- function(x) t.test(x)$conf.int[2] t1 <- trial %>% select(age, marker) %>% tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>% modify_header(stat_0 ~ "**Mean (SD)**") t2 <- trial %>% select(age, marker) %>% tbl_summary(statistic = all_continuous() ~ "{ll}, {ul}", missing = "no") %>% modify_header(stat_0 ~ "**95% CI for Mean**") tbl_merge(list(t1, t2)) %>% modify_footnote(everything() ~ NA_character_) %>% modify_spanning_header(everything() ~ NA_character_)
| Characteristic | Mean (SD) | 95% CI for Mean |
|---|---|---|
| Age | 47 (14) | 45, 49 |
| Marker Level (ng/mL) | 0.92 (0.86) | 0.79, 1.04 |
Include number of observations and the number of events in a univariate regression table.
trial %>% select(response, age, grade) %>% tbl_uvregression( method = glm, y = response, method.args = list(family = binomial), exponentiate = TRUE ) %>% add_nevent()
| Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
|---|---|---|---|---|---|
| Age | 183 | 58 | 1.02 | 1.00, 1.04 | 0.10 |
| Grade | 193 | 61 | |||
| I | — | — | |||
| II | 0.95 | 0.45, 2.00 | 0.9 | ||
| III | 1.10 | 0.52, 2.29 | 0.8 | ||
|
1
OR = Odds Ratio, CI = Confidence Interval
|
|||||
Include two related models side-by-side with descriptive statistics.
gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>% tbl_regression(exponentiate = TRUE) gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>% tbl_regression(exponentiate = TRUE) gt_t1 <- trial[c("trt", "grade")] %>% tbl_summary(missing = "no") %>% add_n() %>% modify_header(stat_0 ~ "**n (%)**") %>% modify_footnote(stat_0 ~ NA_character_) tbl_merge( list(gt_t1, gt_r1, gt_r2), tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**") )
| Characteristic | N | n (%) | Tumor Response | Time to Death | ||||
|---|---|---|---|---|---|---|---|---|
| OR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |||
| Chemotherapy Treatment | 200 | |||||||
| Drug A | 98 (49%) | — | — | — | — | |||
| Drug B | 102 (51%) | 1.21 | 0.66, 2.24 | 0.5 | 1.25 | 0.86, 1.81 | 0.2 | |
| Grade | 200 | |||||||
| I | 68 (34%) | — | — | — | — | |||
| II | 68 (34%) | 0.94 | 0.44, 1.98 | 0.9 | 1.28 | 0.80, 2.06 | 0.3 | |
| III | 64 (32%) | 1.09 | 0.52, 2.27 | 0.8 | 1.69 | 1.07, 2.66 | 0.024 | |
|
1
OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio
|
||||||||
Include the number of events at each level of a categorical predictor.
gt_model <- trial %>% select(ttdeath, death, stage, grade) %>% tbl_uvregression( method = coxph, y = Surv(ttdeath, death), exponentiate = TRUE, hide_n = TRUE ) gt_eventn <- trial %>% filter(death == 1) %>% select(stage, grade) %>% tbl_summary( statistic = all_categorical() ~ "{n}", label = list(stage ~ "T Stage", grade ~ "Grade") ) %>% modify_header(stat_0 ~ "**Event N**") %>% modify_footnote(everything() ~ NA_character_) tbl_merge(list(gt_eventn, gt_model)) %>% bold_labels() %>% italicize_levels() %>% modify_spanning_header(everything() ~ NA_character_)
| Characteristic | Event N | HR1 | 95% CI1 | p-value |
|---|---|---|---|---|
| T Stage | ||||
| T1 | 24 | — | — | |
| T2 | 27 | 1.18 | 0.68, 2.04 | 0.6 |
| T3 | 22 | 1.23 | 0.69, 2.20 | 0.5 |
| T4 | 39 | 2.48 | 1.49, 4.14 | <0.001 |
| Grade | ||||
| I | 33 | — | — | |
| II | 36 | 1.28 | 0.80, 2.05 | 0.3 |
| III | 43 | 1.69 | 1.07, 2.66 | 0.024 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
||||
Regression model where the covariate remains the same, and the outcome changes.
tbl_reg <- trial %>% select(age, marker, trt) %>% tbl_uvregression( method = lm, x = trt, show_single_row = "trt", hide_n = TRUE ) %>% modify_header(list( label ~"**Model Outcome**", estimate ~ "**Treatment Coef.**" )) tbl_reg %>% modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug group.")
| Model Outcome | Treatment Coef.1 | 95% CI2 | p-value |
|---|---|---|---|
| Age | 0.44 | -3.7, 4.6 | 0.8 |
| Marker Level (ng/mL) | -0.20 | -0.44, 0.05 | 0.12 |
|
1
Values larger than 0 indicate larger values in the Drug group.
2
CI = Confidence Interval
|
|||
Add descriptive statistics by treatment group to the table above to produce a table often reported two group comparisons.
gt_sum <- trial %>% select(age, marker, trt) %>% mutate(trt = fct_rev(trt)) %>% tbl_summary(by = trt, statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>% add_n() %>% modify_header(stat_by = md("**{level}**")) tbl_merge(list(gt_sum, tbl_reg)) %>% modify_header(estimate_2 ~ "**Difference**") %>% modify_spanning_header(everything() ~ NA_character_)
| Characteristic | N | Drug B1 | Drug A1 | Difference | 95% CI2 | p-value |
|---|---|---|---|---|---|---|
| Age | 189 | 47 (14) | 47 (15) | 0.44 | -3.7, 4.6 | 0.8 |
| Marker Level (ng/mL) | 190 | 0.82 (0.83) | 1.02 (0.89) | -0.20 | -0.44, 0.05 | 0.12 |
|
1
Statistics presented: Mean (SD)
2
CI = Confidence Interval
|
||||||
Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) { dplyr::bind_cols( broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE), # calculate the confidence intervals, and save them in a tibble stats::confint.default(x) %>% tibble::as_tibble() %>% rlang::set_names(c("conf.low", "conf.high")) ) } lm(age ~ grade + marker, trial) %>% tbl_regression(tidy_fun = my_tidy)
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| Grade | |||
| I | — | — | |
| II | 0.64 | -4.6, 5.9 | 0.8 |
| III | 2.4 | -2.8, 7.6 | 0.4 |
| Marker Level (ng/mL) | -0.04 | -2.6, 2.5 | >0.9 |
|
1
CI = Confidence Interval
|
|||