Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.

library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats)

Summary Tables

Add a spanning header over the group columns for increased clarity, reporting the number of non-missing observations, and modifying column headers.

trial[c("trt", "age", "grade")] %>%
  tbl_summary(
    by = trt,
    missing = "no",
    statistic = all_continuous() ~ "{median} ({p25}, {p75}) [N = {N_nonmiss}]"
  ) %>%
  modify_header(stat_by = md("**{level}**<br>N =  {n} ({style_percent(p)}%)")) %>%
  add_n() %>%
  bold_labels() %>%
  modify_spanning_header(starts_with("stat_") ~ "**Chemotherapy Treatment**")
Characteristic N Chemotherapy Treatment
Drug A
N = 98 (49%)1
Drug B
N = 102 (51%)1
Age, yrs 189 46 (37, 59) [N = 91] 48 (39, 56) [N = 98]
Grade 200
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)

1 Statistics presented: median (IQR) [N = N]; n (%)


Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.

trial[!is.na(trial$response), c("response", "age", "grade")] %>%
  mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
  tbl_summary(
    by = response,
    missing = "no",
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  ) %>%
  add_p(pvalue_fun = partial(style_pvalue, digits = 2)) %>%
  add_q()
#> Adjusting p-values with
#> `stats::p.adjust(x$table_body$p.value, method = "fdr")`
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 p-value2 q-value3
Patient Age 46 (36, 55) 49 (43, 59) 0.091 0.18
Tumor Grade 0.93 0.93
I 46 (35%) 21 (34%)
II 44 (33%) 19 (31%)
III 42 (32%) 21 (34%)

1 Statistics presented: median (IQR); n (%)

2 Statistical tests performed: Wilcoxon rank-sum test; chi-square test of independence

3 False discovery rate correction for multiple testing


Include missing tumor response as column using fct_explicit_na().

trial[c("response", "age", "grade")] %>%
  mutate(
    response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>%
      fct_explicit_na(na_level = "Missing Response Status")
  ) %>%
  tbl_summary(
    by = response,
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  )
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 Missing Response Status, N = 71
Patient Age 46 (36, 55) 49 (43, 59) 52 (44, 57)
Unknown 7 3 1
Tumor Grade
I 46 (35%) 21 (34%) 1 (14%)
II 44 (33%) 19 (31%) 5 (71%)
III 42 (32%) 21 (34%) 1 (14%)

1 Statistics presented: median (IQR); n (%)


Include p-values comparing all groups to a single reference group.

# table summarizing data with no p-values
t0 <- trial %>%
  select(grade, age, response) %>%
  tbl_summary(by = grade, missing = "no") %>%
  modify_header(stat_by = md("**{level}**"))

# table comparing grade I and II
t1 <- trial %>%
  select(grade, age, response) %>%
  filter(grade %in% c("I", "II")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. II**"))

# table comparing grade I and II
t2 <- trial %>%
  select(grade, age, response) %>%
  filter(grade %in% c("I", "III")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p()  %>%
  modify_header(p.value ~ md("**I vs. III**"))

# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
  modify_spanning_header(
    list(
      starts_with("stat_") ~ "**Tumor Grade**",
      starts_with("p.value") ~ "**p-value**"
    )
  ) %>%
  as_gt(include = -tab_spanner) %>%
  # hiding repeated summary columns
  cols_hide(columns = vars(stat_1_2, stat_2_2, stat_3_2, stat_1_3, stat_2_3, stat_3_3))
Characteristic I1 II1 III1 I vs. II2 I vs. III2
Age, yrs 47 (37, 56) 48 (37, 57) 47 (38, 58) 0.7 0.5
Tumor Response 21 (31%) 19 (30%) 21 (33%) >0.9 0.9

1 Statistics presented: median (IQR); n (%)

2 Statistical tests performed: Wilcoxon rank-sum test; Fisher's exact test


Add additional statistics as additional columns.

# define function for lower and upper bounds of the mean CI
ll <- function(x) t.test(x)$conf.int[1]
ul <- function(x) t.test(x)$conf.int[2]

t1 <-
  trial %>%
  select(age, marker) %>%
  tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>%
  modify_header(stat_0 ~ "**Mean (SD)**")

t2 <-
  trial %>%
  select(age, marker) %>%
  tbl_summary(statistic = all_continuous() ~ "{ll}, {ul}", missing = "no") %>%
  modify_header(stat_0 ~ "**95% CI for Mean**")

tbl_merge(list(t1, t2)) %>%
  modify_footnote(everything() ~ NA_character_) %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic Mean (SD) 95% CI for Mean
Age, yrs 47 (14) 45, 49
Marker Level, ng/mL 0.92 (0.86) 0.79, 1.04

Regression Tables

Include number of observations and the number of events in a univariate regression table.

trial[c("response", "age", "grade")] %>%
  tbl_uvregression(
    method = glm,
    y = response,
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) %>%
  add_nevent()
Characteristic N Event N OR1 95% CI1 p-value
Age, yrs 183 58 1.02 1.00, 1.04 0.10
Grade 193 61
I
II 0.95 0.45, 2.00 0.9
III 1.10 0.52, 2.29 0.8

1 OR = Odds Ratio, CI = Confidence Interval


Include two related models side-by-side with descriptive statistics.

gt_r1 <- glm(response ~ age + trt, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)
gt_r2 <- coxph(Surv(ttdeath, death) ~ age + trt, trial) %>%
  tbl_regression(exponentiate = TRUE)
gt_t1 <- trial[c("age", "trt")] %>% tbl_summary(missing = "no") %>% add_n()

tbl_merge(
  list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c("**Summary Statistics**", "**Tumor Response**", "**Time to Death**")
)
Characteristic Summary Statistics Tumor Response Time to Death
N N = 2001 OR2 95% CI2 p-value HR2 95% CI2 p-value
Age, yrs 189 47 (38, 57) 1.02 1.00, 1.04 0.095 1.01 0.99, 1.02 0.4
Chemotherapy Treatment 200
Drug A 98 (49%)
Drug B 102 (51%) 1.13 0.60, 2.13 0.7 1.31 0.89, 1.93 0.2

1 Statistics presented: median (IQR); n (%)

2 OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio


Include the number of events at each level of a categorical predictor.

gt_model <-
  trial[c("ttdeath", "death", "stage", "grade")] %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death),
    exponentiate = TRUE,
    hide_n = TRUE
  )

gt_eventn <-
  trial %>%
  filter(death ==  1) %>%
  select(stage, grade) %>%
  tbl_summary(
    statistic = all_categorical() ~ "{n}",
    label = list(stage ~ "T Stage", grade ~ "Grade")
  ) %>%
  modify_header(stat_0 ~ "**Event N**") %>%
  modify_footnote(everything() ~ NA_character_)

tbl_merge(list(gt_eventn, gt_model)) %>%
  bold_labels() %>%
  italicize_levels() %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic Event N HR1 95% CI1 p-value
T Stage
T1 24
T2 27 1.18 0.68, 2.04 0.6
T3 22 1.23 0.69, 2.20 0.5
T4 39 2.48 1.49, 4.14 <0.001
Grade
I 33
II 36 1.28 0.80, 2.05 0.3
III 43 1.69 1.07, 2.66 0.024

1 HR = Hazard Ratio, CI = Confidence Interval


Regression model where the covariate remains the same, and the outcome changes.

tbl_reg <-
  trial[c("age", "marker", "trt")] %>%
  tbl_uvregression(
    method = lm,
    x = trt,
    show_single_row = "trt",
    hide_n = TRUE
  ) %>%
  modify_header(list(
    label ~"**Model Outcome**",
    estimate ~ "**Treatment Coef.**"
  ))

tbl_reg %>%
  modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug group.")
Model Outcome Treatment Coef.1 95% CI2 p-value
Age, yrs 0.44 -3.7, 4.6 0.8
Marker Level, ng/mL -0.20 -0.44, 0.05 0.12

1 Values larger than 0 indicate larger values in the Drug group.

2 CI = Confidence Interval

Add descriptive statistics by treatment group to the table above to produce a table often reported two group comparisons.

gt_sum <-
  trial[c("age", "marker", "trt")] %>%
  mutate(trt = fct_rev(trt)) %>%
  tbl_summary(by = trt,
              statistic = all_continuous() ~ "{mean} ({sd})",
              missing = "no") %>%
  add_n() %>%
  modify_header(stat_by = md("**{level}**"))


tbl_merge(list(gt_sum, tbl_reg))  %>%
  modify_header(estimate_2 ~ "**Difference**") %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic N Drug B1 Drug A1 Difference 95% CI2 p-value
Age, yrs 189 47 (14) 47 (15) 0.44 -3.7, 4.6 0.8
Marker Level, ng/mL 190 0.82 (0.83) 1.02 (0.89) -0.20 -0.44, 0.05 0.12

1 Statistics presented: mean (SD)

2 CI = Confidence Interval


Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().

my_tidy <- function(x, exponentiate =  FALSE, conf.level = 0.95, ...) {
  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # calculate the confidence intervals, and save them in a tibble
    stats::confint.default(x) %>%
      tibble::as_tibble() %>%
      rlang::set_names(c("conf.low", "conf.high"))  )
}

lm(age ~ grade + marker, trial) %>%
  tbl_regression(tidy_fun = my_tidy)
Characteristic Beta 95% CI1 p-value
Grade
I
II 0.64 -4.6, 5.9 0.8
III 2.4 -2.8, 7.6 0.4
Marker Level, ng/mL -0.04 -2.6, 2.5 >0.9

1 CI = Confidence Interval