Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.


Summary Tables

Add a spanning header over the group columns for increased clarity, and modifying column headers.

trial %>%
  select(trt, age, grade) %>%
  tbl_summary(
    by = trt, 
    missing = "no",
    statistic = all_continuous() ~ "{median} ({p25}, {p75})"
  ) %>%
  modify_header(stat_by = md("**{level}**<br>N =  {n} ({style_percent(p)}%)")) %>%
  add_n() %>%
  bold_labels() %>%
  modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Characteristic N Chemotherapy Treatment
Drug A
N = 98 (49%)1
Drug B
N = 102 (51%)1
Age 189 46 (37, 59) 48 (39, 56)
Grade 200
I 35 (36%) 33 (32%)
II 32 (33%) 36 (35%)
III 31 (32%) 33 (32%)

1 Median (IQR); n (%)


Show continuous summary statistics on multiple lines.

trial %>%
  select(trt, age, marker) %>%
  tbl_summary(
    by = trt,
    type = all_continuous() ~ "continuous2",
    statistic = all_continuous() ~ c("{N_nonmiss}",
                                     "{mean} ({sd})", 
                                     "{median} ({p25}, {p75})", 
                                     "{min}, {max}"),
    missing = "no"
  ) %>%
  italicize_levels()
Characteristic Drug A, N = 98 Drug B, N = 102
Age
N 91 98
Mean (SD) 47 (15) 47 (14)
Median (IQR) 46 (37, 59) 48 (39, 56)
Range 6, 78 9, 83
Marker Level (ng/mL)
N 92 98
Mean (SD) 1.02 (0.89) 0.82 (0.83)
Median (IQR) 0.84 (0.24, 1.57) 0.52 (0.19, 1.20)
Range 0.00, 3.87 0.00, 3.64

Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.

trial %>%
  select(response, age, grade) %>%
  mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
  tbl_summary(
    by = response, 
    missing = "no",
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  ) %>%
  add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>%
  add_q()
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 p-value2 q-value3
Patient Age 46 (36, 55) 49 (43, 59) 0.091 0.18
Tumor Grade 0.93 0.93
I 46 (35%) 21 (34%)
II 44 (33%) 19 (31%)
III 42 (32%) 21 (34%)

1 Median (IQR); n (%)

2 Wilcoxon rank sum test; Pearson's Chi-squared test

3 False discovery rate correction for multiple testing


Include missing tumor response as column using fct_explicit_na().

trial %>%
  select(response, age, grade) %>%
  mutate(
    response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>% 
      fct_explicit_na(na_level = "Missing Response Status")
  ) %>%
  tbl_summary(
    by = response, 
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
  )  
Characteristic No Tumor Response, N = 1321 Tumor Responded, N = 611 Missing Response Status, N = 71
Patient Age 46 (36, 55) 49 (43, 59) 52 (44, 57)
Unknown 7 3 1
Tumor Grade
I 46 (35%) 21 (34%) 1 (14%)
II 44 (33%) 19 (31%) 5 (71%)
III 42 (32%) 21 (34%) 1 (14%)

1 Median (IQR); n (%)


Paired t-test

# imagine that each patient recieved Drug A and Drug B (adding ID showing their paired measurements)
trial_paired <-
  trial %>%
  select(trt, marker) %>%
  group_by(trt) %>%
  mutate(id = row_number()) %>%
  ungroup()

# you must first delete incomplete pairs from the data, then you can build the table
trial_paired %>%
  # delete missing marker values
  filter(!is.na(marker)) %>%
  # keep IDs with both measurements
  group_by(id) %>%
  filter(n() == 2) %>%
  ungroup() %>%
  # summarize data
  tbl_summary(by = trt, include = -id) %>%
  add_p(test = marker ~ "paired.t.test", group = id)
Characteristic Drug A, N = 881 Drug B, N = 881 p-value2
Marker Level (ng/mL) 0.84 (0.24, 1.66) 0.52 (0.17, 1.22) 0.2

1 Median (IQR)

2 Paired t-test


Include p-values comparing all groups to a single reference group.

# table summarizing data with no p-values
t0 <- trial %>%
  select(grade, age, response) %>%
  tbl_summary(by = grade, missing = "no") %>%
  modify_header(stat_by = md("**{level}**"))

# table comparing grade I and II
t1 <- trial %>%
  select(grade, age, response) %>%
  filter(grade %in% c("I", "II")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p() %>%
  modify_header(p.value ~ md("**I vs. II**")) %>%
  # hide summary stat columns
  modify_table_header(all_stat_cols(), hide = TRUE)

# table comparing grade I and II
t2 <- trial %>%
  select(grade, age, response) %>%
  filter(grade %in% c("I", "III")) %>%
  tbl_summary(by = grade, missing = "no") %>%
  add_p()  %>%
  modify_header(p.value ~ md("**I vs. III**")) %>%
  # hide summary stat columns
  modify_table_header(all_stat_cols(), hide = TRUE)

# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
  modify_spanning_header(
    list(
      all_stat_cols() ~ "**Tumor Grade**",
      starts_with("p.value") ~ "**p-values**"
    )
  )
Characteristic Tumor Grade p-values
I1 II1 III1 I vs. II2 I vs. III2
Age 47 (37, 56) 48 (37, 57) 47 (38, 58) 0.7 0.5
Tumor Response 21 (31%) 19 (30%) 21 (33%) >0.9 0.9

1 Median (IQR); n (%)

2 Wilcoxon rank sum test; Fisher's exact test


Add additional statistics as additional columns.

# define function for lower and upper bounds of the mean CI
ll <- function(x) t.test(x)$conf.int[1]
ul <- function(x) t.test(x)$conf.int[2]

t1 <-
  trial %>%
  select(age, marker) %>%
  tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>%
  modify_header(stat_0 ~ "**Mean (SD)**")

t2 <-
  trial %>%
  select(age, marker) %>%
  tbl_summary(statistic = all_continuous() ~ "{ll}, {ul}", missing = "no") %>%
  modify_header(stat_0 ~ "**95% CI for Mean**")

tbl_merge(list(t1, t2)) %>%
  modify_footnote(everything() ~ NA_character_) %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic Mean (SD) 95% CI for Mean
Age 47 (14) 45, 49
Marker Level (ng/mL) 0.92 (0.86) 0.79, 1.04

Regression Tables

Include number of observations and the number of events in a univariate regression table.

trial %>%
  select(response, age, grade) %>%
  tbl_uvregression(
    method = glm,
    y = response, 
    method.args = list(family = binomial),
    exponentiate = TRUE
  ) %>%
  add_nevent()
Characteristic N Event N OR1 95% CI1 p-value
Age 183 58 1.02 1.00, 1.04 0.10
Grade 193 61
I
II 0.95 0.45, 2.00 0.9
III 1.10 0.52, 2.29 0.8

1 OR = Odds Ratio, CI = Confidence Interval


Include two related models side-by-side with descriptive statistics.

gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>%
  tbl_regression(exponentiate = TRUE)
gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>%
  tbl_regression(exponentiate = TRUE)
gt_t1 <- trial[c("trt", "grade")] %>% 
  tbl_summary(missing = "no") %>% 
  add_n() %>%
  modify_header(stat_0 ~ "**n (%)**") %>%
  modify_footnote(stat_0 ~ NA_character_)

tbl_merge(
  list(gt_t1, gt_r1, gt_r2),
  tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)
Characteristic N n (%) Tumor Response Time to Death
OR1 95% CI1 p-value HR1 95% CI1 p-value
Chemotherapy Treatment 200
Drug A 98 (49%)
Drug B 102 (51%) 1.21 0.66, 2.24 0.5 1.25 0.86, 1.81 0.2
Grade 200
I 68 (34%)
II 68 (34%) 0.94 0.44, 1.98 0.9 1.28 0.80, 2.06 0.3
III 64 (32%) 1.09 0.52, 2.27 0.8 1.69 1.07, 2.66 0.024

1 OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio


Include the number of events at each level of a categorical predictor.

gt_model <-
  trial %>%
  select(ttdeath, death, stage, grade) %>%
  tbl_uvregression(
    method = coxph,
    y = Surv(ttdeath, death), 
    exponentiate = TRUE,
    hide_n = TRUE
  )

gt_eventn <-
  trial %>%
  filter(death ==  1) %>%
  select(stage, grade) %>%
  tbl_summary(
    statistic = all_categorical() ~ "{n}",
    label = list(stage ~ "T Stage", grade ~ "Grade")
  ) %>%
  modify_header(stat_0 ~ "**Event N**") %>%
  modify_footnote(everything() ~ NA_character_)

tbl_merge(list(gt_eventn, gt_model)) %>%
  bold_labels() %>%
  italicize_levels() %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic Event N HR1 95% CI1 p-value
T Stage
T1 24
T2 27 1.18 0.68, 2.04 0.6
T3 22 1.23 0.69, 2.20 0.5
T4 39 2.48 1.49, 4.14 <0.001
Grade
I 33
II 36 1.28 0.80, 2.05 0.3
III 43 1.69 1.07, 2.66 0.024

1 HR = Hazard Ratio, CI = Confidence Interval


Regression model where the covariate remains the same, and the outcome changes.

tbl_reg <-
  trial %>%
  select(age, marker, trt) %>%
  tbl_uvregression(
    method = lm,
    x = trt,
    show_single_row = "trt",
    hide_n = TRUE
  ) %>%
  modify_header(list(
    label ~"**Model Outcome**",
    estimate ~ "**Treatment Coef.**"
  )) 

tbl_reg %>%
  modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug group.")
Model Outcome Treatment Coef.1 95% CI2 p-value
Age 0.44 -3.7, 4.6 0.8
Marker Level (ng/mL) -0.20 -0.44, 0.05 0.12

1 Values larger than 0 indicate larger values in the Drug group.

2 CI = Confidence Interval

Add descriptive statistics by treatment group to the table above to produce a table often reported two group comparisons.

gt_sum <- 
  trial %>%
  select(age, marker, trt) %>%
  mutate(trt = fct_rev(trt)) %>%
  tbl_summary(by = trt, 
              statistic = all_continuous() ~ "{mean} ({sd})",
              missing = "no") %>%
  add_n() %>%
  modify_header(stat_by = md("**{level}**"))


tbl_merge(list(gt_sum, tbl_reg))  %>%
  modify_header(estimate_2 ~ "**Difference**") %>%
  modify_spanning_header(everything() ~ NA_character_)
Characteristic N Drug B1 Drug A1 Difference 95% CI2 p-value
Age 189 47 (14) 47 (15) 0.44 -3.7, 4.6 0.8
Marker Level (ng/mL) 190 0.82 (0.83) 1.02 (0.89) -0.20 -0.44, 0.05 0.12

1 Mean (SD)

2 CI = Confidence Interval


Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().

my_tidy <- function(x, exponentiate =  FALSE, conf.level = 0.95, ...) {
  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # calculate the confidence intervals, and save them in a tibble
    stats::confint.default(x) %>%
      tibble::as_tibble() %>%
      rlang::set_names(c("conf.low", "conf.high"))  )
}

lm(age ~ grade + marker, trial) %>%
  tbl_regression(tidy_fun = my_tidy)
Characteristic Beta 95% CI1 p-value
Grade
I
II 0.64 -4.6, 5.9 0.8
III 2.4 -2.8, 7.6 0.4
Marker Level (ng/mL) -0.04 -2.6, 2.5 >0.9

1 CI = Confidence Interval