Skip to contents

FAQ and Gallery showing various tables possible with the {gtsummary} package.


Frequently Asked Questions

Summary Tables

Add a spanning header over the group columns for increased clarity, and modify column headers. Using bold_labels() formats the labels as bold, but labels can also be italicized using italicize_labels(), or both functions can be combined to make the labels bold and italic.

# Summarise age and grade by `trt`, leaving out the missing-value rows.
tbl_summary(
  trial,
  include = c(age, grade),
  by = trt,
  statistic = all_continuous() ~ "{median} ({p25}, {p75})",
  missing = "no"
) |>
  # two-line column header: group level, then its N and percentage
  modify_header(
    all_stat_cols() ~ "**{level}**  \nN = {n} ({style_percent(p)}%)"
  ) |>
  add_n() |>
  bold_labels() |>
  # one header spanning all of the statistic columns
  modify_spanning_header(
    all_stat_cols() ~ "**Chemotherapy Treatment**"
  )
Characteristic N
Chemotherapy Treatment
Drug A
N = 98 (49%)
1
Drug B
N = 102 (51%)
1
Age 189 46 (37, 60) 48 (39, 56)
Grade 200

    I
35 (36%) 33 (32%)
    II
32 (33%) 36 (35%)
    III
31 (32%) 33 (32%)
1 Median (Q1, Q3); n (%)

Show continuous summary statistics on multiple lines. Levels are italicized here using the italicize_levels() function, but the bold_levels() function can be used instead to create bold text, or both functions can be used together to get text that is both bold and in italics.

tbl_summary(
  trial,
  include = c(age, marker),
  by = trt,
  missing = "no",
  # "continuous2" puts each requested statistic on its own row
  type = all_continuous() ~ "continuous2",
  statistic = all_continuous() ~ c(
    "{N_nonmiss}",
    "{mean} ({sd})",
    "{median} ({p25}, {p75})",
    "{min}, {max}"
  )
) |>
  # show the statistic rows (the levels) in italics
  italicize_levels()
Characteristic Drug A
N = 98
Drug B
N = 102
Age

    N Non-missing 91 98
    Mean (SD) 47 (15) 47 (14)
    Median (Q1, Q3) 46 (37, 60) 48 (39, 56)
    Min, Max 6, 78 9, 83
Marker Level (ng/mL)

    N Non-missing 92 98
    Mean (SD) 1.02 (0.89) 0.82 (0.83)
    Median (Q1, Q3) 0.84 (0.23, 1.60) 0.52 (0.18, 1.21)
    Min, Max 0.00, 3.87 0.01, 3.64

Modify the function that formats the p-values, change variable labels, update the tumor response header, and add a correction for multiple testing.

mutate(
  trial,
  # the factor labels are what appear in the column headers
  response = factor(
    response,
    labels = c("No Tumor Response", "Tumor Responded")
  )
) |>
  tbl_summary(
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade"),
    include = c(age, grade),
    by = response,
    missing = "no"
  ) |>
  # format the p-values with two digits
  add_p(pvalue_fun = label_style_pvalue(digits = 2)) |>
  # append the multiple-testing-adjusted q-values
  add_q()
Characteristic No Tumor Response
N = 132
1
Tumor Responded
N = 61
1
p-value2 q-value3
Patient Age 46 (36, 55) 49 (43, 59) 0.091 0.18
Tumor Grade

0.93 0.93
    I 46 (35%) 21 (34%)

    II 44 (33%) 19 (31%)

    III 42 (32%) 21 (34%)

1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test
3 False discovery rate correction for multiple testing

Include missing tumor response as column using forcats::fct_na_value_to_level().

trial |>
  mutate(
    # turn NA responses into an explicit factor level so that they get
    # their own column in the table
    response = forcats::fct_na_value_to_level(
      factor(response, labels = c("No Tumor Response", "Tumor Responded")),
      level = "Missing Response Status"
    )
  ) |>
  tbl_summary(
    label = list(age ~ "Patient Age", grade ~ "Tumor Grade"),
    include = c(age, grade),
    by = response
  )
Characteristic No Tumor Response
N = 132
1
Tumor Responded
N = 61
1
Missing Response Status
N = 7
1
Patient Age 46 (36, 55) 49 (43, 59) 52 (42, 57)
    Unknown 7 3 1
Tumor Grade


    I 46 (35%) 21 (34%) 1 (14%)
    II 44 (33%) 19 (31%) 5 (71%)
    III 42 (32%) 21 (34%) 1 (14%)
1 Median (Q1, Q3); n (%)

Report treatment differences between two groups. This is often needed in randomized trials. In this example, we report the difference in tumor response and marker level between two chemotherapy treatments.

tbl_summary(
  trial,
  include = c(response, marker),
  by = trt,
  missing = "no",
  # percentages for categorical variables, mean (SD) for continuous ones
  statistic = list(
    all_categorical() ~ "{p}%",
    all_continuous() ~ "{mean} ({sd})"
  )
) |>
  # difference between the two groups, with its CI and p-value
  add_difference() |>
  add_n() |>
  # show only the group level in the statistic column headers
  modify_header(all_stat_cols() ~ "**{level}**")
Characteristic N Drug A1 Drug B1 Difference2 95% CI2 p-value2
Tumor Response 193 29% 34% -4.2% -18%, 9.9% 0.6
Marker Level (ng/mL) 190 1.02 (0.89) 0.82 (0.83) 0.20 -0.05, 0.44 0.12
Abbreviation: CI = Confidence Interval
1 %; Mean (SD)
2 2-sample test for equality of proportions with continuity correction; Welch Two Sample t-test

Paired t-test and McNemar’s test. The data is expected in a long format with 2 rows per participant.

# Pretend every patient received both Drug A and Drug B: number the rows
# within each treatment and use that number as the ID linking the pair.
trial_paired <- mutate(
  select(trial, trt, marker, response),
  id = dplyr::row_number(),
  .by = trt
)

# Incomplete pairs have to be removed before the table is built.
trial_paired |>
  # discard rows with missing values
  tidyr::drop_na() |>
  # retain only the IDs that still have both of their rows
  dplyr::filter(dplyr::n() == 2, .by = id) |>
  # build the summary table (the ID itself is not summarised)
  tbl_summary(include = -id, by = trt) |>
  add_p(
    group = id,
    test = list(
      marker ~ "paired.t.test",
      response ~ "mcnemar.test"
    )
  )
Characteristic Drug A
N = 83
1
Drug B
N = 83
1
p-value2
Marker Level (ng/mL) 0.82 (0.22, 1.71) 0.53 (0.17, 1.31) 0.2
Tumor Response 21 (25%) 28 (34%) 0.3
1 Median (Q1, Q3); n (%)
2 Paired t-test; McNemar’s Chi-squared test with continuity correction

Include p-values comparing all groups to a single reference group.

# table summarizing data with no p-values (all three grades)
small_trial <- trial |> select(grade, age, response)
t0 <- small_trial |>
  tbl_summary(by = grade, missing = "no") |>
  modify_header(all_stat_cols() ~ "**{level}**")

# table comparing grade I and II; only its p-value column is kept
t1 <- small_trial |>
  dplyr::filter(grade %in% c("I", "II")) |>
  tbl_summary(by = grade, missing = "no") |>
  add_p() |>
  modify_header(p.value = "**I vs. II**") |>
  # hide summary stat columns
  modify_column_hide(all_stat_cols())

# table comparing grade I and III; only its p-value column is kept
t2 <- small_trial |>
  dplyr::filter(grade %in% c("I", "III")) |>
  tbl_summary(by = grade, missing = "no") |>
  add_p() |>
  modify_header(p.value = "**I vs. III**") |>
  # hide summary stat columns
  modify_column_hide(all_stat_cols())

# merging the 3 tables together, and adding spanning headers over the
# statistic columns and over the p-value columns
tbl_merge(list(t0, t1, t2)) |>
  modify_spanning_header(
    all_stat_cols() ~ "**Tumor Grade**",
    starts_with("p.value") ~ "**p-values**"
  )
Characteristic
Tumor Grade
p-values
I1 II1 III1 I vs. II2 I vs. III2
Age 47 (37, 56) 49 (37, 57) 47 (38, 58) 0.7 0.5
Tumor Response 21 (31%) 19 (30%) 21 (33%) >0.9 0.9
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Fisher’s exact test

Add 95% confidence interval around the mean as an additional column

tbl_summary(
  trial,
  include = c(age, marker),
  missing = "no",
  statistic = all_continuous() ~ "{mean} ({sd})"
) |>
  # rename the statistic column and drop the footnote attached to its header
  modify_header(stat_0 = "**Mean (SD)**") |>
  remove_footnote_header(stat_0) |>
  # append the confidence-interval column
  add_ci()
Characteristic Mean (SD) 95% CI
Age 47 (14) 45, 49
Marker Level (ng/mL) 0.92 (0.86) 0.79, 1.0
Abbreviation: CI = Confidence Interval

It’s often needed to summarize a continuous variable by one, two, or more categorical variables. The example below shows a table summarizing a continuous variable by two categorical variables. To summarize by more than two categorical variables, use tbl_continuous in conjunction with tbl_strata (see the tbl_strata example that follows this one).

# Summarise `marker` within each combination of `grade` (rows) and `trt`
# (columns).
tbl_continuous(
  trial,
  variable = marker,
  include = grade,
  by = trt
) |>
  modify_spanning_header(
    all_stat_cols() ~ "**Treatment Assignment**"
  )
Characteristic
Treatment Assignment
Drug A
N = 98
1
Drug B
N = 102
1
Grade

    I 0.96 (0.23, 1.71) 1.05 (0.28, 1.50)
    II 0.66 (0.30, 1.24) 0.21 (0.09, 1.08)
    III 0.84 (0.16, 1.94) 0.58 (0.33, 1.63)
1 Marker Level (ng/mL): Median (Q1, Q3)

Build a summary table stratified by more than one variable.

select(trial, trt, grade, age, stage) |>
  # prefix the grade so the stratum headers read "Grade I", "Grade II", ...
  mutate(grade = paste("Grade", grade)) |>
  tbl_strata(
    strata = grade,
    # table built within each stratum; `.x` is that stratum's data
    .tbl_fun = ~ tbl_summary(.x, by = trt, missing = "no") |>
      modify_header(all_stat_cols() ~ "**{level}**")
  )
Characteristic
Grade I
Grade II
Grade III
Drug A1 Drug B1 Drug A1 Drug B1 Drug A1 Drug B1
Age 46 (36, 60) 48 (42, 55) 45 (31, 55) 51 (42, 58) 52 (42, 61) 45 (36, 52)
T Stage





    T1 8 (23%) 9 (27%) 14 (44%) 9 (25%) 6 (19%) 7 (21%)
    T2 8 (23%) 10 (30%) 8 (25%) 9 (25%) 9 (29%) 10 (30%)
    T3 11 (31%) 7 (21%) 5 (16%) 6 (17%) 6 (19%) 8 (24%)
    T4 8 (23%) 7 (21%) 5 (16%) 12 (33%) 10 (32%) 8 (24%)
1 Median (Q1, Q3); n (%)

Regression Tables

Include number of observations and the number of events in a univariate regression table.

# One logistic regression of `response` per covariate, shown exponentiated.
tbl_uvregression(
  trial,
  y = response,
  include = c(age, grade),
  method = glm,
  method.args = list(family = binomial),
  exponentiate = TRUE
) |>
  # add the number-of-events column
  add_nevent()
Characteristic N Event N OR 95% CI p-value
Age 183 58 1.02 1.00, 1.04 0.10
Grade 193 61


    I


    II

0.95 0.45, 2.00 0.9
    III

1.10 0.52, 2.29 0.8
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Include two related models side-by-side with descriptive statistics. We also use the compact table theme that reduces cell padding and font size.

# Logistic model of tumor response, reported exponentiated
gt_r1 <- tbl_regression(
  glm(response ~ trt + grade, data = trial, family = binomial),
  exponentiate = TRUE
)
# Cox model of time to death, reported exponentiated
gt_r2 <- tbl_regression(
  survival::coxph(survival::Surv(ttdeath, death) ~ trt + grade, data = trial),
  exponentiate = TRUE
)
# Descriptive statistics for the same two covariates
gt_t1 <- tbl_summary(trial, include = c(trt, grade), missing = "no") |>
  add_n() |>
  modify_header(stat_0 = "**n (%)**") |>
  remove_footnote_header(stat_0)

# Switch to the compact theme before printing the merged table
theme_gtsummary_compact()
#> Setting theme "Compact"
tbl_merge(
  tbls = list(gt_t1, gt_r1, gt_r2),
  # no spanning header over the descriptive columns
  tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)
Characteristic N n (%)
Tumor Response
Time to Death
OR 95% CI p-value HR 95% CI p-value
Chemotherapy Treatment 200






    Drug A
98 (49%)

    Drug B
102 (51%) 1.21 0.66, 2.24 0.5 1.25 0.86, 1.81 0.2
Grade 200






    I
68 (34%)

    II
68 (34%) 0.94 0.44, 1.98 0.9 1.28 0.80, 2.06 0.3
    III
64 (32%) 1.09 0.52, 2.27 0.8 1.69 1.07, 2.66 0.024
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio, OR = Odds Ratio

Include the number of events at each level of a categorical predictor.

# One Cox model per covariate, shown exponentiated and without the N column.
tbl_uvregression(
  trial,
  y = survival::Surv(ttdeath, death),
  include = c(stage, grade),
  method = survival::coxph,
  hide_n = TRUE,
  exponentiate = TRUE
) |>
  # report the event count on each level row
  add_nevent(location = "level")
Characteristic Event N HR 95% CI p-value
T Stage



    T1 24
    T2 27 1.18 0.68, 2.04 0.6
    T3 22 1.23 0.69, 2.20 0.5
    T4 39 2.48 1.49, 4.14 <0.001
Grade



    I 33
    II 36 1.28 0.80, 2.05 0.3
    III 43 1.69 1.07, 2.66 0.024
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio

Regression model where the covariate remains the same, and the outcome changes.

# `trt` is the covariate in every model; each variable in `include` takes a
# turn as the outcome.
tbl_uvregression(
  trial,
  x = trt,
  include = c(age, marker),
  method = lm,
  show_single_row = "trt",
  hide_n = TRUE
) |>
  modify_header(
    estimate = "**Treatment Coef.**",
    label = "**Model Outcome**"
  ) |>
  modify_footnote_header(
    footnote = "Values larger than 0 indicate larger values in the Drug B group.",
    columns = estimate
  )
Model Outcome Treatment Coef.1 95% CI p-value
Age 0.44 -3.7, 4.6 0.8
Marker Level (ng/mL) -0.20 -0.44, 0.05 0.12
Abbreviation: CI = Confidence Interval
1 Values larger than 0 indicate larger values in the Drug B group.

Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().

#' Tidy a model and attach Wald confidence intervals
#'
#' Custom tidier intended for `tbl_regression(tidy_fun = )`. The coefficient
#' table comes from `broom::tidy()`; the interval limits come from
#' `confint.default()`.
#'
#' @param x A fitted model accepted by `broom::tidy()` and `confint.default()`.
#' @param exponentiate Logical flag passed to `broom::tidy()`; when `TRUE` the
#'   interval limits are exponentiated as well.
#' @param conf.level Confidence level of the Wald interval, forwarded to
#'   `confint.default(level = )`.
#' @param ... Not used; present so callers can pass further tidier arguments.
#' @return The `broom::tidy()` result with `conf.low` and `conf.high` columns
#'   appended.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
  # Wald limits at the requested level (previously the level was not passed
  # on, so the interval was always the 95% default)
  wald_ci <- confint.default(x, level = conf.level)

  # `exponentiate` is a single flag, so a plain `if` is enough
  if (isTRUE(exponentiate)) {
    wald_ci <- exp(wald_ci)
  }

  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    # store the limits in a tibble under the column names tidiers use
    wald_ci |>
      dplyr::as_tibble(.name_repair = "minimal") |>
      rlang::set_names(c("conf.low", "conf.high"))
  )
}

# Linear model of age, tabulated with the custom Wald-interval tidier
tbl_regression(
  lm(age ~ grade + marker, data = trial),
  tidy_fun = my_tidy
)
Characteristic Beta 95% CI p-value
Grade


    I
    II 0.64 -4.6, 5.9 0.8
    III 2.4 -2.8, 7.6 0.4
Marker Level (ng/mL) -0.04 -2.6, 2.5 >0.9
Abbreviation: CI = Confidence Interval

Use significance stars on estimates with low p-values.

# One Cox model per covariate, shown exponentiated, with significance stars
# attached to the estimates.
trial |>
  tbl_uvregression(
    method = survival::coxph,
    y = survival::Surv(ttdeath, death),
    include = c(stage, grade),
    # no trailing comma after the last argument: it would pass an extra,
    # empty argument to the call
    exponentiate = TRUE
  ) |>
  add_significance_stars()
Characteristic N HR1 SE
T Stage 200

    T1
    T2
1.18 0.281
    T3
1.23 0.295
    T4
2.48*** 0.260
Grade 200

    I
    II
1.28 0.241
    III
1.69* 0.232
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio, SE = Standard Error
1 *p<0.05; **p<0.01; ***p<0.001

To use robust standard errors in a regression model, the model is prepared as usual, and the variance-covariance matrix of the model is modified via an appropriate function, such as vcovCL from the sandwich package.

# Number the observations within each treatment; the resulting IDs are the
# clusters used for the robust variance estimate below.
dat <- trial |>
  mutate(subject_id = dplyr::row_number(), .by = trt)
lmod <- glm(response ~ trt + grade, data = dat, family = binomial(link = "logit"))

# Cluster-robust variance-covariance matrix. vcovCL() selects the estimator
# through its `type` argument; a misspelled name such as `vcov_type` is
# absorbed by `...` and the default estimator is used instead.
# The object keeps the name `cov` because the later examples refer to it.
cov <- sandwich::vcovCL(lmod, cluster = ~ subject_id, type = "HC0")

Once you have the robust variance-covariance matrix, you can use it with tidy_robust to calculate adjusted confidence intervals and p-values.

Robust errors generally have only a small impact on the confidence intervals and p-values. For demonstration purposes, we therefore show 2 digits for p-values.

A standard, non-robust regression table can be made as follows:

# Regression table built from the model's own (non-robust) standard errors
tbl_standard <- lmod |>
  tbl_regression(
    exponentiate = TRUE,
    # two digits for the p-values
    pvalue_fun = label_style_pvalue(digits = 2)
  )
tbl_standard
Characteristic OR 95% CI p-value
Chemotherapy Treatment


    Drug A
    Drug B 1.21 0.66, 2.24 0.53
Grade


    I
    II 0.94 0.44, 1.98 0.87
    III 1.09 0.52, 2.27 0.83
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

In order to use the robust errors, pass the variance-covariance matrix to the tidy_robust function, as shown below.

# Same table, but tidied with tidy_robust() and the matrix stored in `cov`
tbl_robust <- lmod |>
  tbl_regression(
    # forward whatever else the tidier is called with
    tidy_fun = \(x, ...) tidy_robust(x, vcov = cov, ...),
    exponentiate = TRUE,
    pvalue_fun = label_style_pvalue(digits = 2)
  )
tbl_robust
Characteristic OR 95% CI p-value
Chemotherapy Treatment


    Drug A
    Drug B 1.21 0.64, 2.30 0.55
Grade


    I
    II 0.94 0.45, 1.95 0.86
    III 1.09 0.53, 2.22 0.82
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Comparing the tables side-by-side, we see that the confidence intervals and p-values are very similar.

# Put the two regression tables next to each other under their own headers
list(tbl_standard, tbl_robust) |>
  tbl_merge(
    tab_spanner = c("**Standard errors**", "**Robust errors**")
  )
Characteristic
Standard errors
Robust errors
OR 95% CI p-value OR 95% CI p-value
Chemotherapy Treatment





    Drug A

    Drug B 1.21 0.66, 2.24 0.53 1.21 0.64, 2.30 0.55
Grade





    I

    II 0.94 0.44, 1.98 0.87 0.94 0.45, 1.95 0.86
    III 1.09 0.52, 2.27 0.83 1.09 0.53, 2.22 0.82
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Global p-values can also be calculated with robust errors in the same manner via the tidy_wald_test function. Again, the following example demonstrates the non-robust approach and the robust approach side-by-side.

# Global p-values from tidy_wald_test(): the first table uses it as is, the
# second passes it the matrix stored in `cov`.
list(
  add_global_p(tbl_standard, anova_fun = tidy_wald_test),
  add_global_p(
    tbl_robust,
    anova_fun = \(x, ...) tidy_wald_test(x, vcov = cov)
  )
) |>
  tbl_merge(
    tab_spanner = c("**Standard errors**", "**Robust errors**")
  )
Characteristic
Standard errors
Robust errors
OR 95% CI p-value OR 95% CI p-value
Chemotherapy Treatment

0.53

0.55
    Drug A

    Drug B 1.21 0.66, 2.24
1.21 0.64, 2.30
Grade

0.93

0.92
    I

    II 0.94 0.44, 1.98
0.94 0.45, 1.95
    III 1.09 0.52, 2.27
1.09 0.53, 2.22
Abbreviations: CI = Confidence Interval, OR = Odds Ratio