# Simulated patient-level data used to illustrate covariate-dependent
# missingness: the probability that a value is missing is a logistic function
# of covariates that are themselves fully observed.
n <- 500

# tibble() evaluates its arguments in order, so later columns can refer to
# earlier ones; the random draws therefore happen in the order written here.
df_reg <- tibble(
  iss = rnorm(n, mean = 28, sd = 12),
  sbp = rnorm(n, mean = 108, sd = 24),
  treatment = rbinom(n, size = 1, prob = 0.45),

  # Lactate: missingness probability increases with ISS and decreases with
  # SBP, i.e. high-ISS, low-SBP patients (sicker = less documented) are the
  # ones most likely to lack a value.
  p_miss_lactate = plogis(-1.5 + 0.06 * iss - 0.02 * sbp),
  lactate_miss = rbinom(n, size = 1, prob = p_miss_lactate),

  # GCS: missingness probability increases with ISS only.
  # NOTE(review): an earlier comment attributed this to penetrating /
  # high-mechanism injuries, but no mechanism variable is simulated here.
  p_miss_gcs = plogis(-2 + 0.04 * iss),
  gcs_miss = rbinom(n, size = 1, prob = p_miss_gcs)
)
# Bar chart of the share of missing lactate / GCS values within ISS bands.
#
# `iss` is drawn from an unbounded normal distribution, so a few simulated
# values fall below 0 (or, rarely, above 75). With finite outer breaks those
# rows were binned as NA and showed up as a spurious "NA" bar. The outer breaks
# are therefore open-ended, and `right = FALSE` makes the intervals
# [-Inf, 15), [15, 25), [25, 35), [35, Inf) so they match the "<15" and "35+"
# labels exactly.
df_reg |>
  mutate(
    iss_group = cut(
      iss,
      breaks = c(-Inf, 15, 25, 35, Inf),
      labels = c("<15", "15–25", "25–35", "35+"),
      right = FALSE
    )
  ) |>
  group_by(iss_group) |>
  # The indicators are 0/1, so the group mean is the proportion missing.
  summarise(across(c(lactate_miss, gcs_miss), mean), .groups = "drop") |>
  pivot_longer(-iss_group) |>
  mutate(
    name = recode(
      name,
      lactate_miss = "Lactate missing",
      gcs_miss = "GCS missing"
    )
  ) |>
  ggplot(aes(iss_group, value, fill = name)) +
  geom_col(position = "dodge", alpha = 0.85) +
  # Colours are named so each series keeps its colour regardless of the order
  # in which the labels happen to sort.
  scale_fill_manual(
    values = c("GCS missing" = "#0891b2", "Lactate missing" = "#e63946")
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Missingness rate rises with severity — this is MAR, not MCAR",
    x = "ISS group",
    y = "% missing",
    fill = NULL
  ) +
  theme_di()