This document contains exploratory plots for binary response data as well as the R code that generates these graphs.The plots presented here are based on simulated data (see: PKPD Datasets). Data specifications can be accessed on Datasets and Rmarkdown template to generate this page can be found on Rmarkdown-Template. You may also download the Multiple Ascending Dose PK/PD dataset for your reference (download dataset).
library(ggplot2)
library(dplyr)
library(xgxr)
library(tidyr)
#set seed for random number generator (for plots with jitter)
set.seed(12345)
#flag for labeling figures as draft
status = "DRAFT"
# ggplot settings
xgx_theme_set()
#directories for saving individual graphs
dirs = list(
parent_dir = "Parent_Directory",
rscript_dir = "./",
rscript_name = "Example.R",
results_dir = "./",
filename_prefix = "",
filename = "Example.png")
The plots presented here are based on simulated data (see: PKPD Datasets). You may also download the Multiple Ascending Dose PK/PD dataset for your reference (download dataset).
#load dataset
pkpd_data <- read.csv("../Data/Multiple_Ascending_Dose_Dataset2.csv")
DOSE_CMT = 1
PK_CMT = 2
PD_CMT = 6
SS_PROFDAY = 6 # steady state prof day
PD_PROFDAYS <- c(0, 2, 4, 6)
TAU = 24 # time between doses, units should match units of TIME, e.g. 24 for QD, 12 for BID, 7*24 for Q1W (when units of TIME are h)
#ensure dataset has all the necessary columns
pkpd_data = pkpd_data %>%
mutate(ID = ID, #ID column
TIME = TIME, #TIME column name
NOMTIME = NOMTIME,#NOMINAL TIME column name
PROFDAY = case_when(
NOMTIME < (SS_PROFDAY - 1)*24 ~ 1 + floor(NOMTIME / 24),
NOMTIME >= (SS_PROFDAY - 1)*24 ~ SS_PROFDAY
), #PROFILE DAY day associated with profile, e.g. day of dose administration
LIDV = LIDV, #DEPENDENT VARIABLE column name
CENS = CENS, #CENSORING column name
CMT = CMT, #COMPARTMENT column
DOSE = DOSE, #DOSE column here (numeric value)
TRTACT = TRTACT, #DOSE REGIMEN column here (character, with units),
LIDV_NORM = LIDV/DOSE,
LIDV_UNIT = EVENTU,
DAY_label = ifelse(PROFDAY > 0, paste("Day", PROFDAY), "Baseline"),
BINARY_LEVELS = factor(case_when(
CMT != PD_CMT ~ as.character(NA),
LIDV == 0 ~ "Nonresponder",
LIDV == 1 ~ "Responder"
), levels = c("Nonresponder", "Responder"))
)
#create a factor for the treatment variable for plotting
pkpd_data = pkpd_data %>%
arrange(DOSE) %>%
mutate(TRTACT_low2high = factor(TRTACT, levels = unique(TRTACT)),
TRTACT_high2low = factor(TRTACT, levels = rev(unique(TRTACT)))) %>%
select(-TRTACT)
#create pk dataset
pk_data <- pkpd_data %>%
filter(CMT==PK_CMT)
#create pd binary dataset
pd_data <- pkpd_data %>%
filter(CMT==PD_CMT) %>%
mutate(LIDV_jitter = jitter(LIDV, amount = 0.1))
#create wide pkpd dataset for plotting PK vs PD
pkpd_data_wide = pd_data %>%
select(ID, NOMTIME, PD = LIDV, BINARY_LEVELS) %>%
right_join(pk_data %>% select(-BINARY_LEVELS), by = c("ID", "NOMTIME")) %>%
rename(CONC = LIDV) %>%
filter(!is.na(PD))
#perform NCA, for additional plots
NCA = pk_data %>%
group_by(ID, DOSE) %>%
filter(!is.na(LIDV)) %>%
summarize(AUC_0 = ifelse(length(LIDV[NOMTIME > 0 & NOMTIME <= TAU]) > 1,
caTools::trapz(TIME[NOMTIME > 0 & NOMTIME <= TAU],
LIDV[NOMTIME > 0 & NOMTIME <= TAU]),
NA),
Cmax_0 = ifelse(length(LIDV[NOMTIME > 0 & NOMTIME <= TAU]) > 1,
max(LIDV[NOMTIME > 0 & NOMTIME <= TAU]),
NA),
AUC_tau = ifelse(length(LIDV[NOMTIME > (SS_PROFDAY-1)*24 &
NOMTIME <= ((SS_PROFDAY-1)*24 + TAU)]) > 1,
caTools::trapz(TIME[NOMTIME > (SS_PROFDAY-1)*24 &
NOMTIME <= ((SS_PROFDAY-1)*24 + TAU)],
LIDV[NOMTIME > (SS_PROFDAY-1)*24 &
NOMTIME <= ((SS_PROFDAY-1)*24 + TAU)]),
NA),
Cmax_tau = ifelse(length(LIDV[NOMTIME > (SS_PROFDAY-1)*24 &
NOMTIME <= ((SS_PROFDAY-1)*24 + TAU)]) > 1,
max(LIDV[NOMTIME > (SS_PROFDAY-1)*24 &
NOMTIME <= ((SS_PROFDAY-1)*24 + TAU)]),
NA),
SEX = SEX[1], #this part just keeps the SEX and WEIGHTB covariates
WEIGHTB = WEIGHTB[1]) %>%
gather(PARAM, VALUE,-c(ID, DOSE, SEX, WEIGHTB)) %>%
ungroup() %>%
mutate(VALUE_NORM = VALUE/DOSE,
PROFDAY = ifelse(PARAM %in% c("AUC_0", "Cmax_0"), 1, SS_PROFDAY))
#add response data at day 1 and at steady state to NCA for additional plots
NCA <- pd_data %>% subset(PROFDAY %in% c(1, SS_PROFDAY),) %>%
select(ID, PROFDAY, DAY_label, PD = LIDV, TRTACT_low2high, TRTACT_high2low) %>%
merge(NCA, by = c("ID", "PROFDAY"))
#units and labels
time_units_dataset = "hours"
time_units_plot = "days"
trtact_label = "Dose"
dose_units = unique((pkpd_data %>% filter(CMT == DOSE_CMT))$LIDV_UNIT) %>% as.character()
dose_label = paste0("Dose (", dose_units, ")")
conc_units = unique(pk_data$LIDV_UNIT) %>% as.character()
conc_label = paste0("Concentration (", conc_units, ")")
AUC_units = paste0("h.", conc_units)
concnorm_label = paste0("Normalized Concentration (", conc_units, ")/", dose_units)
pd_binary_label = "Response"
pd_response_label = "Responder Rate (%)"
Binary data is data that can take on one of two values. This often happens when there is a characteristic/event/response, etc. of interest that subjects either have/achieve or they don’t. Binary response data can also come out of dichotomizing continuous data. For example in the psoriasis indication the binary response variable PASI90 (yes/no) is defined as subjects achieving a PASI score of at least 90.
There are two broad categories of PK/PD exploratory plots covered on this page
These plots are displayed below.
We start with Expsoure and Response vs Time, or longitduinal plots
Summarize data with Mean +/- 95% confidence intervals for the percent
responders over time. Confidence intervals should be claculated with
binom::binom.exact()
. You should either color or facet by
dose group.
Questions to ask:
#PK data
gg <- ggplot(data = pk_data, aes(x = NOMTIME, y = LIDV, color = TRTACT_high2low))
gg <- gg + xgx_stat_ci()
gg <- gg + xgx_scale_y_log10()
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + labs(y=conc_label,color = trtact_label)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
#PD data
gg <- ggplot(data = pd_data, aes(x = NOMTIME, y = LIDV, color = TRTACT_high2low))
gg <- gg + xgx_stat_ci(distribution = "binomial", position = position_dodge(width = 12), alpha = 0.5)
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + scale_y_continuous(labels=scales::percent)
gg <- gg + labs(y = pd_response_label, color = trtact_label)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
If coloring by dose makes a messy plot, you can try faceting by dose instead. Here we show the same data, but faceted by dose.
#PK data
gg <- ggplot(data = pk_data, aes(x = NOMTIME, y = LIDV))
gg <- gg + xgx_stat_ci()
gg <- gg + xgx_scale_y_log10()
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + labs(y=conc_label,color = trtact_label)
gg <- gg + facet_grid(~TRTACT_low2high)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
#PD data
gg <- ggplot(data = pd_data, aes(x = NOMTIME, y = LIDV))
gg <- gg + xgx_stat_ci(distribution = "binomial")
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + scale_y_continuous(labels=scales::percent)
gg <- gg + labs(y = pd_response_label, color = trtact_label)
gg <- gg + facet_grid(~TRTACT_low2high)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
Another longitudinal plot that can be useful for binary data is plotting the PK over time for all individuals, and coloring by response type. This plot is often used for adverse events plotting. For studies with few PK observations, a PK model may be needed in order to produce concentrations at each time point where response is measured.
gg <- ggplot(data = pkpd_data_wide, aes(x = NOMTIME, y = CONC))
gg <- gg + geom_boxplot(aes(group = factor(NOMTIME)), width = 0.5*24)
gg <- gg + geom_jitter(aes( color = factor(BINARY_LEVELS), alpha = factor(BINARY_LEVELS)), width = 0.1*24, height = 0)
gg <- gg + scale_color_manual(values = c("black","blue")) + scale_alpha_manual(values = c(0.2,1))
gg <- gg + labs(y = conc_label, color = "Response", alpha = "Response")
gg <- gg + xgx_scale_y_log10()
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
print(gg)
Use spaghetti plots to visualize the extent of variability between individuals. The wider the spread of the profiles, the higher the between subject variability. Distinguish different doses by color, or separate into different panels.
When plotting individual binomial data, it is often helpful to stagger the dots and use transparency, so that it is easier to detect individual dots.
gg <- ggplot(data = pk_data, aes(x = TIME, y = LIDV))
gg <- gg + geom_line(aes(group = ID, color = factor(TRTACT_high2low)), size = 1, alpha = 0.5)
gg <- gg + geom_point(data = pk_data %>% filter(CENS==0), aes(color = TRTACT_high2low), size = 2, alpha = 0.5)
gg <- gg + geom_point(data = pk_data %>% filter(CENS==1), color="red", shape=8, size = 2, alpha = 0.5)
gg <- gg + xgx_scale_y_log10()
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + labs(y = conc_label, color = trtact_label)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
gg <- ggplot(data = pd_data, aes(x = TIME, y = LIDV_jitter))
gg <- gg + geom_line(aes(group = ID, color = factor(TRTACT_high2low)), size = 1, alpha = 0.5)
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + scale_y_continuous(labels=scales::percent)
gg <- gg + labs(y = conc_label, color = pd_response_label)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
gg <- ggplot(data = pk_data, aes(x = TIME, y = LIDV))
gg <- gg + geom_line(aes(group = ID), size = 1, alpha = 0.2)
gg <- gg + geom_point(aes(color = factor(CENS), shape = factor(CENS), alpha = 0.3), size = 2, alpha = 0.2)
gg <- gg + scale_shape_manual(values=c(1,8))
gg <- gg + scale_color_manual(values=c("grey50","red"))
gg <- gg + xgx_scale_y_log10()
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + labs(y = conc_label, shape = "BLQ", color = "BLQ")
gg <- gg + facet_grid(.~TRTACT_low2high)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
gg <- ggplot(data = pd_data, aes(x = TIME, y = LIDV_jitter))
gg <- gg + geom_line(aes(group = ID), alpha = 0.2)
gg <- gg + geom_point(size = 2, alpha = 0.2)
gg <- gg + xgx_scale_x_time_units(units_dataset = time_units_dataset,
units_plot = time_units_plot)
gg <- gg + labs(y = pd_binary_label, shape = "BLQ", color = "BLQ")
gg <- gg + facet_grid(.~TRTACT_low2high)
gg <- gg + xgx_annotate_status(status)
gg <- gg + xgx_annotate_filenames(dirs)
#if saving copy of figure, replace xgx_annotate lines with xgx_save() shown below:
#xgx_save(width,height,dirs,"filename_main",status)
print(gg)
gg <- ggplot(data = pkpd_data_wide, aes(y = CONC, x = PD))
gg <- gg + geom_boxplot(aes(group = PD), width = 0.5, outlier.shape=NA)
gg <- gg + geom_jitter(data = pkpd_data_wide %>% filter(CENS == 0, PROFDAY == 5),
aes(color = TRTACT_high2low), shape=19, width = 0.1, height = 0.0, alpha = 0.5)
gg <- gg + geom_jitter(data = pkpd_data_wide %>% filter(CENS == 1, PROFDAY == 5),
color = "red", shape=8, width = 0.1, height = 0.0, alpha = 0.5)
gg <- gg + xgx_scale_y_log10()
gg <- gg + labs(x = pd_binary_label, y = conc_label, color = trtact_label)
gg <- gg + coord_flip()
print(gg)
Plot response against exposure. Include a logistic regression for binary data to help determine the shape of the exposure-respone relationship. Summary information such as mean and 95% confidence intervals by quartiles of exposure can also be plotted. The exposure metric that you use in these plots could be either raw concentrations, or NCA or model-derived exposure metrics (e.g. Cmin, Cmax, AUC), and may depend on the level of data that you have available.
pkpd_data_wide_plot = pkpd_data_wide %>%
filter(PROFDAY %in% c(1,3,5))
gg <- ggplot(data = pkpd_data_wide_plot, aes(x = CONC, y = PD))
gg <- gg + geom_jitter(aes(color = TRTACT_high2low), width = 0, height = 0.05, alpha = 0.5)
gg <- gg + geom_smooth(method = "glm", method.args = list(family=binomial(link = logit)), color = "black")
gg <- gg + xgx_stat_ci(bins = 4, distribution = "binomial", geom = "errorbar", size = 0.5)
gg <- gg + xgx_stat_ci(bins = 4, distribution = "binomial", geom = "point", shape = 0, size = 4)
gg <- gg + scale_y_continuous(labels=scales::percent)
gg <- gg + labs(x = conc_label, y = pd_response_label, color = trtact_label)
gg <- gg + xgx_scale_x_log10()
gg <- gg + facet_grid(~DAY_label)
print(gg)
Plotting AUC vs response instead of concentration vs response may make more sense in some situations. For example, when there is a large delay between PK and PD it would be diffcult to relate the time-varying concentration with the response. If rich sampling is only done at a particular point in the study, e.g. at steady state, then the AUC calculated on the rich profile could be used as the exposure variable for a number of PD visits. If PK samples are scarce, average Cmin could also be used as the exposure metric.
gg <- ggplot(NCA, aes(x = VALUE, y = PD))
gg <- gg + geom_jitter(aes( color = TRTACT_high2low), width = 0, height = 0.05, alpha = 0.5)
gg <- gg + geom_smooth(method = "glm", method.args = list(family=binomial(link = logit)), color = "black")
gg <- gg + xgx_stat_ci(bins = 4, conf_level = 0.95, distribution = "binomial", geom = c("point"), shape = 0, size = 4)
gg <- gg + xgx_stat_ci(bins = 4, conf_level = 0.95, distribution = "binomial", geom = c("errorbar"), size = 0.5)
gg <- gg + facet_wrap(~DAY_label + PARAM, scales = "free_x")
gg <- gg + labs(color = trtact_label, x = "NCA parameter", y = pd_response_label)
gg <- gg + xgx_scale_x_log10()
gg <- gg + scale_y_continuous(breaks=c(0,.5,1), labels = scales::percent)
print(gg)
Stratify exposure-response plots by covariates of interest to explore whether any key covariates impact response independent of exposure. For examples of plots and code startifying by covariate, see Continuous PKPD Covariate Section
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)
##
## Matrix products: default
## BLAS/LAPACK: /CHBS/apps/EB/software/imkl/2019.1.144-gompi-2019a/compilers_and_libraries_2019.1.144/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] survminer_0.4.9 ggpubr_0.4.0 survival_3.4-0 knitr_1.40 broom_1.0.1 caTools_1.18.2 DT_0.26 forcats_0.5.2 stringr_1.4.1
## [10] purrr_0.3.5 readr_2.1.3 tibble_3.1.8 tidyverse_1.3.2 xgxr_1.1.1 zoo_1.8-11 gridExtra_2.3 tidyr_1.2.1 dplyr_1.0.10
## [19] ggplot2_3.3.6
##
## loaded via a namespace (and not attached):
## [1] googledrive_2.0.0 colorspace_2.0-3 ggsignif_0.6.2 deldir_1.0-6 rio_0.5.27 ellipsis_0.3.2 class_7.3-19
## [8] htmlTable_2.2.1 markdown_1.2 base64enc_0.1-3 fs_1.5.2 gld_2.6.2 rstudioapi_0.14 proxy_0.4-26
## [15] farver_2.1.1 Deriv_4.1.3 fansi_1.0.3 mvtnorm_1.1-3 lubridate_1.8.0 xml2_1.3.3 codetools_0.2-18
## [22] splines_4.1.0 cachem_1.0.6 rootSolve_1.8.2.2 Formula_1.2-4 jsonlite_1.8.3 km.ci_0.5-2 binom_1.1-1
## [29] cluster_2.1.3 dbplyr_2.2.1 png_0.1-7 compiler_4.1.0 httr_1.4.4 backports_1.4.1 assertthat_0.2.1
## [36] Matrix_1.5-1 fastmap_1.1.0 gargle_1.2.1 cli_3.4.1 prettyunits_1.1.1 htmltools_0.5.3 tools_4.1.0
## [43] gtable_0.3.1 glue_1.6.2 lmom_2.8 Rcpp_1.0.9 carData_3.0-4 cellranger_1.1.0 jquerylib_0.1.4
## [50] vctrs_0.5.0 nlme_3.1-160 crosstalk_1.2.0 xfun_0.34 openxlsx_4.2.4 rvest_1.0.3 lifecycle_1.0.3
## [57] rstatix_0.7.0 googlesheets4_1.0.1 MASS_7.3-58.1 scales_1.2.1 hms_1.1.2 expm_0.999-6 RColorBrewer_1.1-3
## [64] curl_4.3.3 yaml_2.3.6 Exact_2.1 KMsurv_0.1-5 pander_0.6.4 sass_0.4.2 rpart_4.1.16
## [71] reshape_0.8.8 latticeExtra_0.6-30 stringi_1.7.8 highr_0.9 e1071_1.7-8 checkmate_2.1.0 zip_2.2.0
## [78] boot_1.3-28 rlang_1.0.6 pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.17 lattice_0.20-45 htmlwidgets_1.5.4
## [85] labeling_0.4.2 tidyselect_1.2.0 GGally_2.1.2 plyr_1.8.7 magrittr_2.0.3 R6_2.5.1 DescTools_0.99.42
## [92] generics_0.1.3 Hmisc_4.7-0 DBI_1.1.3 pillar_1.8.1 haven_2.5.1 foreign_0.8-82 withr_2.5.0
## [99] mgcv_1.8-41 abind_1.4-5 RCurl_1.98-1.4 nnet_7.3-17 car_3.0-11 crayon_1.5.2 modelr_0.1.9
## [106] survMisc_0.5.5 interp_1.1-2 utf8_1.2.2 tzdb_0.3.0 rmarkdown_2.17 progress_1.2.2 jpeg_0.1-9
## [113] grid_4.1.0 readxl_1.4.1 minpack.lm_1.2-1 data.table_1.14.2 reprex_2.0.2 digest_0.6.30 xtable_1.8-4
## [120] munsell_0.5.0 bslib_0.4.0