Issue
I have a dataframe
and a function
that creates a new variable, adds it to the dataframe and then assign
s the dataframe back to the global environment. The problem is that if I rerun the function it creates a duplicate of the variable.
library(tidyverse)
library(rms)
set.seed(10)
ds <- data.frame(
ftime = rexp(200),
fstatus = sample(0:1,200, replace = TRUE),
x1 = runif(200),
x2 = runif(200),
x3 = factor(sample(LETTERS[1:3], size = 200, replace = TRUE)))
ds
#model
s <- Surv(ds$ftime, ds$fstatus == 1)
fit <- cph(s ~ x1 + x2 + x3, data = ds, surv = TRUE, x = TRUE, y = TRUE)
#function to add prediction to dataset
pred_fun <- function(time_to_sur, model) {
pred_data <- ds[, c("x1", "x2", "x3")] %>%
mutate(ftime = time_to_sur,
fstatus = 1) %>%
as.data.frame()
ds$pred_var_tmp <-
rms::survest(model, times = time_to_sur,
newdata = pred_data,
se.fit = FALSE, what = "survival")$surv
#rename variable
pred_var <- paste0("pred_prob_", as.character(time_to_sur), "_rms")
names(ds)[names(ds) == "pred_var_tmp"] <- pred_var
#assign dataset back to global environment
assign("ds", ds, env = .GlobalEnv)
}
The function works as it should:
pred_fun(time_to_sur = 0.2, fit)
names(ds)
# [1] "ftime" "fstatus" "x1"
# [4] "x2" "x3" "pred_prob_0.2_rms"
But if I rerun it again, it creates a duplicate of the variable
pred_fun(time_to_sur = 0.2, fit)
names(ds)
# [1] "ftime" "fstatus" "x1"
# [4] "x2" "x3" "pred_prob_0.2_rms"
# [7] "pred_prob_0.2_rms"
This is to be expected because the function create a new variable first with a different name and then assigns the name after. I thought the following might work in the function but it doesn’t:
ds$eval(substitute(paste0("pred_prob_", as.character(tt), "_rms"))) <-
rms::survest(model, times = time_to_sur,
newdata = pred_data,
se.fit = FALSE, what = "survival")$surv
How can I fix this and what is best practices in this situation?
Thanks
Solution
Thanks to @G. Grothendieck and @Limey, the following simplification works (pred_fun_final
) although I do get a warning
message.
#original function in OP
pred_fun_original <- function(time_to_sur, model) {
pred_data <- ds[, c("x1", "x2", "x3")] %>%
mutate(ftime = time_to_sur,
fstatus = 1) %>%
as.data.frame()
ds$pred_var_tmp <-
rms::survest(model, times = time_to_sur,
newdata = pred_data,
se.fit = FALSE, what = "survival")$surv
#rename variable
pred_var <- paste0("pred_prob_", as.character(time_to_sur), "_rms")
names(ds)[names(ds) == "pred_var_tmp"] <- pred_var
assign("ds", ds, env = .GlobalEnv)
}
pred_fun_original(time_to_sur = 0.2, fit)
#save created variable
test1 <- ds$pred_prob_0.2_rms
#remove pred_prob_0.2_rms
ds <- ds %>%
select(-pred_prob_0.2_rms)
New function with warning
:
#fixed function
pred_fun_final <- function(data, time_to_sur, model) {
newName <- paste0("pred_prob_", as.character(time_to_sur), "_rms")
pred_data <- data[, c("x1", "x2", "x3")] %>%
mutate(ftime = time_to_sur,
fstatus = 1) %>%
as.data.frame()
data <- data %>%
mutate({{newName}} := rms::survest(model, times = time_to_sur,
newdata = pred_data,
se.fit = FALSE, what = "survival")$surv)
data
}
ds <- pred_fun_final(ds, time_to_sur = 0.2, fit)
# Warning message:
# Problem with `mutate()` column `pred_prob_0.2_rms`.
# i `pred_prob_0.2_rms = ...$NULL`.
#save variable
test2 <- ds$pred_prob_0.2_rms
The two variables are not identical but that is because one is named and the other is not (as.numeric()
would fix this). It doesn’t explain the warning
message though.
identical(test1, test2)
#FALSE
str(test1)
# num [1:200] 0.906 0.9 0.884 0.884 0.886 ...
str(test2)
# Named num [1:200] 0.906 0.9 0.884 0.884 0.886 ...
# - attr(*, "names")= chr [1:200] "1" "2" "3" "4" ...
Answered By – user63230
Answer Checked By – Terry (BugsFixing Volunteer)