library(titanic)
library(tidyverse)
titanic_df <- titanic_train %>%
as_tibble() %>%
janitor::clean_names() %>%
mutate(
survived = case_when(
survived == "1" ~ "survived",
TRUE ~ "died"
) %>%
as.factor() %>%
fct_relevel(c("died", "survived"))
) %>%
mutate(
cabin = case_when(
cabin == "" ~ NA_character_,
TRUE ~ cabin
),
embarked = case_when(
embarked == "" ~ NA_character_,
TRUE ~ embarked
),
across(c(pclass, sex, embarked), as.factor),
passenger_id = as.character(passenger_id)
)
library(tidymodels)
set.seed(123)
titanic_split <- initial_split(titanic_df, 0.75, strata = survived)
titanic_training <- titanic_split %>% training()
titanic_testing <- titanic_split %>% testing()
titanic_recipe <- recipe(
survived ~ fare + sex + sib_sp + parch + pclass,
data = titanic_training
) %>%
step_corr(all_numeric(), threshold = 0.8) %>%
step_normalize(all_numeric()) %>%
step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE)
titanic_recipe_prep <- titanic_recipe %>% prep(titanic_training)
titanic_training_prep <- titanic_recipe_prep %>% bake(titanic_training)
titanic_testing_prep <- titanic_recipe_prep %>% bake(titanic_testing)
library(rstanarm)
bayes_lasso_logistic_fit <- stan_glm(
survived ~ ., data = titanic_training_prep,
family = binomial(link = "logit"), prior = lasso()
)
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 8.7e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.87 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 2.68817 seconds (Warm-up)
## Chain 1: 1.97338 seconds (Sampling)
## Chain 1: 4.66155 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4.9e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 3.0409 seconds (Warm-up)
## Chain 2: 3.6774 seconds (Sampling)
## Chain 2: 6.7183 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 2.9e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.29 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 2.82567 seconds (Warm-up)
## Chain 3: 2.71309 seconds (Sampling)
## Chain 3: 5.53876 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 2.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.25 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 2.80339 seconds (Warm-up)
## Chain 4: 3.00071 seconds (Sampling)
## Chain 4: 5.80409 seconds (Total)
## Chain 4:
prior_summary(bayes_lasso_logistic_fit)
## Priors for model 'bayes_lasso_logistic_fit'
## ------
## Intercept (after predictors centered)
## ~ normal(location = 0, scale = 2.5)
##
## Coefficients
## ~ lasso(df = [1,1,1,...], location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
## ------
## See help('prior_summary.stanreg') for more details
draws <- as.data.frame(bayes_lasso_logistic_fit) %>% as_tibble()
tibble(
variable = names(draws),
prob_pos = map_dbl(draws, ~ mean(.x >= 0)),
prob_neg = map_dbl(draws, ~ mean(.x < 0))
)
## # A tibble: 9 x 3
## variable prob_pos prob_neg
## <chr> <dbl> <dbl>
## 1 (Intercept) 0.516 0.484
## 2 fare 0.954 0.046
## 3 sib_sp 0.011 0.989
## 4 parch 0.243 0.757
## 5 sex_female 0.884 0.116
## 6 sex_male 0.107 0.893
## 7 pclass_X1 0.860 0.140
## 8 pclass_X2 0.542 0.458
## 9 pclass_X3 0.0965 0.904
bayes_ridge_logistic_fit <- stan_glm(
survived ~ ., data = titanic_training_prep,
family = binomial(link = "logit")
)
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 2.8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 2.35943 seconds (Warm-up)
## Chain 1: 2.21429 seconds (Sampling)
## Chain 1: 4.57372 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 2.3e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 2.2168 seconds (Warm-up)
## Chain 2: 2.38794 seconds (Sampling)
## Chain 2: 4.60475 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 2.3e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 2.41493 seconds (Warm-up)
## Chain 3: 2.36463 seconds (Sampling)
## Chain 3: 4.77956 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 2.3e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 2.22715 seconds (Warm-up)
## Chain 4: 2.71864 seconds (Sampling)
## Chain 4: 4.94579 seconds (Total)
## Chain 4:
prior_summary(bayes_ridge_logistic_fit)
## Priors for model 'bayes_ridge_logistic_fit'
## ------
## Intercept (after predictors centered)
## ~ normal(location = 0, scale = 2.5)
##
## Coefficients
## Specified prior:
## ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
## Adjusted prior:
## ~ normal(location = [0,0,0,...], scale = [2.50,2.50,2.50,...])
## ------
## See help('prior_summary.stanreg') for more details
draws <- as.data.frame(bayes_ridge_logistic_fit) %>% as_tibble()
tibble(
variable = names(draws),
prob_pos = map_dbl(draws, ~ mean(.x >= 0)),
prob_neg = map_dbl(draws, ~ mean(.x < 0))
)
## # A tibble: 9 x 3
## variable prob_pos prob_neg
## <chr> <dbl> <dbl>
## 1 (Intercept) 0.490 0.510
## 2 fare 0.956 0.0435
## 3 sib_sp 0.00675 0.993
## 4 parch 0.212 0.788
## 5 sex_female 0.646 0.354
## 6 sex_male 0.349 0.651
## 7 pclass_X1 0.593 0.407
## 8 pclass_X2 0.536 0.464
## 9 pclass_X3 0.432 0.568