Personal code snippets of @tmasjc

Image by Mads Schmidt Rasmussen from unsplash.com

Minimal Bootstrap Theme by Zachary Betz

# Simulate Customer Retention

Feb 28, 2019 #survival

The code below simulate a scenario where nc customers onboard during a ts timespan. However, none of them managed to retain for more than sp periods. The objective is to compare various customer retention analysis especially periodic and retrospective analysis techniques.

library(tidyverse)
library(charlatan)

# Generate Some Samples  ----------------------------------------------

# seeding
set.seed(123)

# number of customers
nc = 500
# timespan (entire periods)
ts = 1:12
# survival periods
sp = 3

# generate some customers data
cust_info <- data_frame(
id = paste0("KB", ch_integer(n = nc, min = 100)),
jobs = sample(ch_job(n = 4, locale = "zh_TW"),
size = length(id), replace = TRUE)
)

# generate a sequence of vector with length n
make_seq <- function(n, x) {

# initiate an empty vector
vec = rep(0, n)

# generate index with range no more than x
ind = 1 : (2 + x)
while(diff(range(ind)) > x) {
ind = runif(x, 1, n + 1)
}

# fill vector with 1s
vec[ind] <- 1

return(vec)
}

# repeats ts times
surv_times <- nc %>%
replicate(make_seq(max(ts), sp)) %>%
t() %>% as_data_frame()
names(surv_times) <- paste0("M", ts)

# calc initial register period
join <- apply(surv_times, 1, function(x) min(which(x == 1)))

# samples
dat <- bind_cols(cust_info, join = join, surv_times)
head(dat)  
## # A tibble: 6 x 15
##   id    jobs    join    M1    M2    M3    M4    M5    M6    M7    M8    M9   M10
##   <chr> <chr>  <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 KB514 清潔工     1     1     1     0     1     0     0     0     0     0     0
## 2 KB562 清潔工     1     1     1     0     0     0     0     0     0     0     0
## 3 KB278 不動產／商…     4     0     0     0     1     1     1     0     0     0     0
## 4 KB625 清潔工     7     0     0     0     0     0     0     1     0     1     1
## 5 KB294 產品事業處…     4     0     0     0     1     1     0     1     0     0     0
## 6 KB917 財務或會計…     5     0     0     0     0     1     0     0     1     0     0
## # … with 2 more variables: M11 <dbl>, M12 <dbl>
# Active Customers ---------------------------------------------

calc_active_rate <- function(m) {
dat %>%
gather(period, active, -c(id, jobs, join)) %>%
filter(join <= m, period == paste0("M", m)) %>%
summarise(rate = mean(active)) %>%
[[("rate")
}
active_rate <- map_dbl(ts, calc_active_rate)

# make plot
data_frame(ts, active_rate) %>%
ggplot(aes(ts, active_rate, group = 1)) +
geom_point() +
geom_line(col = "navyblue") +
scale_x_continuous(breaks = 1:12) +
scale_y_continuous(labels = scales::percent) +
coord_cartesian(ylim = seq(0, 1, 0.05)) +
theme_minimal() +
labs(x = "Month", y = "Active Customers %")

# Periodic Survival -------------------------------------------------------

# (active at period t + 1) / (active at period t)
rolling_active <- function(t) {

# subset those who active at period t
s = dat[dat[which(names(dat) == paste0("M", t))] == 1, ]

# remain in the subsequent period
r = s[which(names(s) == paste0("M", t + 1))] == 1

# return
sum(r) / nrow(s)
}

# (active at period t + 1) / (register at period t)
rolling_retain <- function(t) {

# subset those who join at period t
s = subset(dat, join == t)

# remain in the subsequent period
r = s[which(names(s) == paste0("M", t + 1))] == 1

# return
sum(r) / nrow(s)
}

# put result into a data frame
res <- data_frame(
ind = 1:(max(ts) - 1),
grp_by_register_period = map_dbl(ind, rolling_retain),
grp_by_active_period = map_dbl(ind, rolling_active)
)

# make plot
res %>%
gather(var, val, -ind) %>%
ggplot(aes(ind, val, col = var)) +
geom_line() +
scale_color_discrete(labels = c("Group By Active Period",
"Group By Register Period")) +
scale_x_continuous(breaks = ts) +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
theme(legend.position = "bottom") +
labs(x = "Month", y = "Retention Rate %", col = "")

# Cohort Analysis ---------------------------------------------------------

# export to Excel to do pivot table
file <- dat %>%
select(-jobs) %>%
gather(month, active, -c(id, join)) %>%
mutate(month = factor(str_remove(month, "^M")))
# write_csv(file, "some/path")