Results analysis¶
In this notebook, we will conduct some high-level analysis of our model's prediction behavior. We will again use the R language and tidyverse tools for this analysis.
First, let's import the libraries that will be required for this notebook.
library(tidyverse)
library(yaml)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2
── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Configurations¶
Before we do our analysis, let's make sure we are retrieving the prediction outputs for the appropriate experiment.
config_id <- "mlml6_rate_pred_clsp"
main_config <- yaml.load_file(paste0("../experiments/configs/", config_id, "/main.yaml"))
dataset_id <- main_config$dataset_id
results_base <- paste0("../experiments/results/", config_id)
Load results¶
Let's load the results of our fitted model's test-set predictions.
results_df <- read_csv(paste0(results_base, "/predictions_df.csv"))
Rows: 3310 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (2): y, yhat
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Error metrics¶
Let's compute some error metrics. Notice that, for our grouped (summary) metrics, we round $y$ to the nearest $0.25$ and then compute metrics within each resulting bin. This is an easy way to place observations of a continuous target variable into meaningful groups. In our dataset, ratings are already given to the nearest $0.1$ (no reviewer would give an album a $7.51$), but that bin size is rather small and would be difficult to read in a figure.
# Bin size for our groupings of y
rnd_unit <- 0.25
# Observation-level error metrics
error_df <- results_df %>%
mutate(
y_rnd = round(y / rnd_unit) * rnd_unit,
se = (y - yhat)^2,
resid = y - yhat
)
# y bin-level error metrics
error_summary_df <- error_df %>%
group_by(y_rnd) %>%
summarize(
mse = mean(se),
rmse = sqrt(mse),
resid_mean = mean(resid),
resid_sd = sd(resid),
n_obs = n()
)
The first thing we should look at is the top-line RMSE and mean absolute residual that our model obtained on the test data.
error_df %>%
summarize(
rmse = sqrt( mean(se) ),
resid_mean = mean( abs(resid) )
)
| rmse | resid_mean |
|---|---|
| <dbl> | <dbl> |
| 0.8956093 | 0.6577236 |
At first glance, this is not too bad. Let's do a deeper investigation into the bias in our model's predictions on the test set.
Bias¶
We observed in our exploratory analysis that the ratings tended to tightly cluster in a certain range. This means that our model will likely perform much better for the "common" range of ratings, and worse the further outside of this range an observation is. Furthermore, it's likely that the estimates $\hat{y}$ will tend to be "pulled" towards the mean value of $y$. That is, $\hat{y}$ for $y \ll \bar{y}$ will tend to be overestimates while $\hat{y}$ for $\bar{y} \ll y$ will tend to be underestimates.
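Before plotting, we can get a quick numerical read on this shrinkage. The snippet below is a minimal check using only the already-loaded results_df: if predictions are pulled toward the mean, the spread of $\hat{y}$ should be noticeably smaller than the spread of $y$, and the least-squares slope of $\hat{y}$ on $y$ should sit well below $1$.
# Quick check of shrinkage toward the mean (uses results_df from above)
results_df %>%
  summarize(
    sd_y = sd(y),                            # spread of the true ratings
    sd_yhat = sd(yhat),                      # spread of the predictions
    slope_yhat_on_y = cov(y, yhat) / var(y)  # OLS slope of yhat on y
  )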
First, let's do some setup for our visualizations.
set_theme_elements <- function(plt) {
plt +
theme_classic() +
theme(text = element_text(size=16))
}
step_size <- 10
size_breaks1 <- c(-Inf, seq(10, 500, by = step_size), Inf)
size_labels1 <- c("<10", sapply(seq(10, 490, by = step_size), function(x) paste0(x, "-", x + step_size)), ">500")
size_values1 <- seq_along(size_labels1)
sign_colors <- c("neg" = "firebrick1", "pos" = "steelblue")
# Settings for plots
options(repr.plot.width = 16, repr.plot.height = 12, repr.plot.res = 100)
Visualizing residuals¶
Let's visualize the residuals of each of our predictions. Notice the three horizontal dashed lines. These denote (in units of rating "points") residuals of $-1$, $0$, and $1$. There is also a red slope-$1$ reference line with y-axis intercept at $-\bar{y}$: it crosses zero at $y = \bar{y}$ and traces the residuals that a constant prediction of $\bar{y}$ would produce.
Remember that negative residuals indicate overestimation and positive residuals indicate underestimation.
plt <- error_df %>%
mutate(
sign = as_factor(ifelse(resid > 0, "pos", "neg")),
) %>%
ggplot() +
geom_hline(aes(yintercept = 0.0), linetype = "dashed", color = "gray0") +
geom_hline(aes(yintercept = -1.0), linetype = "dashed", color = "gray25") +
geom_hline(aes(yintercept = 1.0), linetype = "dashed", color = "gray25") +
geom_abline(slope = 1, intercept = -(results_df %>% pull(y) %>% mean()), color = "firebrick") +
geom_point(aes(x = y, y = resid, color = sign), alpha = 0.75) +
scale_color_manual(values = sign_colors)
plt <- plt %>%
set_theme_elements() +
guides(color = "none") +
xlab("y") +
ylab("Residuals")
plt
This is a lot to take in. While there are some (very) large residuals when $y$ is small, the errors shrink considerably as we approach the more common values of $y$. However, the points overlap heavily where observations are dense, so the actual density is hard to judge visually. Let's try a different approach that gives a better sense of the overall behavior.
plt <- error_summary_df %>%
mutate(
sign = as_factor(ifelse(resid_mean > 0, "pos", "neg")),
n_obs_f = cut(
n_obs,
breaks = size_breaks1,
labels = size_labels1
)
) %>%
ggplot() +
geom_hline(aes(yintercept = 0.0), linetype = "dashed", color = "gray0") +
geom_hline(aes(yintercept = -1.0), linetype = "dashed", color = "gray50") +
geom_hline(aes(yintercept = 1.0), linetype = "dashed", color = "gray50") +
geom_abline(slope = 1, intercept = -(results_df %>% pull(y) %>% mean()), color = "firebrick") +
geom_errorbar(aes(x = y_rnd, ymin = resid_mean - resid_sd, ymax = resid_mean + resid_sd), width = 0.1, color = "gray75") +
geom_point(aes(x = y_rnd, y = resid_mean, size = n_obs_f, color = sign), alpha = 0.5) +
geom_text(aes(x = y_rnd, y = resid_mean, label = as.character(n_obs)), nudge_y = -0.3, nudge_x = 0.1, size=5) +
scale_size_manual(values = size_values1) +
scale_color_manual(values = sign_colors)
plt <- plt %>%
set_theme_elements() +
guides(size = "none", color = "none") +
xlab("y (binned)") +
ylab("Mean residuals")
plt
Here, we have plotted the mean residuals versus the binned $y$ (rounded to the nearest $0.25$) along with the standard deviation of residuals within those bins. The points (mean residual values) are explicitly labeled with, and sized proportionally to, the number of observations in the given bin.
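If we want the same information as a table rather than a figure, we can simply list the bins whose mean residual exceeds one rating point in magnitude; this is just a query against the error_summary_df computed above.
# y bins with systematic bias of more than one rating point
error_summary_df %>%
  filter(abs(resid_mean) > 1) %>%
  arrange(desc(abs(resid_mean))) %>%
  select(y_rnd, n_obs, resid_mean, resid_sd)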
To make things more concrete, let's look at some numbers. Here, we check what percentage of test-set predictions fall within $\pm 1$ and $\pm 2$ rating "points" of their true values.
n_row <- error_df %>% nrow()
n_row_within_1 <- error_df %>%
filter(abs(resid) <= 1.0) %>%
nrow()
n_row_within_2 <- error_df %>%
filter(abs(resid) <= 2.0) %>%
nrow()
pct_within_1 <- round((n_row_within_1 / n_row) * 100, 2)
pct_within_2 <- round((n_row_within_2 / n_row) * 100, 2)
print(pct_within_1)
print(pct_within_2)
[1] 79.06
[1] 96.22
All in all, this is not too bad. Almost $80\%$ of observations were predicted within $1$ rating point of their true value, and over $95\%$ were predicted within $2$ rating points.
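More generally, the share of predictions within any tolerance can be read off directly from the absolute residuals. Here is a small helper (a sketch that uses only error_df) for sweeping several thresholds at once; pct_within(1.0) reproduces the figure above.
# Percentage of test predictions within `tol` rating points of the true value
pct_within <- function(tol) {
  mean(abs(error_df$resid) <= tol) * 100
}
sapply(c(0.5, 1.0, 2.0), pct_within)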
Baseline¶
Our previous analysis showed that our model was mostly successful, at least for observations with ratings that weren't too far from the mean. However, how does this compare to a simpler model? Let's use a very simple baseline predictor: $\hat{y}_i = \bar{y}_{\text{train}}$ for all $i$ in the test set. In other words, predict the mean $y$ value of the training set for each $x_i$ in the test set.
Let's take a look at the performance of this simple baseline.
Load training data¶
First we need to load in the training data.
train_df <- read_csv(paste0("../data/pitchfork/", dataset_id, "/summary_df.csv"))
Rows: 22063 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): artist, album, reviewer, genre, label, reviewed
dbl (2): rating, review_n_tokens
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Naive model¶
Next, we define our naive model $\hat{f}(x_i) = \hat{y}_i = \bar{y}_{\text{train}}$.
yhat_baseline <- train_df %>% pull(rating) %>% mean()
yhat_baseline
Comparison¶
Now, we can compare our sophisticated NLP model to our baseline (naive) model.
Let's compute error metrics for our naive model.
# Observation-level error metrics
baseline_error_df <- results_df %>%
mutate(yhat = yhat_baseline) %>%
mutate(
y_rnd = round(y / rnd_unit) * rnd_unit,
se = (y - yhat)^2,
resid = y - yhat
)
Now, we compare the baseline's performance with the NLP model. Notice that we do not replicate the residuals plots: for the baseline, every residual is $y_i - \bar{y}_{\text{train}}$, so the points would fall exactly on a slope-$1$ line with intercept $-\bar{y}_{\text{train}}$, which is essentially the red reference line drawn in the plots above. In fact, looking back at the mean-residuals plot already hints at how our NLP regression model will compare to the baseline.
baseline_error_df %>%
summarize(
rmse = sqrt( mean(se) ),
resid_mean = mean( abs(resid) )
)
| rmse | resid_mean |
|---|---|
| <dbl> | <dbl> |
| 1.163958 | 0.8412766 |
error_df %>%
summarize(
rmse = sqrt( mean(se) ),
resid_mean = mean( abs(resid) )
)
| rmse | resid_mean |
|---|---|
| <dbl> | <dbl> |
| 0.8956093 | 0.6577236 |
Sure enough, our regression model outperforms the baseline in terms of both RMSE and mean absolute residual. While the difference may not be as stark as we might hope, there is a clear added benefit to predicting with the NLP model.
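To put the gap in a single number, we can express the model's errors as a relative reduction over the baseline; this is a small computation on the error_df and baseline_error_df objects defined above. With the values shown in the tables, it works out to roughly a $23\%$ reduction in RMSE and a $22\%$ reduction in the mean absolute residual.
# Relative improvement of the NLP model over the naive baseline
tibble(
  rmse_model = sqrt(mean(error_df$se)),
  rmse_baseline = sqrt(mean(baseline_error_df$se)),
  mae_model = mean(abs(error_df$resid)),
  mae_baseline = mean(abs(baseline_error_df$resid))
) %>%
  mutate(
    rmse_reduction_pct = 100 * (1 - rmse_model / rmse_baseline),
    mae_reduction_pct = 100 * (1 - mae_model / mae_baseline)
  )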
Closing thoughts¶
While our NLP regression model outperformed the simple $\bar{y}_{\text{train}}$ baseline model, there are steps we could take to improve or further assess our performance. Some of those steps include:
- Compare our model to a more sophisticated baseline. There are regression models that are simpler than our NLP regressor but more powerful than our naive baseline. It could be insightful to fit such a model and perform another comparison (see the sketch after this list).
- Up-sample extreme observations during training. There are clearly issues with systematic over- and underestimation for extreme/rare data points. Up-sampling these during training could help reduce the bias in our model's predictions.
- Try an even more sophisticated NLP regression model. We chose a lighter-weight transformer model (MiniLM-L6) to save on computational cost. It would be interesting to see whether a larger model (e.g., MPNet) performs better.
- Attempt other techniques to address long review texts. Our solution in this project was simply to truncate reviews that exceeded our model's maximum sequence length. We could try a more involved approach, such as breaking reviews into multiple "sub-reviews" and pooling their embeddings, or use models that were designed to handle longer sequences.
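As a concrete illustration of the first point, here is a minimal sketch of a stronger-but-still-simple baseline: an ordinary linear regression of the rating on review length and genre. It assumes a hypothetical test_df holding the held-out rows with the same columns as summary_df; it is meant to show the shape of such a comparison, not to reproduce our exact experimental split.
# Hypothetical sketch: a simple linear-model baseline on review metadata.
# `test_df` is assumed to contain the held-out rows with the same columns
# as summary_df (rating, review_n_tokens, genre, ...).
lm_baseline <- lm(rating ~ review_n_tokens + genre, data = train_df)
test_df %>%
  mutate(yhat = predict(lm_baseline, newdata = test_df)) %>%
  summarize(
    rmse = sqrt(mean((rating - yhat)^2)),
    resid_mean = mean(abs(rating - yhat))
  )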
Overall, we built a regressor by fine-tuning a pre-trained transformer model with a regression head, and showed that it outperformed a simple baseline predictor.