Note that this supplement still uses the version of the ERforResearch package from when the paper was published. Check out the package website, which includes a package demo, for the latest functionality!

This online supplement reproduces all the Figures and Tables presented in the publication “Rethinking the Funding Line at the Swiss National Science Foundation: Bayesian Ranking and Lottery”. The methodology of the expected rank (ER) for grant peer review is fully implemented in the R package {ERforResearch}, which can be downloaded from GitHub together with its documentation. To install the package, the following code can be used:

# Install ERforResearch from github:
# devtools::install_github("snsf-data/ERforResearch")

# For this result reproduction we still use the old version of ERforResearch, 
# which can be installed like so: 
devtools::install_github("snsf-data/ERforResearch@old.version.for.paper")
library(ERforResearch)
# Further packages used throughout this supplement:
library(tidyverse) # dplyr, ggplot2, stringr, ...
library(coda)      # mcmc() used on the posterior samples below
## Some helper functions for later:
# Function to extract the Intraclass correlation coefficient (ICC) from the 
# matrix of individual votes:
get_icc_from_matrix <- function(individual_votes) {
  (individual_votes %>% 
     select(-proposal) %>% 
     mutate_all(function(x) ERforResearch:::get_num_grade_snsf(x)) %>% 
     as.data.frame() %>% 
     psych::ICC(x = ., missing = FALSE))$results %>% 
    filter(type == "ICC3") %>% # A fixed set of k judges rates each target
    mutate(icc = paste0(round(ICC, 2), " (",
                        round(`lower bound`, 2), "; ", 
                        round(`upper bound`, 2), ")")) %>% 
    pull(icc) %>% 
    return()
}
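# Once the vote matrices are imported below, this helper can be called
# directly on any of them; e.g. get_icc_from_matrix(stem_mat) would return
# the ICC(3) estimate with its 95% confidence interval as a formatted string.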

# Function to graphically represent all the votes, and the ICC from the
# matrix of individual votes:
get_grades_plot <- function(long_individual_votes, individual_votes_mat = NULL,
                            x_min = NULL, x_max = NULL, title = "",
                            jitter_h = .02, jitter_w = .025,
                            jitter_alpha = .5){
  if (is.null(x_min)){
    x_min <- long_individual_votes %>% pull(num_grade) %>% min
    x_min <- ifelse(x_min > 1, x_min - 1.2, x_min -.2)
  }
  if (is.null(x_max)){
    x_max <- long_individual_votes %>% pull(num_grade) %>% max
    x_max <- ifelse(x_max < 6, x_max + 1.2, x_max +.2)
  }
  if (!is.null(individual_votes_mat)){
    icc <- get_icc_from_matrix(individual_votes_mat)
    title <- paste0(ifelse(title == "", "ICC = ",
                           paste0(title, ": ICC = ")), icc)
  }
  plot_data <- long_individual_votes %>% 
    group_by(proposal) %>% 
    mutate(avg = mean(num_grade, na.rm = TRUE)) %>% 
    ungroup() 
  plot_data <- plot_data %>% 
    select(proposal, avg) %>% 
    distinct() %>% 
    arrange(avg) %>% 
    mutate(order = 1:n()) %>% 
    select(-avg) %>% 
    left_join(plot_data, by = "proposal")
  plot_data %>% 
    ggplot(aes(x = num_grade, y = order)) + 
    geom_jitter(aes(color = "Individual Votes"), height = jitter_h, 
                width = jitter_w, alpha = jitter_alpha, size = 1) +
    geom_point(aes(x = avg, color = "Average"), size = 1.5) + 
    theme_minimal() +
    scale_y_continuous(breaks = unique(plot_data$order),
                       labels = 
                         as.numeric(str_extract_all(unique(plot_data$proposal),
                                                    "[0-9]+"))) +
    labs(x = "Assessor Vote", y = "Ordered proposal", title = title) +
    lims(x = c(x_min, x_max)) +
    scale_color_manual(name = " ", 
                       values = c("Average"="#FF6F6F",
                                  "Individual Votes" = "#848383")) +
    theme(axis.text.y = element_blank(),
          legend.position = c(0.5, .5),
          axis.title.y = element_blank(),
          panel.grid.major.y = element_line(size = .1))
}


# Bland-Altman Plot for Agreement:
get_agreement_plot <- function(rankings, rankings_ordinal, title, ymin = -.5,
                               ymax = .5, pt_alpha = .8, pt_size = 1){
  difference <- rankings %>%  
    select(id_application, er) %>% 
    rename(er1 = er) %>% 
    left_join(rankings_ordinal %>% 
                select(id_application, er) %>% 
                rename(er2 = er), 
              by = "id_application") %>% 
    mutate(difference = er1 - er2,
           avg = (er1 + er2)/2) %>% 
    select(id_application, difference, avg)
  
  d <- difference %>% 
    summarise(mean(difference)) %>% pull
  sd <- difference %>% 
    summarise(sd(difference)) %>% pull
  difference %>% 
    ggplot(aes(x = avg, y = difference)) + 
    geom_point(alpha = pt_alpha, size = pt_size) + 
    geom_hline(yintercept = d, color = "gray") + 
    geom_hline(yintercept = d - 1.96 * sd, color = "darkred", lty = "dotted") + 
    geom_hline(yintercept = d + 1.96 * sd, color = "darkred", lty = "dotted") + 
    ylim(ymin, ymax) + 
    labs(y = "Difference in ER", x = "Average ER of both approaches",
         title = title) +
    theme_minimal() 
}
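# A hypothetical call, assuming two ranking objects as returned by
# get_er_from_jags() further below (the object names are placeholders):
# get_agreement_plot(rankings = ranks_numeric, rankings_ordinal = ranks_ordinal,
#                    title = "Numeric vs. ordinal model")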
# Percentile loss function: penalizes disagreement between a true and an
# estimated ranking around the percentile cutoff `perc`, with exponents p and
# q and weight c for the two types of misclassification:
percentile_loss <- function(true_rank, estimated_rank, perc = .3, p = 2,
                            q = 2, c = 1){
  I <- length(true_rank)
  true_percentile <- true_rank / (I + 1)
  estimated_percentile <- estimated_rank / (I + 1)
  # AB: proposals truly outside the top `perc` fraction but estimated inside;
  # BA: proposals truly inside the top `perc` fraction but estimated outside.
  AB <- (true_percentile > perc) & (estimated_percentile < perc)
  BA <- (true_percentile < perc) & (estimated_percentile > perc)
  
  loss <- (sum(abs(perc - estimated_percentile)^p * AB +
                 c * abs(estimated_percentile - perc)^q * BA)) / I
  return(loss)
}
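# A quick illustration with hypothetical ranks: two rankings of ten proposals
# that disagree only on ranks 3 and 4, i.e. exactly around the default 30%
# threshold, incur a small positive loss, while identical rankings incur none:
percentile_loss(true_rank = 1:10,
                estimated_rank = c(1, 2, 4, 3, 5, 6, 7, 8, 9, 10))
percentile_loss(true_rank = 1:10, estimated_rank = 1:10)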

The main paper presents two major case studies. The methodology of the Bayesian Ranking (BR) is applied to give funding recommendations for two funding instruments at the Swiss National Science Foundation (SNSF): the Postdoc.Mobility fellowship for early career researchers and the SNSF Project Funding scheme. The data for both case studies is available on Zenodo and can be accessed directly through R using the following code chunk:

# Load the data from Zenodo:
path_to_xlsx <- "https://zenodo.org/record/4531160/files/individual_votes.xlsx"

hss_s_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hsss") 
hss_s <- hss_s_mat %>% 
  get_right_data_format(prefix_voter = "voter")

hss_h_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hssh") 
hss_h <- hss_h_mat %>% 
  get_right_data_format(prefix_voter = "voter")

ls_b_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsb") 
ls_b <- ls_b_mat %>% 
  get_right_data_format(prefix_voter = "voter")

ls_m_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsm") 
ls_m <- ls_m_mat %>% 
  get_right_data_format(prefix_voter = "voter")

stem_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_stem")
stem <- stem_mat %>% 
  get_right_data_format(prefix_voter = "voter")
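# The five panel sheets above could also be read and reformatted in one pass;
# a compact (purely optional) alternative, assuming the sheet names used above:
# pm_sheets <- c(hss_s = "pm_hsss", hss_h = "pm_hssh", ls_b = "pm_lsb",
#                ls_m = "pm_lsm", stem = "pm_stem")
# pm_data <- lapply(pm_sheets, function(sheet)
#   openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = sheet) %>%
#     get_right_data_format(prefix_voter = "voter"))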

mint_section1_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section1") 
mint_section1 <- mint_section1_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "one")

mint_section2_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section2") 
mint_section2 <- mint_section2_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "two")

mint_section3_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section3")
mint_section3 <- mint_section3_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "three")

mint_section4_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section4") 
mint_section4 <- mint_section4_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "four")

# Note that we want to be able to differentiate the proposals in the different
# mint sections:
mint_sections <- mint_section1 %>% 
  mutate(proposal = paste0(proposal, "_1")) %>% 
  bind_rows(mint_section2 %>% 
              mutate(proposal = paste0(proposal, "_2"))) %>% 
  bind_rows(mint_section3 %>% 
              mutate(proposal = paste0(proposal, "_3"))) %>% 
  bind_rows(mint_section4 %>% 
              mutate(proposal = paste0(proposal, "_4")))
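# Sanity check (optional): number of distinct proposals per mint section.
mint_sections %>% 
  distinct(section, proposal) %>% 
  count(section)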

##### How many projects can still be funded? ####
how_many_fundable <- c(7, 4, 7, 8, 6)
names(how_many_fundable) <- c("hss_s", "hss_h", "ls_m", "ls_b", "stem")

1 Postdoc.Mobility funding scheme

The first case study focuses on the evaluation of the Postdoc.Mobility (PM) call of February 1st 2020. Junior researchers who have recently defended their PhD and wish to pursue an academic career can apply for a PM fellowship, which allows them to work in a research group abroad for two years. The researchers submit their research proposal to the SNSF, where it is evaluated by two referees who score the proposal on a six-point scale (1 being poor quality and 6 being outstanding quality). Each proposal is evaluated by one of the following panels: Humanities; Social Sciences; Science, Technology, Engineering and Mathematics (STEM); Medicine; and Biology. As described in the main paper, the dataset on the PM panels contains the assessors' evaluations of the proposals that were triaged into the ‘panel discussion’ group (i.e. neither directly accepted nor directly rejected). Each panel has its own available funding, so that five different funding lines are drawn. The proposals are discussed in the respective panel, where each member votes on a score for each proposal, unless they have a conflict of interest or another reason to be absent during the voting. Table 1.1 summarizes the number of proposals in the different ‘panel discussion’ groups together with the number of proposals that can still be funded and the size of the panel.

Table 1.1: Characteristics of panels and submissions for the February 2020 call for junior fellowships at the Swiss National Science Foundation.
Panel            N. of panel members  Total N. of submissions  N. of proposals discussed  N. of fundable proposals
Humanities       14                   23                       11                         4
Social Sciences  14                   38                       18                         7
Biology          26                   35                       18                         8
Medicine         17                   35                       14                         7
STEM             29                   50                       18                         6

For each of the five panels, Figure 1.1 presents the votes of the panel members on each of the proposals discussed. The proposals are represented on the y-axis and ordered by the average of the individual votes they received (red dots). We see a large variability in the votes. Note that we only consider the middle block of proposals, which were neither truly exceptional nor of very low quality, and that the variability of the votes per proposal tends to be larger in this middle block than for proposals with a very low/high average score. Some members of the STEM panel still assessed a few proposals with a 2, the second-lowest grade; not surprisingly, the STEM panel has the lowest intraclass correlation coefficient (ICC), representing the assessor reliability. In all five panels, the assessor reliability can be classified as either poor or fair.

leg <- cowplot::get_legend(get_grades_plot(hss_h))
gridExtra::grid.arrange(get_grades_plot(hss_h, hss_h_mat, x_min = 1.8, 
                                        title = "Humanities") +
                          theme(legend.position = "none"),
                        get_grades_plot(hss_s, hss_s_mat, x_min = 1.8,
                                        title = "Social Sciences") + 
                          theme(legend.position = "none"),
                        get_grades_plot(ls_b, ls_b_mat, x_min = 1.8,
                                        title = "Biology")+ 
                          theme(legend.position = "none"),
                        get_grades_plot(ls_m, ls_m_mat, x_min = 1.8,
                                        title = "Medicine")+ 
                          theme(legend.position = "none"),
                        get_grades_plot(stem, stem_mat, x_min = 1.8, 
                                        title = "STEM")+ 
                          theme(legend.position = "none"), 
                        leg)

Figure 1.1: The individual votes given to all proposals by all panel members. The red dots represent the averages of those individual votes for each proposal. The intra-class correlation coefficients together with their 95% confidence intervals of the different panels can be found in the titles of the subfigures. In all panels, the assessor reliability can be classified as poor to fair.

Then, the Bayesian hierarchical model is fitted to each of the five panels individually, and the expected ranks are retrieved from the MCMC samples of these models. Note that the numbers of iterations, burn-in and adaptation steps (n.iter, n.burnin and n.adapt) are set high enough to ensure convergence of all the MCMC chains; lowering these numbers will make the code run faster.

##### The model for JAGS ####
# we will use the default

##### Get the ER results: ####
# To make the code run faster, make sure to decrease those numbers
n.iter <- 50 * 10^3
n.burnin <- 10 * 10^3
n.adapt <- 10 * 10^3
max.iter <- 50 * 10^3
n.chains <- 4
seed <- 1991
# As we checked the number of iterations needed for convergence beforehand, 
# setting the argument max_iter equal to n.iter ensures that the algorithm is 
# run only once, rather than rerun until convergence. 
mcmc_hss_s_object <- 
  get_mcmc_samples(data = hss_s,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_s <- mcmc(do.call(rbind, mcmc_hss_s_object))

mcmc_hss_h_object <- 
  get_mcmc_samples(data = hss_h,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_h <- mcmc(do.call(rbind, mcmc_hss_h_object))

mcmc_ls_b_object <- 
  get_mcmc_samples(data = ls_b,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_b <- mcmc(do.call(rbind, mcmc_ls_b_object))

mcmc_ls_m_object <- 
  get_mcmc_samples(data = ls_m,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_m <- mcmc(do.call(rbind, mcmc_ls_m_object))

mcmc_stem_object <- 
  get_mcmc_samples(data = stem,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)

mcmc_stem <- mcmc(do.call(rbind, mcmc_stem_object))
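# Since dont_bind = TRUE returns the samples as a list with one matrix per
# chain (as implied by the rbind calls above), convergence can also be
# double-checked by hand; a sketch for the STEM panel using coda:
# coda::gelman.diag(coda::as.mcmc.list(lapply(mcmc_stem_object, coda::mcmc)),
#                   multivariate = FALSE)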


# Computation of the ER and PCER:
ranks_hss_s <- get_er_from_jags(data = hss_s,
                                id_application = "proposal",
                                id_voter = "voter", 
                                grade_variable = "num_grade",
                                mcmc_samples = mcmc_hss_s_object)

ranks_hss_h <- get_er_from_jags(data = hss_h,
                                id_application = "proposal",
                                id_voter = "voter", 
                                grade_variable = "num_grade",
                                mcmc_samples = mcmc_hss_h_object)

ranks_ls_b <- get_er_from_jags(data = ls_b,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_ls_b_object)

ranks_ls_m <- get_er_from_jags(data = ls_m,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_ls_m_object)

ranks_stem <- get_er_from_jags(data = stem,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_stem_object)
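# The ranking objects contain, among other columns, the proposal identifier
# (id_application) and its expected rank (er), both used in the plots below;
# a quick look at the Social Sciences panel:
ranks_hss_s %>% 
  select(id_application, er) %>% 
  arrange(er) %>% 
  head()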

Figure 1.2 presents the different stages of ranking of the proposals for all panels. The first column of points in the figure represents the ranking based on the simple averages (Fixed). Then the proposals are ranked based on the posterior means of θi (Posterior Mean) and, finally, the last column of points shows the expected rank (ER). A naive funding line, represented by the color code in the figure, is defined by simply funding the x best-ranked proposals, where x can be retrieved from Table 1.1.
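For intuition, the ‘Fixed’ ranking in the first column can be reproduced directly from the vote averages; a minimal sketch for the Social Sciences panel, using the long-format data and column names from above:

# Naive 'Fixed' ranking from the simple vote averages:
fixed_rank_hss_s <- hss_s %>% 
  group_by(proposal) %>% 
  summarise(avg = mean(num_grade, na.rm = TRUE)) %>% 
  arrange(desc(avg)) %>% 
  mutate(fixed_rank = row_number())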

##### Plot the results ####
plot_hss_s <-
  plotting_er_results(ranks_hss_s, result_show = TRUE, title = "Social Science",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["hss_s"])

plot_hss_h <- 
  plotting_er_results(ranks_hss_h, result_show = TRUE, title = "Humanities",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["hss_h"]) 

plot_ls_b <-
  plotting_er_results(ranks_ls_b, result_show = TRUE, title = "Biology",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["ls_b"]) 

plot_ls_m <- 
   plotting_er_results(ranks_ls_m, result_show = TRUE, title = "Medicine",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["ls_m"]) 

plot_stem <-  
  plotting_er_results(ranks_stem, result_show = TRUE, title = "STEM",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["stem"]) 

leg <- cowplot::get_legend(plot_hss_h)
gridExtra::grid.arrange(plot_hss_h +  
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.text.x = element_blank(), 
                                axis.title.y = element_text(size = 7),
                                title = element_text(size = 7)),
                        plot_hss_s + 
                          theme(legend.position = "none",
                                axis.title.y = element_blank(),
                                axis.title.x = element_blank(),
                                axis.text.x = element_blank(),
                                title = element_text(size = 7)),
                        plot_ls_b + 
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.title.y = element_blank(),
                                title = element_text(size = 7)),
                        plot_ls_m +
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.title.y = element_text(size = 7),
                                title = element_text(size = 7)),
                        plot_stem + 
                          theme(legend.position = "none",
                                axis.title.y = element_blank(),
                                title = element_text(size = 7)),
                        leg,
                        ncol = 3)