Note that this supplement still uses the version of the ERforResearch package from when the paper was published. Check out the package website, which includes a package demo, for the latest functionality!

This online supplement reproduces all the Figures and Tables presented in the publication “Rethinking the Funding Line at the Swiss National Science Foundation: Bayesian Ranking and Lottery”. The methodology of the expected rank (ER) for grant peer review is fully implemented in the R package {ERforResearch}, which can be downloaded from GitHub together with its documentation. To install the package, the following code can be used:

# Install ERforResearch from github:
# devtools::install_github("snsf-data/ERforResearch")

# For this result reproduction we still use the old version of ERforResearch, 
# which can be installed like so: 
devtools::install_github("snsf-data/ERforResearch@old.version.for.paper")
library(ERforResearch)
# Further packages used throughout this supplement:
library(tidyverse) # dplyr, ggplot2, stringr, ...
library(coda)      # mcmc() used on the posterior samples below
## Some helper functions for later:
# Function to extract the Intraclass correlation coefficient (ICC) from the 
# matrix of individual votes:
get_icc_from_matrix <- function(individual_votes) {
  (individual_votes %>% 
     select(-proposal) %>% 
     mutate_all(function(x) ERforResearch:::get_num_grade_snsf(x)) %>% 
     as.data.frame() %>% 
     psych::ICC(x = ., missing = FALSE))$results %>% 
    filter(type == "ICC3") %>% # A fixed set of k judges rates each target
    mutate(icc = paste0(round(ICC, 2), " (",
                        round(`lower bound`, 2), "; ", 
                        round(`upper bound`, 2), ")")) %>% 
    pull(icc) %>% 
    return()
}
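# Once the vote matrices are imported below, this helper can be called
# directly on any of them; e.g. get_icc_from_matrix(stem_mat) would return
# the ICC(3) estimate with its 95% confidence interval as a formatted string.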

# Function to graphically represent all the votes, and the ICC from the
# matrix of individual votes:
get_grades_plot <- function(long_individual_votes, individual_votes_mat = NULL,
                            x_min = NULL, x_max = NULL, title = "",
                            jitter_h = .02, jitter_w = .025,
                            jitter_alpha = .5){
  if (is.null(x_min)){
    x_min <- long_individual_votes %>% pull(num_grade) %>% min
    x_min <- ifelse(x_min > 1, x_min - 1.2, x_min -.2)
  }
  if (is.null(x_max)){
    x_max <- long_individual_votes %>% pull(num_grade) %>% max
    x_max <- ifelse(x_max < 6, x_max + 1.2, x_max +.2)
  }
  if (!is.null(individual_votes_mat)){
    icc <- get_icc_from_matrix(individual_votes_mat)
    title <- paste0(ifelse(title == "", "ICC = ",
                           paste0(title, ": ICC = ")), icc)
  }
  plot_data <- long_individual_votes %>% 
    group_by(proposal) %>% 
    mutate(avg = mean(num_grade, na.rm = TRUE)) %>% 
    ungroup() 
  plot_data <- plot_data %>% 
    select(proposal, avg) %>% 
    distinct() %>% 
    arrange(avg) %>% 
    mutate(order = 1:n()) %>% 
    select(-avg) %>% 
    left_join(plot_data, by = "proposal")
  plot_data %>% 
    ggplot(aes(x = num_grade, y = order)) + 
    geom_jitter(aes(color = "Individual Votes"), height = jitter_h, 
                width = jitter_w, alpha = jitter_alpha, size = 1) +
    geom_point(aes(x = avg, color = "Average"), size = 1.5) + 
    theme_minimal() +
    scale_y_continuous(breaks = unique(plot_data$order),
                       labels = 
                         as.numeric(str_extract_all(unique(plot_data$proposal),
                                                    "[0-9]+"))) +
    labs(x = "Assessor Vote", y = "Ordered proposal", title = title) +
    lims(x = c(x_min, x_max)) +
    scale_color_manual(name = " ", 
                       values = c("Average"="#FF6F6F",
                                  "Individual Votes" = "#848383")) +
    theme(axis.text.y = element_blank(),
          legend.position = c(0.5, .5),
          axis.title.y = element_blank(),
          panel.grid.major.y = element_line(size = .1))
}


# Bland-Altman Plot for Agreement:
get_agreement_plot <- function(rankings, rankings_ordinal, title, ymin = -.5,
                               ymax = .5, pt_alpha = .8, pt_size = 1){
  difference <- rankings %>%  
    select(id_application, er) %>% 
    rename(er1 = er) %>% 
    left_join(rankings_ordinal %>% 
                select(id_application, er) %>% 
                rename(er2 = er), 
              by = "id_application") %>% 
    mutate(difference = er1 - er2,
           avg = (er1 + er2)/2) %>% 
    select(id_application, difference, avg)
  
  d <- difference %>% 
    summarise(mean(difference)) %>% pull
  sd <- difference %>% 
    summarise(sd(difference)) %>% pull
  difference %>% 
    ggplot(aes(x = avg, y = difference)) + 
    geom_point(alpha = pt_alpha, size = pt_size) + 
    geom_hline(yintercept = d, color = "gray") + 
    geom_hline(yintercept = d - 1.96 * sd, color = "darkred", lty = "dotted") + 
    geom_hline(yintercept = d + 1.96 * sd, color = "darkred", lty = "dotted") + 
    ylim(ymin, ymax) + 
    labs(y = "Difference in ER", x = "Average ER of both approaches",
         title = title) +
    theme_minimal() 
}
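# A hypothetical call, assuming two ranking objects as returned by
# get_er_from_jags() further below (the object names are placeholders):
# get_agreement_plot(rankings = ranks_numeric, rankings_ordinal = ranks_ordinal,
#                    title = "Numeric vs. ordinal model")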
# Percentile loss function: penalizes disagreement between a true and an
# estimated ranking around the percentile cutoff `perc`, with exponents p and
# q and weight c for the two types of misclassification:
percentile_loss <- function(true_rank, estimated_rank, perc = .3, p = 2,
                            q = 2, c = 1){
  I <- length(true_rank)
  true_percentile <- true_rank / (I + 1)
  estimated_percentile <- estimated_rank / (I + 1)
  # AB: proposals truly outside the top `perc` fraction but estimated inside;
  # BA: proposals truly inside the top `perc` fraction but estimated outside.
  AB <- (true_percentile > perc) & (estimated_percentile < perc)
  BA <- (true_percentile < perc) & (estimated_percentile > perc)
  
  loss <- (sum(abs(perc - estimated_percentile)^p * AB +
                 c * abs(estimated_percentile - perc)^q * BA)) / I
  return(loss)
}
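# A quick illustration with hypothetical ranks: two rankings of ten proposals
# that disagree only on ranks 3 and 4, i.e. exactly around the default 30%
# threshold, incur a small positive loss, while identical rankings incur none:
percentile_loss(true_rank = 1:10,
                estimated_rank = c(1, 2, 4, 3, 5, 6, 7, 8, 9, 10))
percentile_loss(true_rank = 1:10, estimated_rank = 1:10)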

The main paper presents two major case studies. The methodology of the Bayesian Ranking (BR) is applied to give funding recommendations for two funding instruments at the Swiss National Science Foundation (SNSF): the Postdoc.Mobility fellowship for early career researchers and the SNSF Project Funding scheme. The data for both case studies is available on Zenodo and can be accessed directly through R using the following code chunk:

# Load the data from Zenodo:
path_to_xlsx <- "https://zenodo.org/record/4531160/files/individual_votes.xlsx"

hss_s_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hsss") 
hss_s <- hss_s_mat %>% 
  get_right_data_format(prefix_voter = "voter")

hss_h_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hssh") 
hss_h <- hss_h_mat %>% 
  get_right_data_format(prefix_voter = "voter")

ls_b_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsb") 
ls_b <- ls_b_mat %>% 
  get_right_data_format(prefix_voter = "voter")

ls_m_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsm") 
ls_m <- ls_m_mat %>% 
  get_right_data_format(prefix_voter = "voter")

stem_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_stem")
stem <- stem_mat %>% 
  get_right_data_format(prefix_voter = "voter")
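# The five panel sheets above could also be read and reformatted in one pass;
# a compact (purely optional) alternative, assuming the sheet names used above:
# pm_sheets <- c(hss_s = "pm_hsss", hss_h = "pm_hssh", ls_b = "pm_lsb",
#                ls_m = "pm_lsm", stem = "pm_stem")
# pm_data <- lapply(pm_sheets, function(sheet)
#   openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = sheet) %>%
#     get_right_data_format(prefix_voter = "voter"))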

mint_section1_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section1") 
mint_section1 <- mint_section1_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "one")

mint_section2_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section2") 
mint_section2 <- mint_section2_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "two")

mint_section3_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section3")
mint_section3 <- mint_section3_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "three")

mint_section4_mat <- 
  openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section4") 
mint_section4 <- mint_section4_mat %>% 
  get_right_data_format(prefix_voter = "voter") %>% 
  mutate(section = "four")

# Note that we want to be able to differentiate the proposals in the different
# mint sections:
mint_sections <- mint_section1 %>% 
  mutate(proposal = paste0(proposal, "_1")) %>% 
  bind_rows(mint_section2 %>% 
              mutate(proposal = paste0(proposal, "_2"))) %>% 
  bind_rows(mint_section3 %>% 
              mutate(proposal = paste0(proposal, "_3"))) %>% 
  bind_rows(mint_section4 %>% 
              mutate(proposal = paste0(proposal, "_4")))
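# Sanity check (optional): number of distinct proposals per mint section.
mint_sections %>% 
  distinct(section, proposal) %>% 
  count(section)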

##### How many projects can still be funded? ####
how_many_fundable <- c(7, 4, 7, 8, 6)
names(how_many_fundable) <- c("hss_s", "hss_h", "ls_m", "ls_b", "stem")

1 Postdoc.Mobility funding scheme

The first case study focuses on the evaluation of the Postdoc.Mobility (PM) call of February 1st 2020. Junior researchers who have recently defended their PhD and wish to pursue an academic career can apply for a PM fellowship, which allows them to work in a research group abroad for two years. The researchers submit their research proposal to the SNSF, where it is evaluated by two referees who score the proposal on a six-point scale (1 being poor quality and 6 being outstanding quality). Each proposal is evaluated by one of the following panels: Humanities; Social Sciences; Science, Technology, Engineering and Mathematics (STEM); Medicine; and Biology. As described in the main paper, the dataset on the PM panels contains the assessors' evaluations of the proposals that were triaged into the ‘panel discussion’ group (i.e. neither directly accepted nor directly rejected). Each panel has its own available funding, so that five different funding lines are drawn. The proposals are discussed in the respective panel, where each member votes on a score for each proposal, unless they have a conflict of interest or another reason to be absent during the voting. Table 1.1 summarizes the number of proposals in the different ‘panel discussion’ groups together with the number of proposals that can still be funded and the size of the panel.

Table 1.1: Characteristics of panels and submissions for the February 2020 call for junior fellowships at the Swiss National Science Foundation.
Panel            N. of panel members  Total N. of submissions  N. of proposals discussed  N. of fundable proposals
Humanities       14                   23                       11                         4
Social Sciences  14                   38                       18                         7
Biology          26                   35                       18                         8
Medicine         17                   35                       14                         7
STEM             29                   50                       18                         6

For each of the five panels, Figure 1.1 presents the votes of the panel members on each of the proposals discussed. The proposals are represented on the y-axis and ordered by the average of the individual votes they received (red dots). We see a large variability in the votes. Note that we only consider the middle block of proposals, which were neither truly exceptional nor of very low quality, and that the variability of the votes per proposal tends to be larger in this middle block than for proposals with a very low/high average score. Some members of the STEM panel still assessed a few proposals with a 2, the second-lowest grade; not surprisingly, the STEM panel has the lowest intraclass correlation coefficient (ICC), representing the assessor reliability. In all five panels, the assessor reliability can be classified as either poor or fair.

leg <- cowplot::get_legend(get_grades_plot(hss_h))
gridExtra::grid.arrange(get_grades_plot(hss_h, hss_h_mat, x_min = 1.8, 
                                        title = "Humanities") +
                          theme(legend.position = "none"),
                        get_grades_plot(hss_s, hss_s_mat, x_min = 1.8,
                                        title = "Social Sciences") + 
                          theme(legend.position = "none"),
                        get_grades_plot(ls_b, ls_b_mat, x_min = 1.8,
                                        title = "Biology")+ 
                          theme(legend.position = "none"),
                        get_grades_plot(ls_m, ls_m_mat, x_min = 1.8,
                                        title = "Medicine")+ 
                          theme(legend.position = "none"),
                        get_grades_plot(stem, stem_mat, x_min = 1.8, 
                                        title = "STEM")+ 
                          theme(legend.position = "none"), 
                        leg)

Figure 1.1: The individual votes given to all proposals by all panel members. The red dots represent the averages of those individual votes for each proposal. The intra-class correlation coefficients together with their 95% confidence intervals of the different panels can be found in the titles of the subfigures. In all panels, the assessor reliability can be classified as poor to fair.

Then, the Bayesian hierarchical model is fitted to each of the five panels individually, and the expected ranks are retrieved from the MCMC samples of these models. Note that the numbers of iterations, burn-in and adaptation steps (n.iter, n.burnin and n.adapt) are set high enough to ensure convergence of all the MCMC chains; lowering these numbers will make the code run faster.

##### The model for JAGS ####
# we will use the default

##### Get the ER results: ####
# To make the code run faster, make sure to decrease those numbers
n.iter <- 50 * 10^3
n.burnin <- 10 * 10^3
n.adapt <- 10 * 10^3
max.iter <- 50 * 10^3
n.chains <- 4
seed <- 1991
# As we checked the number of iterations needed for convergence beforehand, 
# setting the argument max_iter equal to n.iter ensures that the algorithm is 
# run only once, rather than rerun until convergence. 
mcmc_hss_s_object <- 
  get_mcmc_samples(data = hss_s,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_s <- mcmc(do.call(rbind, mcmc_hss_s_object))

mcmc_hss_h_object <- 
  get_mcmc_samples(data = hss_h,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_h <- mcmc(do.call(rbind, mcmc_hss_h_object))

mcmc_ls_b_object <- 
  get_mcmc_samples(data = ls_b,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_b <- mcmc(do.call(rbind, mcmc_ls_b_object))

mcmc_ls_m_object <- 
  get_mcmc_samples(data = ls_m,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_m <- mcmc(do.call(rbind, mcmc_ls_m_object))

mcmc_stem_object <- 
  get_mcmc_samples(data = stem,
                   id_application = "proposal",
                   id_voter = "voter", 
                   grade_variable = "num_grade",
                   n_chains = n.chains, n_iter = n.iter,
                   n_burnin = n.burnin, n_adapt = n.adapt,
                   max_iter =  max.iter,
                   variables_to_sample = "specified",
                   rhat_threshold = 1.1,
                   names_variables_to_sample = 
                     c("application_intercept", "tau_application",
                       "nu", "tau_voter", "rank_theta", "sigma"),
                   inits_type = "random",
                   seed = seed, quiet = TRUE, dont_bind = TRUE)

mcmc_stem <- mcmc(do.call(rbind, mcmc_stem_object))
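# Since dont_bind = TRUE returns the samples as a list with one matrix per
# chain (as implied by the rbind calls above), convergence can also be
# double-checked by hand; a sketch for the STEM panel using coda:
# coda::gelman.diag(coda::as.mcmc.list(lapply(mcmc_stem_object, coda::mcmc)),
#                   multivariate = FALSE)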


# Computation of the ER and PCER:
ranks_hss_s <- get_er_from_jags(data = hss_s,
                                id_application = "proposal",
                                id_voter = "voter", 
                                grade_variable = "num_grade",
                                mcmc_samples = mcmc_hss_s_object)

ranks_hss_h <- get_er_from_jags(data = hss_h,
                                id_application = "proposal",
                                id_voter = "voter", 
                                grade_variable = "num_grade",
                                mcmc_samples = mcmc_hss_h_object)

ranks_ls_b <- get_er_from_jags(data = ls_b,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_ls_b_object)

ranks_ls_m <- get_er_from_jags(data = ls_m,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_ls_m_object)

ranks_stem <- get_er_from_jags(data = stem,
                               id_application = "proposal",
                               id_voter = "voter", 
                               grade_variable = "num_grade",
                               mcmc_samples = mcmc_stem_object)
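# The ranking objects contain, among other columns, the proposal identifier
# (id_application) and its expected rank (er), both used in the plots below;
# a quick look at the Social Sciences panel:
ranks_hss_s %>% 
  select(id_application, er) %>% 
  arrange(er) %>% 
  head()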

Figure 1.2 presents the different stages of ranking of the proposals for all panels. The first column of points in the figure represents the ranking based on the simple averages (Fixed). Then the proposals are ranked based on the posterior means of θi (Posterior Mean) and, finally, the last column of points shows the expected rank (ER). A naive funding line, represented by the color code in the figure, is defined by simply funding the x best-ranked proposals, where x can be retrieved from Table 1.1.
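For intuition, the ‘Fixed’ ranking in the first column can be reproduced directly from the vote averages; a minimal sketch for the Social Sciences panel, using the long-format data and column names from above:

# Naive 'Fixed' ranking from the simple vote averages:
fixed_rank_hss_s <- hss_s %>% 
  group_by(proposal) %>% 
  summarise(avg = mean(num_grade, na.rm = TRUE)) %>% 
  arrange(desc(avg)) %>% 
  mutate(fixed_rank = row_number())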

##### Plot the results ####
plot_hss_s <-
  plotting_er_results(ranks_hss_s, result_show = TRUE, title = "Social Science",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["hss_s"])

plot_hss_h <- 
  plotting_er_results(ranks_hss_h, result_show = TRUE, title = "Humanities",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["hss_h"]) 

plot_ls_b <-
  plotting_er_results(ranks_ls_b, result_show = TRUE, title = "Biology",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["ls_b"]) 

plot_ls_m <- 
   plotting_er_results(ranks_ls_m, result_show = TRUE, title = "Medicine",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["ls_m"]) 

plot_stem <-  
  plotting_er_results(ranks_stem, result_show = TRUE, title = "STEM",
                      id_application = "id_application",
                      pt_size = .5, line_size = .2, draw_funding_line = FALSE, 
                      line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
                      how_many_fundable = how_many_fundable["stem"]) 

leg <- cowplot::get_legend(plot_hss_h)
gridExtra::grid.arrange(plot_hss_h +  
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.text.x = element_blank(), 
                                axis.title.y = element_text(size = 7),
                                title = element_text(size = 7)),
                        plot_hss_s + 
                          theme(legend.position = "none",
                                axis.title.y = element_blank(),
                                axis.title.x = element_blank(),
                                axis.text.x = element_blank(),
                                title = element_text(size = 7)),
                        plot_ls_b + 
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.title.y = element_blank(),
                                title = element_text(size = 7)),
                        plot_ls_m +
                          theme(legend.position = "none",
                                axis.title.x = element_blank(),
                                axis.title.y = element_text(size = 7),
                                title = element_text(size = 7)),
                        plot_stem + 
                          theme(legend.position = "none",
                                axis.title.y = element_blank(),
                                title = element_text(size = 7)),
                        leg,
                        ncol = 3)