From 685d32b008f35e3337ecb956b653cfa0687e6b8c Mon Sep 17 00:00:00 2001 From: abigailkeller Date: Thu, 23 Apr 2026 09:37:18 +0000 Subject: [PATCH] working on discrepancy ideas --- discrepancy/cppp_disc_test.R | 139 ++++++++++++++++++++++++++ discrepancy/cppp_discrepancy.R | 109 ++++++++++++++++++++ discrepancy/cppp_discrepancy_NIMBLE.R | 139 ++++++++++++++++++++++++++ 3 files changed, 387 insertions(+) create mode 100644 discrepancy/cppp_disc_test.R create mode 100644 discrepancy/cppp_discrepancy.R create mode 100644 discrepancy/cppp_discrepancy_NIMBLE.R diff --git a/discrepancy/cppp_disc_test.R b/discrepancy/cppp_disc_test.R new file mode 100644 index 0000000..a974d96 --- /dev/null +++ b/discrepancy/cppp_disc_test.R @@ -0,0 +1,139 @@ +library(nimble) + +# read in disc functions +source("discrepancy/cppp_discrepancy.R") +source("discrepancy/cppp_discrepancy_NIMBLE.R") + +#################################### +# Set up model, data, MCMC samples # +#################################### + +# occupancy model +occu_model <- nimbleCode({ + + psi ~ dbeta(1, 1) + p ~ dbeta(1, 1) + + for (i in 1:nSites) { + + z[i] ~ dbern(psi) + + # observed data + y[i] ~ dbinom(z[i] * p, nVisits) + + # expected data + y_exp[i] <- z[i] * p * nVisits + + } + +}) + +# function for simulating data +simulate_occu <- function(params, nSites, nVisits) { + + y <- rep(NA, nSites) + + z <- rbinom(nSites, 1, params$psi) + + for (i in 1:nSites) { + + y[i] <- rbinom(1, nVisits, z[i] * params$p) + + } + + return(y) +} + +# model components +nVisits <- 6 +nSites <- 20 +params <- list(p = 0.3, psi = 0.4) +constants <- list(nSites = nSites, nVisits = nVisits) +observedData <- simulate_occu(params = list(p = 0.3, psi = 0.4), + nSites = 20, nVisits = 6) + +# uncompiled model +model_uncompiled <- nimbleModel(occu_model, constants = constants, + data = list(y = observedData)) + +# compiled model +model <- compileNimble(model_uncompiled) + +# get data, param, and expected nodes +dataNames <- "y" +dataNodes <- model$expandNodeNames(dataNames) + +paramNames <- c("p", "psi", "z") +paramNodes <- model$expandNodeNames(paramNames) + +expectedNames <- "y_exp" +expectedNodes <- model$expandNodeNames(expectedNames) + +# get MCMC samples +# MCMCcontrolMain = list(niter = 5000, nburnin = 1000, thin = 1) +MCMCcontrolMain = list(niter = 500, nburnin = 100, thin = 1) + +mcmcConfFun <- function(model) { + configureMCMC(model, monitors = paramNodes, print = FALSE) +} +mcmcConf <- mcmcConfFun(model) +mcmcUncompiled <- buildMCMC(mcmcConf) +cmcmc <- compileNimble(mcmcUncompiled, project = model, + resetFunctions = TRUE) + +obsMCMC <- runMCMC( + cmcmc, + niter = MCMCcontrolMain$niter, + nburnin = MCMCcontrolMain$nburnin, + thin = MCMCcontrolMain$thin +) +MCMCSamples <- as.matrix(obsMCMC) + + +##################### +# Get discrepancies # +##################### + +# choose discType +discType <- c("mean", "deviance", "chisq") + +# disc control (updated from main code) +discControl = list( + model = model, + dataNames = dataNames, + dataNodes = dataNodes, + paramNames = paramNames, + paramNodes = paramNodes, + expectedNames = expectedNames, # new + expectedNodes = expectedNodes, # new + discType = discType # new +) + +#### +# Non-NIMBLE functions +#### + +# calculate discrepancies +obsDisc <- calcDiscFunction(MCMCSamples = MCMCSamples, + targetData = observedData, + control = discControl) + +# scalar PPP for the observed data +obsPPP <- sapply(obsDisc, function(x) { + mean(as.numeric(x[, "obs"]) >= as.numeric(x[, "sim"])) +}) + + +#### +# NIMBLE functions +#### + +# create instance of discrepancy function +calc_disc <- calcDiscFunction_nimble(MCMCSamples, as.numeric(observedData), + discControl) + +# compile +Ccalc_disc <- compileNimble(calc_disc, project = model_uncompiled) + +# run - this keeps crashing R :/ +obsDisc_nimble <- Ccalc_disc$run() diff --git a/discrepancy/cppp_discrepancy.R b/discrepancy/cppp_discrepancy.R new file mode 100644 index 0000000..afdf710 --- /dev/null +++ b/discrepancy/cppp_discrepancy.R @@ -0,0 +1,109 @@ +calcDiscFunction <- function(MCMCSamples = MCMCSamples, + targetData = observedData, + control = discControl) { + + # create empty list of matrices + disc <- setNames(replicate(length(discType), + matrix(NA, nrow(MCMCSamples), 2, + dimnames = list(NULL, + c("sim", "obs"))), + simplify = FALSE), + discType) + + # make a copy of model for simulating replicated discrepancies + model_copy <- control$model$newModel() + + # get nodes to simulate + nodesSim <- model_copy$getDependencies(control$paramNames, self = FALSE) + + # loop through MCMCsamples + for (i in 1:nrow(MCMCSamples)) { + + ## + # simulate nodes for replicated discrepancies + ## + + # add values to model copy + values(model_copy, control$paramNodes) <- MCMCSamples[i, control$paramNodes] + + # simulate other nodes + model_copy$simulate(nodesSim, includeData = TRUE) + + ## + # simulate nodes for observed discrepancies + ## + + # add values to model + values(control$model, control$paramNodes) <- MCMCSamples[i, control$paramNodes] + + # simulate other nodes (not including data) + control$model$simulate(nodesSim, includeData = FALSE) + + # loop through discrepancy types + for (j in seq_along(control$discType)) { + + disc[[j]][i, ] <- switch( + control$discType[j], + "mean" = calc_mean(model_copy, targetData, control), + "deviance" = calc_deviance(model_copy, control), + "chisq" = calc_chisq(model_copy, targetData, control), + # AK: should probably put this earlier + # Default case (the error) + stop(paste0("discType ", control$discType[j], + " is invalid. Must be: mean, deviance, chisq")) + ) + + } + + } + + # AK: does this return type make sense? + # List of length(number of discrepancies) + # Each element of the list is a matrix with nrow = # MCMCSamples, ncol = 2 + return(disc) +} + +calc_mean <- function(model_copy, targetData, control) { + + # replicated discrepancies + discRep <- mean(values(model_copy, control$dataNodes)) + + # observed discrepancies + discObs <- mean(targetData) + + return(c(discRep, discObs)) +} + +calc_deviance <- function(model_copy, control) { + + # replicated discrepancies + discRep <- 2 * model_copy$calculate(control$dataNodes) + + # observed discrepancies + discObs <- 2 * control$model$calculate(control$dataNodes) + + return(c(discRep, discObs)) + +} + +calc_chisq <- function(model_copy, targetData, control) { + + # get expected data + ## AK: check that this works for multiple expected nodes + data_exp <- values(model_copy, control$expectedNodes) + + # replicated discrepancies + discRep <- sum( + (values(model_copy, control$dataNodes) - data_exp) ^ 2 / + (data_exp + 1e-6) + ) + + # observed discrepancies + discObs <- sum( + (targetData - data_exp) ^ 2 / + (data_exp + 1e-6) + ) + + return(c(discRep, discObs)) + +} diff --git a/discrepancy/cppp_discrepancy_NIMBLE.R b/discrepancy/cppp_discrepancy_NIMBLE.R new file mode 100644 index 0000000..b2132ff --- /dev/null +++ b/discrepancy/cppp_discrepancy_NIMBLE.R @@ -0,0 +1,139 @@ + +calcDiscFunction_nimble <- nimbleFunction( + setup = function(MCMCSamples, targetData, control) { + + # get dimensions and data + nSim <- nrow(MCMCSamples) + nDisc <- length(control$discType) + discTypes <- control$discType + + # get node names + dataNodes <- control$dataNodes + paramNodes <- control$paramNodes + expectedNodes <- control$expectedNodes + + # get node indices + paramNodesIndices <- match(paramNodes, colnames(MCMCSamples)) + + # get nodes to simulate + nodesSim <- control$model$getDependencies(control$paramNames, self = FALSE) + + # create a model copy and get original + model_copy <- control$model$newModel() + model <- control$model + + # put the helper function generators into the setup. + meanFun <- calc_mean_nimble(model_copy) + devFun <- calc_deviance_nimble(model_copy, model) + chisqFun <- calc_chisq_nimble(model_copy) + + }, + + run = function() { + + # use a 3D array: [Samples, DiscrepancyType, 2] + # 2 columns for "sim" and "obs" + discStore <- array(0, dim = c(nSim, nDisc, 2)) + + for (i in 1:nSim) { + + # 1. Update values in both original model and model copy + values(model_copy, paramNodes) <<- MCMCSamples[i, paramNodesIndices] + values(model, paramNodes) <<- MCMCSamples[i, paramNodesIndices] + + # 2. Simulate + model_copy$simulate(nodesSim, includeData = TRUE) + model$simulate(nodesSim, includeData = FALSE) + + # 3. Calculate Discrepancies + for (j in 1:nDisc) { + if (discTypes[j] == "mean") { + + discStore[i, j, ] <- meanFun$run(targetData, dataNodes) + + } else if (discTypes[j] == "deviance") { + + discStore[i, j, ] <- devFun$run(dataNodes) + + } else if (discTypes[j] == "chisq") { + + discStore[i, j, ] <- chisqFun$run(targetData, + dataNodes, expectedNodes) + } + } + } + + returnType(double(3)) + return(discStore) + } +) + +calc_mean_nimble <- nimbleFunction( + + setup = function(model_copy) { + }, + + run = function(targetData = double(1), + dataNodes = character(1)) { + + # replicated discrepancies + discRep <- mean(values(model_copy, dataNodes)) + + # observed discrepancies + discObs <- mean(targetData) + + returnType(double(1)) + return(c(discRep, discObs)) + } +) + +calc_deviance_nimble <- nimbleFunction( + + setup = function(model_copy, model) { + }, + + run = function(dataNodes = character(1)) { + + # replicated discrepancies + discRep <- 2 * model_copy$calculate(dataNodes) + + # observed discrepancies + discObs <- 2 * model$calculate(dataNodes) + + returnType(double(1)) + return(c(discRep, discObs)) + } +) + + +calc_chisq_nimble <- nimbleFunction( + + setup = function(model_copy) { + }, + + run = function(targetData = double(1), + dataNodes = character(1), expectedNodes = character(1)) { + + # get expected data + ## AK: check that this works for multiple expected nodes + data_exp <- values(model_copy, expectedNodes) + + data_sim <- values(model_copy, dataNodes) + + # replicated discrepancies + discRep <- sum( + (data_sim - data_exp) ^ 2 / + (data_exp + 1e-6) + ) + + # observed discrepancies + discObs <- sum( + (targetData - data_exp) ^ 2 / + (data_exp + 1e-6) + ) + + returnType(double(1)) + return(c(discRep, discObs)) + } +) +