From af638ee4008e52119585fb80ad8fb52bfe794b02 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Mon, 23 Feb 2026 14:37:23 -0800 Subject: [PATCH 1/4] remove unnecessary tidy evaluation and fix docs for paging --- R/page.R | 20 ++++---------------- man/paging.Rd | 9 +++------ 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/R/page.R b/R/page.R index 4b785e4f0..13577a990 100644 --- a/R/page.R +++ b/R/page.R @@ -6,15 +6,15 @@ #' @rdname paging #' #' @param client A Connect client object -#' @param req The request that needs to be paged +#' @param req For page_cursor, the output from an initial response to an API +#' endpoint that uses cursor-based pagination. For page_offset, a request that +#' needs to be paged. #' @param limit A row limit #' #' @return The aggregated results from all requests #' #' @export page_cursor <- function(client, req, limit = Inf) { - qreq <- rlang::enquo(req) - prg <- optional_progress_bar( format = "downloading page :current (:tick_rate/sec) :elapsedfull", total = NA, @@ -22,7 +22,7 @@ page_cursor <- function(client, req, limit = Inf) { ) prg$tick() - response <- rlang::eval_tidy(qreq) + response <- req res <- response$results while (!is.null(response$paging$`next`) && length(res) < limit) { @@ -39,19 +39,7 @@ page_cursor <- function(client, req, limit = Inf) { # TODO: Decide if this `limit = Inf` is helpful or a hack... # it is essentially a "row limit" on paging -#' Paging -#' -#' Helper functions that make paging easier in -#' the Posit Connect Server API. -#' #' @rdname paging -#' -#' @param client A Connect client object -#' @param req The request that needs to be paged -#' @param limit A row limit -#' -#' @return The aggregated results from all requests -#' #' @export page_offset <- function(client, req, limit = Inf) { qreq <- rlang::enquo(req) diff --git a/man/paging.Rd b/man/paging.Rd index 0d8d6b93f..0413b247c 100644 --- a/man/paging.Rd +++ b/man/paging.Rd @@ -12,19 +12,16 @@ page_offset(client, req, limit = Inf) \arguments{ \item{client}{A Connect client object} -\item{req}{The request that needs to be paged} +\item{req}{For page_cursor, the output from an initial response to an API +endpoint that uses cursor-based pagination. For page_offset, a request that +needs to be paged.} \item{limit}{A row limit} } \value{ -The aggregated results from all requests - The aggregated results from all requests } \description{ -Helper functions that make paging easier in -the Posit Connect Server API. - Helper functions that make paging easier in the Posit Connect Server API. } From d6f145a76ed43a761a0cac3699c0c75aaea85ed7 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Mon, 23 Feb 2026 20:39:21 -0800 Subject: [PATCH 2/4] grow list with fewer copies --- R/page.R | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/R/page.R b/R/page.R index 13577a990..126f382b9 100644 --- a/R/page.R +++ b/R/page.R @@ -24,17 +24,18 @@ page_cursor <- function(client, req, limit = Inf) { prg$tick() response <- req - res <- response$results - while (!is.null(response$paging$`next`) && length(res) < limit) { + res <- growable_list() + gl_add(res, response$results) + while (!is.null(response$paging$`next`) && gl_length(res) < limit) { prg$tick() next_url <- response$paging$`next` response <- client$GET(url = next_url) - res <- c(res, response$results) + gl_add(res, response$results) } - res <- head(res, n = limit) - return(res) + + head(gl_as_list(res), n = limit) } # TODO: Decide if this `limit = Inf` is helpful or a hack... # it is essentially a "row limit" on paging @@ -85,6 +86,41 @@ page_offset <- function(client, req, limit = Inf) { head(agg_response, limit) } +# Pre-allocated list that doubles in capacity as needed, to avoid copying on +# every request. +growable_list <- function(initial_size = 100L) { + gl <- new.env(parent = emptyenv()) + gl$buf <- vector("list", initial_size) + gl$n <- 0L + gl +} + +# Add items to a pre-allocated list. If the items would exceed the size of the +# list, increase the list size to double the existing one OR large enough to +# accommodate all the new items (whichever is larger). +gl_add <- function(gl, items) { + n_new <- length(items) + if (n_new == 0L) { + return(invisible(gl)) + } + # grow list if we've run out of space + if (gl$n + n_new > length(gl$buf)) { + new_size <- max(length(gl$buf) * 2L, gl$n + n_new) + gl$buf[new_size] <- list(NULL) + } + gl$buf[seq.int(gl$n + 1L, gl$n + n_new)] <- items + gl$n <- gl$n + n_new + invisible(gl) +} + +gl_length <- function(gl) { + gl$n +} + +gl_as_list <- function(gl) { + gl$buf[seq_len(gl$n)] +} + optional_progress_bar <- function(...) { if (requireNamespace("progress", quietly = TRUE)) { progress::progress_bar$new(...) From 768d5fd19c54c45c386cafa61feb6870755e7d82 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Mon, 23 Feb 2026 21:39:02 -0800 Subject: [PATCH 3/4] alternate approach growable list was not actually faster --- R/page.R | 46 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/R/page.R b/R/page.R index 126f382b9..8808c25bb 100644 --- a/R/page.R +++ b/R/page.R @@ -24,18 +24,20 @@ page_cursor <- function(client, req, limit = Inf) { prg$tick() response <- req - res <- growable_list() - gl_add(res, response$results) - while (!is.null(response$paging$`next`) && gl_length(res) < limit) { + # collect whole pages, then flatten once at the end + pages <- list(response$results) + n_items <- length(response$results) + while (!is.null(response$paging$`next`) && n_items < limit) { prg$tick() next_url <- response$paging$`next` response <- client$GET(url = next_url) - gl_add(res, response$results) + pages[[length(pages) + 1L]] <- response$results + n_items <- n_items + length(response$results) } - head(gl_as_list(res), n = limit) + head(do.call(c, pages), n = limit) } # TODO: Decide if this `limit = Inf` is helpful or a hack... # it is essentially a "row limit" on paging @@ -86,40 +88,6 @@ page_offset <- function(client, req, limit = Inf) { head(agg_response, limit) } -# Pre-allocated list that doubles in capacity as needed, to avoid copying on -# every request. -growable_list <- function(initial_size = 100L) { - gl <- new.env(parent = emptyenv()) - gl$buf <- vector("list", initial_size) - gl$n <- 0L - gl -} - -# Add items to a pre-allocated list. If the items would exceed the size of the -# list, increase the list size to double the existing one OR large enough to -# accommodate all the new items (whichever is larger). -gl_add <- function(gl, items) { - n_new <- length(items) - if (n_new == 0L) { - return(invisible(gl)) - } - # grow list if we've run out of space - if (gl$n + n_new > length(gl$buf)) { - new_size <- max(length(gl$buf) * 2L, gl$n + n_new) - gl$buf[new_size] <- list(NULL) - } - gl$buf[seq.int(gl$n + 1L, gl$n + n_new)] <- items - gl$n <- gl$n + n_new - invisible(gl) -} - -gl_length <- function(gl) { - gl$n -} - -gl_as_list <- function(gl) { - gl$buf[seq_len(gl$n)] -} optional_progress_bar <- function(...) { if (requireNamespace("progress", quietly = TRUE)) { From f26c440815b351e0753a98538b2d94a3843d4b02 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Mon, 23 Feb 2026 22:03:45 -0800 Subject: [PATCH 4/4] news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index ad555fbe7..0a4bf1bf3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ removed in a future release. (#500) - Added a single retry to `content_restart()` to more robustly clean up temporary environment variables. (#498) +- Improved performance of `page_cursor()`. (#501) ## Breaking changes