r-prof · Copilot · Feb 13, 2026 · Feb 13, 2026 · Feb 13, 2026 · Feb 13, 2026
diff --git a/R/api.R b/R/api.R
@@ -64,18 +64,19 @@ validate_profile <- function(x) {
   stopifnot(undotted(names(x$sample_types)) == c("type", "unit"))
   stopifnot(is.character(x$sample_types$type))
   stopifnot(is.character(x$sample_types$unit))
-  #' It is currently restricted to one row with values `"samples"` and `"count"`,
-  #' respectively.
-  stopifnot(nrow(x$sample_types) == 1)
-  stopifnot(x$sample_types$type == "samples")
-  stopifnot(x$sample_types$unit == "count")
+  #' It always has five rows describing the sample count and memory profiling
+  #' data types.
+  stopifnot(nrow(x$sample_types) == 5)
+  stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
+  stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))
 
   #'
-  #' The `samples` table has two columns, `value` (integer) and `locations`
-  #' (list).
+  #' The `samples` table has six columns: `value` (integer), `locations`
+  #' (list), and integer columns `small_v`, `big_v`, `nodes`, and `dup_count`
+  #' for memory profiling data.
   #' Additional columns with a leading dot in the name are allowed
   #' after the required columns.
-  stopifnot(undotted(names(x$samples)) == c("value", "locations"))
+  stopifnot(undotted(names(x$samples)) == c("value", "locations", "small_v", "big_v", "nodes", "dup_count"))
   stopifnot(is.integer(x$samples$value))
   stopifnot(is.list(x$samples$locations))
   #' The `value` column describes the number of consecutive samples for the
@@ -91,6 +92,18 @@ validate_profile <- function(x) {
   stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id)
   #' The locations are listed in inner-first order, i.e., the first location
   #' corresponds to the innermost entry of the stack trace.
+  #' The `small_v`, `big_v`, `nodes`, and `dup_count` columns contain integer
+  #' memory statistics per sample. When memory profiling data is not available,
+  #' these columns are all `NA`. When present, all memory values must be
+  #' nonnegative.
+  stopifnot(is.integer(x$samples$small_v))
+  stopifnot(is.integer(x$samples$big_v))
+  stopifnot(is.integer(x$samples$nodes))
+  stopifnot(is.integer(x$samples$dup_count))
+  stopifnot(is.na(x$samples$small_v) | x$samples$small_v >= 0L)
+  stopifnot(is.na(x$samples$big_v) | x$samples$big_v >= 0L)
+  stopifnot(is.na(x$samples$nodes) | x$samples$nodes >= 0L)
+  stopifnot(is.na(x$samples$dup_count) | x$samples$dup_count >= 0L)
 
   #'
   #' The `locations` table has three integer columns, `location_id`,

diff --git a/R/pprof-from-ds.R b/R/pprof-from-ds.R
@@ -2,6 +2,8 @@ ds_to_msg <- function(ds) {
   validate_profile(ds)
   provide_proto()
 
+  has_memory <- !all(is.na(ds$samples$small_v))
+
   msg <- RProtoBuf::new(perftools.profiles.Profile)
 
   msg$string_table <- unique(c(
@@ -13,14 +15,17 @@ ds_to_msg <- function(ds) {
     ds$functions$filename
   ))
 
-  add_sample_types_to_msg(ds$sample_types, msg)
+  add_sample_types_to_msg(ds$sample_types, msg, has_memory)
   add_samples_to_msg(ds$samples, msg)
   add_locations_to_msg(ds$locations, msg)
   add_functions_to_msg(ds$functions, msg)
   msg
 }
 
-add_sample_types_to_msg <- function(sample_types, msg) {
+add_sample_types_to_msg <- function(sample_types, msg, has_memory) {
+  if (!has_memory) {
+    sample_types <- sample_types[1, , drop = FALSE]
+  }
   sample_types$type <- match(sample_types$type, msg$string_table) - 1L
   sample_types$unit <- match(sample_types$unit, msg$string_table) - 1L
 
@@ -33,9 +38,14 @@ add_sample_types_to_msg <- function(sample_types, msg) {
 }
 
 add_samples_to_msg <- function(samples, msg) {
+  has_memory <- !all(is.na(samples$small_v))
   msg$sample <- lapply(split_rows(samples), function(s) {
     s_msg <- RProtoBuf::new(perftools.profiles.Sample)
-    s_msg$value <- s$value
+    if (has_memory) {
+      s_msg$value <- c(s$value, s$small_v, s$big_v, s$nodes, s$dup_count)
+    } else {
+      s_msg$value <- s$value
+    }
     s_msg$location_id <- s$locations[[1]]$location_id
     s_msg
   })

diff --git a/R/pprof-to-ds.R b/R/pprof-to-ds.R
@@ -16,26 +16,45 @@ msg_to_ds <- function(msg) {
 }
 
 get_sample_types_from_msg <- function(msg) {
-  sample_types <- map(msg$sample_type, function(st) {
-    tibble::tibble(
-      type = as.integer(st$type),
-      unit = as.integer(st$unit)
-    )
+  tibble::tibble(
+    type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
+    unit = c("count", "cells", "cells", "bytes", "count")
+  )
+}
+
+get_samples_from_msg <- function(msg) {
+  # Determine which value indices correspond to memory types
+  all_types <- map(msg$sample_type, function(st) {
+    msg$string_table[as.integer(st$type) + 1]
   })
-  sample_types <- merge_rows(sample_types)
+  all_types <- unlist(all_types)
 
-  sample_types$type <- msg$string_table[sample_types$type + 1]
-  sample_types$unit <- msg$string_table[sample_types$unit + 1]
+  mem_types <- c("small_v", "big_v", "nodes", "dup_count")
+  has_memory <- all(mem_types %in% all_types)
 
-  sample_types[1, ]
-}
+  mem_indices <- NULL
+  if (has_memory) {
+    mem_indices <- match(mem_types, all_types)
+  }
 
-get_samples_from_msg <- function(msg) {
   samples <- map(msg$sample, function(s) {
-    tibble::tibble(
-      value = as.integer(s$value[[1]]),
+    values <- as.integer(s$value)
+    row <- tibble::tibble(
+      value = values[[1]],
       locations = list(tibble::tibble(location_id = as.integer(s$location_id)))
     )
+    if (has_memory) {
+      row$small_v <- values[[mem_indices[[1]]]]
+      row$big_v <- values[[mem_indices[[2]]]]
+      row$nodes <- values[[mem_indices[[3]]]]
+      row$dup_count <- values[[mem_indices[[4]]]]
+    } else {
+      row$small_v <- NA_integer_
+      row$big_v <- NA_integer_
+      row$nodes <- NA_integer_
+      row$dup_count <- NA_integer_
+    }
+    row
   })
   samples <- tibble::as_tibble(do.call(rbind, samples))
   samples

diff --git a/R/rprof-from-ds.R b/R/rprof-from-ds.R
@@ -1,6 +1,8 @@
 ds_to_rprof <- function(ds) {
   validate_profile(ds)
 
+  has_memory <- !all(is.na(ds$samples$small_v))
+
   . <- ds$locations
   . <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE)
   . <- .[-1L]
@@ -18,9 +20,15 @@ ds_to_rprof <- function(ds) {
   flat_locations <- .
 
   files <- paste0("#File ", unique_files$file_id, ": ", unique_files$filename)
+
+  # Expand samples by value (repeat count)
+  sample_idx <- rep(seq_len(nrow(ds$samples)), ds$samples$value)
+
   traces <- map_chr(
-    rep(ds$samples$locations, ds$samples$value),
-    function(loc) {
+    seq_along(sample_idx),
+    function(i) {
+      si <- sample_idx[[i]]
+      loc <- ds$samples$locations[[si]]
       . <- flat_locations[match(loc$location_id, flat_locations$location_id), ]
       stopifnot(.$location_id == loc$location_id)
       funs <- paste0(
@@ -31,9 +39,27 @@ ds_to_rprof <- function(ds) {
     }
   )
 
+  header <- if (has_memory) {
+    "memory profiling: line profiling: sample.interval=20000"
+  } else {
+    "line profiling: sample.interval=20000"
+  }
+
+  # Build memory data for roundtrip compatibility
+  memory <- NULL
+  if (has_memory) {
+    memory <- tibble::tibble(
+      small_v = ds$samples$small_v[sample_idx],
+      big_v = ds$samples$big_v[sample_idx],
+      nodes = ds$samples$nodes[sample_idx],
+      dup_count = ds$samples$dup_count[sample_idx]
+    )
+  }
+
   tibble::lst(
-    header = "line profiling: sample.interval=20000",
+    header,
     files,
-    traces
+    traces,
+    memory
   )
 }
diff --git a/R/rprof-read.R b/R/rprof-read.R
@@ -35,9 +35,32 @@ read_rprof_ll <- function(path) {
   header <- 1L
   files <- grep("^#File ", lines)
   traces <- setdiff(seq_along(lines), c(header, files))
+
+  memory_profiling <- startsWith(lines[header], "memory profiling:")
+
+  # Strip memory data prefix from trace lines
+  memory <- NULL
+  trace_lines <- lines[traces]
+  if (memory_profiling && length(trace_lines) > 0) {
+    mem_rx <- "^:([0-9]+):([0-9]+):([0-9]+):([0-9]+):"
+    mem_matches <- regmatches(trace_lines, regexec(mem_rx, trace_lines))
+    has_mem <- vapply(mem_matches, length, integer(1)) > 0
+    if (any(has_mem)) {
+      memory <- tibble::tibble(
+        small_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 2L)),
+        big_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 3L)),
+        nodes = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 4L)),
+        dup_count = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 5L))
+      )
+      # Strip the memory prefix from trace lines
+      trace_lines <- sub(mem_rx, "", trace_lines)
+    }
+  }
+
   list(
     header = lines[header],
     files = lines[files],
-    traces = lines[traces]
+    traces = trace_lines,
+    memory = memory
   )
 }
diff --git a/R/rprof-to-ds.R b/R/rprof-to-ds.R
@@ -20,8 +20,8 @@ rprof_to_ds <- function(rprof) {
 
 get_sample_types_from_rprof <- function(rprof) {
   tibble::tibble(
-    type = "samples",
-    unit = "count"
+    type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
+    unit = c("count", "cells", "cells", "bytes", "count")
   )
 }
 
@@ -150,7 +150,22 @@ add_samples_to_flat_rprof <- function(flat_rprof) {
   .$locations <- map(.$locations, tibble::as_tibble, rownames = NULL)
 
   .$value <- 1L
-  . <- .[c("value", "locations")]
+
+  memory <- flat_rprof$rprof$memory
+  if (!is.null(memory)) {
+    # Memory data is indexed by trace line (sample), match by sample_id
+    mem <- memory[.$sample_id, , drop = FALSE]
+    .$small_v <- mem$small_v
+    .$big_v <- mem$big_v
+    .$nodes <- mem$nodes
+    .$dup_count <- mem$dup_count
+  } else {
+    .$small_v <- NA_integer_
+    .$big_v <- NA_integer_
+    .$nodes <- NA_integer_
+    .$dup_count <- NA_integer_
+  }
+  . <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")]
 
   flat_rprof$samples <- .
 

diff --git a/R/rprof-write.R b/R/rprof-write.R
@@ -2,6 +2,17 @@
 #' @export
 write_rprof <- function(x, path) {
   rprof <- ds_to_rprof(x)
+  # Add memory prefix to traces when writing to file
+  if (!is.null(rprof$memory)) {
+    rprof$traces <- paste0(
+      ":", rprof$memory$small_v,
+      ":", rprof$memory$big_v,
+      ":", rprof$memory$nodes,
+      ":", rprof$memory$dup_count, ":",
+      rprof$traces
+    )
+  }
+  rprof$memory <- NULL
   writeLines(unlist(rprof, use.names = FALSE), path)
   invisible(x)
 }
diff --git a/inst/samples/rprof/memory.out b/inst/samples/rprof/memory.out
@@ -0,0 +1,14 @@
+memory profiling: line profiling: sample.interval=20000
+:341536:1289015:34667640:1635:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:293305:449462:23670472:6514:"anyDuplicated" "[.data.frame" "[" 
+:369435:615462:36216432:10044:"get" "Ops.data.frame" 
+:327669:523462:29319360:9780:"[<-.data.frame" "[<-" 
+:285807:435462:22434496:9769:"any" ".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:363324:605462:35221424:10234:"as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:322360:515462:28440888:9864:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:281229:429462:21686672:9852:"Ops.data.frame" 
+:358636:597462:34433280:10212:"deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:318714:513462:27870752:9993:"%in%" "Ops.data.frame" 
+:279206:429462:21346248:10022:"[.data.frame" "[" 
+:356880:597462:34140176:10248:".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
+:317207:513462:27611920:9989:"do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame" 
diff --git a/man/validate_profile.Rd b/man/validate_profile.Rd