Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions R/api.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,19 @@ validate_profile <- function(x) {
stopifnot(undotted(names(x$sample_types)) == c("type", "unit"))
stopifnot(is.character(x$sample_types$type))
stopifnot(is.character(x$sample_types$unit))
#' It is currently restricted to one row with values `"samples"` and `"count"`,
#' respectively.
stopifnot(nrow(x$sample_types) == 1)
stopifnot(x$sample_types$type == "samples")
stopifnot(x$sample_types$unit == "count")
#' It always has five rows describing the sample count and memory profiling
#' data types.
stopifnot(nrow(x$sample_types) == 5)
stopifnot(x$sample_types$type == c("samples", "small_v", "big_v", "nodes", "dup_count"))
stopifnot(x$sample_types$unit == c("count", "cells", "cells", "bytes", "count"))

#'
#' The `samples` table has two columns, `value` (integer) and `locations`
#' (list).
#' The `samples` table has six columns: `value` (integer), `locations`
#' (list), and integer columns `small_v`, `big_v`, `nodes`, and `dup_count`
#' for memory profiling data.
#' Additional columns with a leading dot in the name are allowed
#' after the required columns.
stopifnot(undotted(names(x$samples)) == c("value", "locations"))
stopifnot(undotted(names(x$samples)) == c("value", "locations", "small_v", "big_v", "nodes", "dup_count"))
stopifnot(is.integer(x$samples$value))
stopifnot(is.list(x$samples$locations))
#' The `value` column describes the number of consecutive samples for the
Expand All @@ -91,6 +92,18 @@ validate_profile <- function(x) {
stopifnot(unlist(map(x$samples$locations, "[[", "location_id")) %in% x$locations$location_id)
#' The locations are listed in inner-first order, i.e., the first location
#' corresponds to the innermost entry of the stack trace.
#' The `small_v`, `big_v`, `nodes`, and `dup_count` columns contain integer
#' memory statistics per sample. When memory profiling data is not available,
#' these columns are all `NA`. When present, all memory values must be
#' nonnegative.
stopifnot(is.integer(x$samples$small_v))
stopifnot(is.integer(x$samples$big_v))
stopifnot(is.integer(x$samples$nodes))
stopifnot(is.integer(x$samples$dup_count))
stopifnot(is.na(x$samples$small_v) | x$samples$small_v >= 0L)
stopifnot(is.na(x$samples$big_v) | x$samples$big_v >= 0L)
stopifnot(is.na(x$samples$nodes) | x$samples$nodes >= 0L)
stopifnot(is.na(x$samples$dup_count) | x$samples$dup_count >= 0L)

#'
#' The `locations` table has three integer columns, `location_id`,
Expand Down
16 changes: 13 additions & 3 deletions R/pprof-from-ds.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ ds_to_msg <- function(ds) {
validate_profile(ds)
provide_proto()

has_memory <- !all(is.na(ds$samples$small_v))

msg <- RProtoBuf::new(perftools.profiles.Profile)

msg$string_table <- unique(c(
Expand All @@ -13,14 +15,17 @@ ds_to_msg <- function(ds) {
ds$functions$filename
))

add_sample_types_to_msg(ds$sample_types, msg)
add_sample_types_to_msg(ds$sample_types, msg, has_memory)
add_samples_to_msg(ds$samples, msg)
add_locations_to_msg(ds$locations, msg)
add_functions_to_msg(ds$functions, msg)
msg
}

add_sample_types_to_msg <- function(sample_types, msg) {
add_sample_types_to_msg <- function(sample_types, msg, has_memory) {
if (!has_memory) {
sample_types <- sample_types[1, , drop = FALSE]
}
sample_types$type <- match(sample_types$type, msg$string_table) - 1L
sample_types$unit <- match(sample_types$unit, msg$string_table) - 1L

Expand All @@ -33,9 +38,14 @@ add_sample_types_to_msg <- function(sample_types, msg) {
}

add_samples_to_msg <- function(samples, msg) {
has_memory <- !all(is.na(samples$small_v))
msg$sample <- lapply(split_rows(samples), function(s) {
s_msg <- RProtoBuf::new(perftools.profiles.Sample)
s_msg$value <- s$value
if (has_memory) {
s_msg$value <- c(s$value, s$small_v, s$big_v, s$nodes, s$dup_count)
} else {
s_msg$value <- s$value
}
s_msg$location_id <- s$locations[[1]]$location_id
s_msg
})
Expand Down
45 changes: 32 additions & 13 deletions R/pprof-to-ds.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,45 @@ msg_to_ds <- function(msg) {
}

get_sample_types_from_msg <- function(msg) {
sample_types <- map(msg$sample_type, function(st) {
tibble::tibble(
type = as.integer(st$type),
unit = as.integer(st$unit)
)
tibble::tibble(
type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
unit = c("count", "cells", "cells", "bytes", "count")
)
}

get_samples_from_msg <- function(msg) {
# Determine which value indices correspond to memory types
all_types <- map(msg$sample_type, function(st) {
msg$string_table[as.integer(st$type) + 1]
})
sample_types <- merge_rows(sample_types)
all_types <- unlist(all_types)

sample_types$type <- msg$string_table[sample_types$type + 1]
sample_types$unit <- msg$string_table[sample_types$unit + 1]
mem_types <- c("small_v", "big_v", "nodes", "dup_count")
has_memory <- all(mem_types %in% all_types)

sample_types[1, ]
}
mem_indices <- NULL
if (has_memory) {
mem_indices <- match(mem_types, all_types)
}

get_samples_from_msg <- function(msg) {
samples <- map(msg$sample, function(s) {
tibble::tibble(
value = as.integer(s$value[[1]]),
values <- as.integer(s$value)
row <- tibble::tibble(
value = values[[1]],
locations = list(tibble::tibble(location_id = as.integer(s$location_id)))
)
if (has_memory) {
row$small_v <- values[[mem_indices[[1]]]]
row$big_v <- values[[mem_indices[[2]]]]
row$nodes <- values[[mem_indices[[3]]]]
row$dup_count <- values[[mem_indices[[4]]]]
} else {
row$small_v <- NA_integer_
row$big_v <- NA_integer_
row$nodes <- NA_integer_
row$dup_count <- NA_integer_
}
row
})
samples <- tibble::as_tibble(do.call(rbind, samples))
samples
Expand Down
34 changes: 30 additions & 4 deletions R/rprof-from-ds.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
ds_to_rprof <- function(ds) {
validate_profile(ds)

has_memory <- !all(is.na(ds$samples$small_v))

. <- ds$locations
. <- merge(., ds$functions[c("function_id", "system_name", "filename")], by = "function_id", sort = FALSE, all.x = TRUE)
. <- .[-1L]
Expand All @@ -18,9 +20,15 @@ ds_to_rprof <- function(ds) {
flat_locations <- .

files <- paste0("#File ", unique_files$file_id, ": ", unique_files$filename)

# Expand samples by value (repeat count)
sample_idx <- rep(seq_len(nrow(ds$samples)), ds$samples$value)

traces <- map_chr(
rep(ds$samples$locations, ds$samples$value),
function(loc) {
seq_along(sample_idx),
function(i) {
si <- sample_idx[[i]]
loc <- ds$samples$locations[[si]]
. <- flat_locations[match(loc$location_id, flat_locations$location_id), ]
stopifnot(.$location_id == loc$location_id)
funs <- paste0(
Expand All @@ -31,9 +39,27 @@ ds_to_rprof <- function(ds) {
}
)

header <- if (has_memory) {
"memory profiling: line profiling: sample.interval=20000"
} else {
"line profiling: sample.interval=20000"
}

# Build memory data for roundtrip compatibility
memory <- NULL
if (has_memory) {
memory <- tibble::tibble(
small_v = ds$samples$small_v[sample_idx],
big_v = ds$samples$big_v[sample_idx],
nodes = ds$samples$nodes[sample_idx],
dup_count = ds$samples$dup_count[sample_idx]
)
}

tibble::lst(
header = "line profiling: sample.interval=20000",
header,
files,
traces
traces,
memory
)
}
25 changes: 24 additions & 1 deletion R/rprof-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,32 @@ read_rprof_ll <- function(path) {
header <- 1L
files <- grep("^#File ", lines)
traces <- setdiff(seq_along(lines), c(header, files))

memory_profiling <- startsWith(lines[header], "memory profiling:")

# Strip memory data prefix from trace lines
memory <- NULL
trace_lines <- lines[traces]
if (memory_profiling && length(trace_lines) > 0) {
mem_rx <- "^:([0-9]+):([0-9]+):([0-9]+):([0-9]+):"
mem_matches <- regmatches(trace_lines, regexec(mem_rx, trace_lines))
has_mem <- vapply(mem_matches, length, integer(1)) > 0
if (any(has_mem)) {
memory <- tibble::tibble(
small_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 2L)),
big_v = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 3L)),
nodes = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 4L)),
dup_count = as.integer(vapply(mem_matches[has_mem], "[[", character(1), 5L))
)
# Strip the memory prefix from trace lines
trace_lines <- sub(mem_rx, "", trace_lines)
}
}

list(
header = lines[header],
files = lines[files],
traces = lines[traces]
traces = trace_lines,
memory = memory
)
}
21 changes: 18 additions & 3 deletions R/rprof-to-ds.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ rprof_to_ds <- function(rprof) {

get_sample_types_from_rprof <- function(rprof) {
tibble::tibble(
type = "samples",
unit = "count"
type = c("samples", "small_v", "big_v", "nodes", "dup_count"),
unit = c("count", "cells", "cells", "bytes", "count")
)
}

Expand Down Expand Up @@ -150,7 +150,22 @@ add_samples_to_flat_rprof <- function(flat_rprof) {
.$locations <- map(.$locations, tibble::as_tibble, rownames = NULL)

.$value <- 1L
. <- .[c("value", "locations")]

memory <- flat_rprof$rprof$memory
if (!is.null(memory)) {
# Memory data is indexed by trace line (sample), match by sample_id
mem <- memory[.$sample_id, , drop = FALSE]
.$small_v <- mem$small_v
.$big_v <- mem$big_v
.$nodes <- mem$nodes
.$dup_count <- mem$dup_count
} else {
.$small_v <- NA_integer_
.$big_v <- NA_integer_
.$nodes <- NA_integer_
.$dup_count <- NA_integer_
}
. <- .[c("value", "locations", "small_v", "big_v", "nodes", "dup_count")]

flat_rprof$samples <- .

Expand Down
11 changes: 11 additions & 0 deletions R/rprof-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@
#' @export
write_rprof <- function(x, path) {
rprof <- ds_to_rprof(x)
# Add memory prefix to traces when writing to file
if (!is.null(rprof$memory)) {
rprof$traces <- paste0(
":", rprof$memory$small_v,
":", rprof$memory$big_v,
":", rprof$memory$nodes,
":", rprof$memory$dup_count, ":",
rprof$traces
)
}
rprof$memory <- NULL
writeLines(unlist(rprof, use.names = FALSE), path)
invisible(x)
}
14 changes: 14 additions & 0 deletions inst/samples/rprof/memory.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
memory profiling: line profiling: sample.interval=20000
:341536:1289015:34667640:1635:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:293305:449462:23670472:6514:"anyDuplicated" "[.data.frame" "["
:369435:615462:36216432:10044:"get" "Ops.data.frame"
:327669:523462:29319360:9780:"[<-.data.frame" "[<-"
:285807:435462:22434496:9769:"any" ".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:363324:605462:35221424:10234:"as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:322360:515462:28440888:9864:"<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:281229:429462:21686672:9852:"Ops.data.frame"
:358636:597462:34433280:10212:"deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:318714:513462:27870752:9993:"%in%" "Ops.data.frame"
:279206:429462:21346248:10022:"[.data.frame" "["
:356880:597462:34140176:10248:".deparseOpts" "deparse" "paste" "deparse1" "force" "as.data.frame.numeric" "as.data.frame" "<Anonymous>" "do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
:317207:513462:27611920:9989:"do.call" "as.data.frame.list" "as.data.frame" "data.frame" "Ops.data.frame"
8 changes: 7 additions & 1 deletion man/validate_profile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading