Show code
# Required packages
library(targets)
library(tarchetypes)
library(crew)
library(randomwalk)
library(dplyr)
library(ggplot2)

This vignette demonstrates how to use the randomwalk package with the targets package for reproducible, parallel workflows. You’ll learn:

- run_simulation()

# Required packages
library(targets)
library(tarchetypes)
library(crew)
library(randomwalk)
library(dplyr)
library(ggplot2)

The code blocks below show the contents of a _targets.R file to save in your project root. This file defines your pipeline; it is not executed directly. Run tar_make() from R to execute the pipeline.
Create a _targets.R file in your project root:
# _targets.R
# Basic pipeline: two independent simulations, a summary table built from
# both of them, and a bar chart of that summary.
library(targets)
library(randomwalk)
# coverage_plot calls ggplot(), aes(), geom_col(), labs() and theme_minimal().
# By default tar_make() evaluates this file in a fresh R process, so ggplot2
# must be attached here and not only in the interactive session.
library(ggplot2)

list(
  # Single simulation target
  tar_target(
    sim_small,
    run_simulation(
      grid_size = 50,
      n_walkers = 10,
      workers = 2,
      max_steps = 5000
    )
  ),
  # Larger simulation
  tar_target(
    sim_large,
    run_simulation(
      grid_size = 100,
      n_walkers = 50,
      workers = 4,
      max_steps = 10000
    )
  ),
  # Analysis target - depends on both simulations, so it is rebuilt whenever
  # either sim_small or sim_large changes
  tar_target(
    summary_stats,
    tibble::tibble(
      size = c("small", "large"),
      grid = c(
        sim_small$parameters$grid_size,
        sim_large$parameters$grid_size
      ),
      walkers = c(
        sim_small$parameters$n_walkers,
        sim_large$parameters$n_walkers
      ),
      coverage_pct = c(
        sim_small$statistics$black_percentage,
        sim_large$statistics$black_percentage
      ),
      total_steps = c(
        sim_small$statistics$total_steps,
        sim_large$statistics$total_steps
      )
    )
  ),
  # Visualization target - one bar per simulation size
  tar_target(
    coverage_plot,
    {
      ggplot(summary_stats, aes(x = size, y = coverage_pct, fill = size)) +
        geom_col() +
        labs(
          title = "Grid Coverage Comparison",
          y = "Coverage (%)",
          x = "Simulation Size"
        ) +
        theme_minimal()
    }
  )
)

# Execute the pipeline
# Build the pipeline defined in _targets.R
tar_make()
# View the workflow graph
tar_visnetwork()
# Check what's outdated
tar_outdated()
# Load results: tar_load() makes summary_stats available by name in the
# session, and the bare name on the next line prints it
tar_load(summary_stats)
summary_stats
# View a specific result: tar_read() returns the stored value of one target
# so that it can be assigned to a name of your choice
sim_result <- tar_read(sim_small)
sim_result$statistics

- tar_target(): Defines a single computation step
- summary_stats needs sim_small and sim_large
- tar_make() skips unchanged targets
- tar_visnetwork(): Visualizes the dependency graph

# _targets.R with dynamic branching
# Dynamic-branching pipeline: one simulation per (grid size, walker count)
# combination, followed by a summary table and a heatmap.
library(targets)
library(tarchetypes)
library(randomwalk)
# heatmap_plot calls ggplot(), aes(), geom_tile(), ...; attach ggplot2 here so
# the functions are available in the process that runs the pipeline.
library(ggplot2)

# Define parameter grid
grid_sizes <- c(50, 100, 150)
walker_counts <- c(10, 25, 50)

list(
  # Parameter targets
  tar_target(grid_sizes_target, grid_sizes),
  tar_target(walker_counts_target, walker_counts),
  # Dynamic branching - creates 9 simulation branches (3 x 3)
  tar_target(
    simulations,
    {
      run_simulation(
        grid_size = grid_sizes_target,
        n_walkers = walker_counts_target,
        workers = 2,
        max_steps = 10000,
        neighborhood = "4-hood",
        boundary = "terminate"
      )
    },
    pattern = cross(grid_sizes_target, walker_counts_target),
    # Keep every branch result as its own list element. With the default
    # iteration = "vector", targets combines branches with vctrs::vec_c(),
    # which does not leave one element per simulation for results_table.
    iteration = "list"
  ),
  # Aggregate all simulation results into one row per simulation
  tar_target(
    results_table,
    {
      # simulations is a list of 9 results (one per branch)
      purrr::map_dfr(simulations, function(sim) {
        tibble::tibble(
          grid_size = sim$parameters$grid_size,
          n_walkers = sim$parameters$n_walkers,
          coverage_pct = sim$statistics$black_percentage,
          total_steps = sim$statistics$total_steps,
          elapsed_secs = sim$statistics$elapsed_time_secs
        )
      })
    }
  ),
  # Heatmap visualization - one tile per parameter combination
  tar_target(
    heatmap_plot,
    {
      ggplot(
        results_table,
        aes(
          x = factor(grid_size),
          y = factor(n_walkers),
          fill = coverage_pct
        )
      ) +
        geom_tile() +
        geom_text(
          aes(label = sprintf("%.1f%%", coverage_pct)),
          color = "white"
        ) +
        scale_fill_viridis_c() +
        labs(
          title = "Coverage by Grid Size and Walker Count",
          x = "Grid Size",
          y = "Number of Walkers",
          fill = "Coverage %"
        ) +
        theme_minimal()
    }
  )
)

| Pattern | Description | Example |
|---|---|---|
| `map(x)` | One branch per element | 3 grid sizes → 3 branches |
| `cross(x, y)` | Cartesian product | 3 × 3 = 9 branches |
| `head(x, n)` | First n branches | Testing with subset |
| `slice(x, index)` | Specific branches | Re-run specific cases |
This is the key feature: targets parallelizes simulations while each simulation parallelizes walkers internally.
# _targets.R with nested parallelism
# Outer level: targets distributes simulation branches over a crew controller.
# Inner level: each run_simulation() call is given its own `workers` count.
library(targets)
library(tarchetypes)
library(crew)
library(randomwalk)
# perf_plot calls ggplot(), aes(), geom_point(), ...; attach ggplot2 here so
# the functions are available in the process that runs the pipeline.
library(ggplot2)

# Configure targets to use crew controller for OUTER parallelism
tar_option_set(
  controller = crew_controller_local(
    name = "targets_controller",
    workers = 4, # 4 parallel simulations (outer level)
    seconds_idle = 10
  )
)

# Parameter grid: one row per simulation
param_grid <- tibble::tibble(
  grid_size = c(100, 150, 200, 250),
  n_walkers = c(50, 100, 150, 200),
  sim_workers = 2 # Each simulation uses 2 workers (inner level)
)

list(
  tar_target(params, param_grid),
  # NESTED PARALLELISM:
  # - targets runs 4 simulations in parallel (outer level)
  # - each simulation uses 2 crew workers (inner level)
  # - Total: 4 * 2 = 8 worker processes!
  tar_target(
    simulations,
    {
      run_simulation(
        grid_size = params$grid_size,
        n_walkers = params$n_walkers,
        workers = params$sim_workers, # Inner parallelism
        max_steps = 20000
      )
    },
    pattern = map(params), # Outer parallelism via targets
    # Keep every branch result as its own list element so that
    # simulations[[i]] in `analysis` is the i-th simulation. The default
    # iteration = "vector" combines branches with vctrs::vec_c() instead.
    iteration = "list",
    deployment = "worker" # Run on crew workers
  ),
  # Analysis on main process (not parallelized)
  tar_target(
    analysis,
    {
      # Combine results from all simulations into one row per simulation
      purrr::map_dfr(seq_along(simulations), function(i) {
        sim <- simulations[[i]]
        tibble::tibble(
          id = i,
          grid_size = sim$parameters$grid_size,
          n_walkers = sim$parameters$n_walkers,
          inner_workers = sim$parameters$workers,
          coverage_pct = sim$statistics$black_percentage,
          total_steps = sim$statistics$total_steps,
          elapsed_secs = sim$statistics$elapsed_time_secs
        )
      })
    },
    deployment = "main" # Run on main process
  ),
  # Performance comparison plot
  tar_target(
    perf_plot,
    {
      ggplot(
        analysis,
        aes(x = n_walkers, y = elapsed_secs, color = factor(grid_size))
      ) +
        geom_point(size = 3) +
        geom_line() +
        labs(
          title = "Simulation Performance with Nested Parallelism",
          subtitle = "4 outer workers × 2 inner workers = 8 total",
          x = "Number of Walkers",
          y = "Elapsed Time (seconds)",
          color = "Grid Size"
        ) +
        theme_minimal()
    },
    deployment = "main"
  )
)

Targets Controller (4 workers)
├─ Worker 1: Simulation A (grid=100, walkers=50)
│ ├─ Crew Worker 1: Walkers 1-25
│ └─ Crew Worker 2: Walkers 26-50
│
├─ Worker 2: Simulation B (grid=150, walkers=100)
│ ├─ Crew Worker 1: Walkers 1-50
│ └─ Crew Worker 2: Walkers 51-100
│
├─ Worker 3: Simulation C (grid=200, walkers=150)
│ ├─ Crew Worker 1: Walkers 1-75
│ └─ Crew Worker 2: Walkers 76-150
│
└─ Worker 4: Simulation D (grid=250, walkers=200)
├─ Crew Worker 1: Walkers 1-100
└─ Crew Worker 2: Walkers 101-200
Total concurrent processes: 4 simulations × 2 workers = 8 processes
| Option | Description | Use Case |
|---|---|---|
| `deployment = "worker"` | Run on crew worker | Expensive computations |
| `deployment = "main"` | Run on main process | Aggregation, small tasks |
# Check available cores
num_cores <- parallelly::availableCores()
message("Available cores: ", num_cores)

# Conservative allocation: outer = simulations run side by side,
# inner = workers requested by each simulation
outer_workers <- 4L
inner_workers <- 2L
total_workers <- outer_workers * inner_workers

if (total_workers > num_cores) {
  warning(sprintf(
    "Total workers (%d) exceeds available cores (%d). Consider reducing.",
    total_workers, num_cores
  ))
}

# Safe configuration
tar_option_set(
  controller = crew_controller_local(
    # Leave room for inner workers: at most one outer worker per
    # `inner_workers` cores, capped at `outer_workers`. The max(1L, ...)
    # guard keeps at least one worker on machines where the integer
    # division would otherwise give 0 (e.g. a single core).
    workers = max(1L, min(outer_workers, num_cores %/% inner_workers)),
    seconds_idle = 10
  )
)

# Run with progress reporter
# Run the pipeline with the "timestamp" progress reporter
tar_make(reporter = "timestamp")

# Check pipeline status
tar_progress()

# View detailed metadata, longest-running targets first.
# desc() is namespaced like the other dplyr verbs so the snippet does not
# depend on dplyr being attached.
tar_meta() |>
  dplyr::select(name, seconds, bytes, warnings, error) |>
  dplyr::arrange(dplyr::desc(seconds))

# Identify bottlenecks: targets whose recorded runtime exceeds 10 seconds
tar_meta() |>
  dplyr::filter(seconds > 10) |>
  dplyr::select(name, seconds)

# Check for errors
# List the targets whose metadata has a non-missing `error` entry, together
# with the recorded error
tar_meta() |>
dplyr::filter(!is.na(error)) |>
dplyr::select(name, error)
# Re-run failed targets only: the same filter as above, reduced to the
# target names and passed to tar_make(names = ...)
tar_make(names = tar_meta() |> dplyr::filter(!is.na(error)) |> dplyr::pull(name))
# Inspect a problematic target (`problematic_target` stands for the name of
# the target to debug).
# NOTE(review): tar_workspace() presumably needs a workspace saved for that
# target - confirm workspaces are enabled for the targets version in use.
tar_workspace(problematic_target) # Loads workspace for debugging
# Force re-run of specific targets
# Mark the `simulations` target as outdated so that the next tar_make()
# rebuilds it
tar_invalidate(simulations)
# Clean all cached results
# NOTE(review): this removes the stored results of every target, so the
# next tar_make() rebuilds everything - use with care
tar_destroy()
# Check what will run
tar_outdated()
# List every target with its command, without executing anything.
# This is not a dry run: it does not say which targets are outdated;
# tar_outdated() above is the call that shows what would run.
tar_manifest()
# Only run expensive analysis if coverage > 50%
# Conditional target: the costly branch is taken only when the mean coverage
# across the `analysis` rows is above 50; otherwise a small marker list is
# stored in its place.
tar_target(
  expensive_analysis,
  {
    mean_coverage <- mean(analysis$coverage_pct)
    if (mean_coverage <= 50) {
      # Skip expensive computation
      list(skipped = TRUE, reason = "Coverage below threshold")
    } else {
      # Run expensive Monte Carlo analysis
      run_monte_carlo(simulations, n_iter = 10000)
    }
  }
)

# Save large results to file
# File target: runs one large simulation, writes it to disk and returns the
# path. With format = "file", the command's return value is the path that
# targets tracks.
tar_target(
  simulation_output,
  {
    result <- run_simulation(grid_size = 1000, n_walkers = 10000, workers = 8)
    output_path <- file.path("output", "large_simulation.rds")
    # saveRDS() does not create missing directories; make sure "output/"
    # exists so a fresh checkout does not fail on the first run.
    dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
    saveRDS(result, output_path)
    output_path
  },
  format = "file"
)

# Later targets can read from file: `simulation_output` evaluates to the
# path returned above, so this target depends on the file target.
tar_target(
  analysis_from_file,
  {
    sim <- readRDS(simulation_output)
    analyze_simulation(sim)
  }
)

# _targets.R
# tarchetypes provides tar_quarto(), used below
library(tarchetypes)
list(
# ... simulation targets ...
# Render Quarto report using pipeline results.
# `report` is the target name and `path` is the Quarto source document.
tar_quarto(
report,
path = "reports/simulation_report.qmd",
extra_files = c("output/figures/") # Include generated figures
)
)
| Configuration | Simulations | Total Time | Speedup | CPU Efficiency |
|---|---|---|---|---|
| No parallelism (workers=0) | 9 | 45.2s | 1.0x | 11% |
| Crew only (workers=4) | 9 | 12.8s | 3.5x | 88% |
| Targets only (4 branches) | 9 | 11.3s | 4.0x | 100% |
| Nested (4 outer × 2 inner) | 9 | 6.1s | 7.4x | 93% |
| Scenario | Recommended Approach |
|---|---|
| Single simulation | workers > 0 (crew only) |
| Multiple parameters, small simulations | targets with pattern = cross() |
| Multiple parameters, large simulations | Nested parallelism |
| HPC cluster | targets with crew.cluster backend |
After this vignette, you should understand:
- randomwalk with targets workflows
- pattern = map(), cross())

R version 4.5.2 (2025-10-31)
Platform: aarch64-apple-darwin24.6.0
Running under: macOS Tahoe 26.2
Matrix products: default
BLAS: /nix/store/gf17x1bj3m732n39jznn6kz69szbr5rb-blas-3/lib/libblas.dylib
LAPACK: /nix/store/5kg4z5bffhr8nry8bl8l5wlxvpy54dm2-openblas-0.3.30/lib/libopenblasp-r0.3.30.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: UTC
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] htmlwidgets_1.6.4 compiler_4.5.2 fastmap_1.2.0 cli_3.6.5
[5] tools_4.5.2 htmltools_0.5.9 otel_0.2.0 yaml_2.3.12
[9] rmarkdown_2.30 knitr_1.51 jsonlite_2.0.0 xfun_0.55
[13] digest_0.6.39 rlang_1.1.6 evaluate_1.0.5
| SHA | Author | Date | Message |
|---|---|---|---|
| ad1846b | John Gavin john.b.gavin@gmail.com | 2026-02-20 20:01 | fix: Increase dashboard viewerHeight to 1800px for |