Skip to contents

ETF Universe

We start by defining the universe of European UCITS ETFs listed on the LSE. This universe is precomputed during CI to avoid browser CORS restrictions.

The logic used to retrieve the universe list is:

Show Code
# Return the first `n` rows of the ETF universe.
#
# The precomputed universe file is used when it exists on disk; otherwise the
# seed universe file is read instead. Each file is looked up inside the
# installed `etfdata` package first and, when `system.file()` finds nothing,
# at its path relative to the working directory.
#
# n: maximum number of rows to return (default 20).
# Returns whatever `readr::read_csv()` yields, truncated by `utils::head()`.
get_etf_universe <- function(n = 20) {
  # `system.file()` returns "" when the file (or package) cannot be found.
  resolve_path <- function(file_name, fallback) {
    installed <- system.file("extdata", file_name, package = "etfdata")
    if (nzchar(installed)) installed else fallback
  }

  csv_path <- resolve_path("etf_universe.csv", "inst/extdata/etf_universe.csv")
  if (!file.exists(csv_path)) {
    # No precomputed universe available: use the seed list instead.
    csv_path <- resolve_path(
      "seed_universe.csv",
      "inst/extdata/seed_universe.csv"
    )
  }

  utils::head(readr::read_csv(csv_path, show_col_types = FALSE), n)
}
Snapshot generated at: 2025-12-22 16:38:06
# A tibble: 10 × 4
   ticker name                                     isin         currency
   <chr>  <chr>                                    <chr>        <chr>
 1 VUSA.L Vanguard S&P 500 UCITS ETF               IE00B3XXRP09 GBP
 2 CSPX.L iShares Core S&P 500 UCITS ETF           IE00B5BMR087 GBP
 3 INRG.L iShares Global Clean Energy UCITS ETF    IE00B1XNHC34 GBP
 4 EQQQ.L Invesco EQQQ Nasdaq-100 UCITS ETF        IE0032077012 GBP
 5 VWRL.L Vanguard FTSE All-World UCITS ETF        IE00B3RBWM25 GBP
 6 ISF.L  iShares Core FTSE 100 UCITS ETF          IE0005042456 GBP
 7 VMID.L Vanguard FTSE 250 UCITS ETF              IE00BKX55Q28 GBP
 8 VFEM.L Vanguard FTSE Emerging Markets UCITS ETF IE00B3VVMM84 GBP
 9 VEVE.L Vanguard FTSE Developed World UCITS ETF  IE00BKX55T58 GBP
10 VGOV.L Vanguard UK Gilt UCITS ETF               IE00B42WWV65 GBP     

Metadata (JustETF)

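Metadata comes from the JustETF screener table (full universe), which includes fund size and TER for LSE-listed UCITS ETFs. The pipeline uses the screener result as the cached metadata snapshot; when the screener returns no rows, the snapshot falls back to the universe table, which carries no fund size or TER columns (as in the output below).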

Show Code
# Request the screener table with an AUM floor of 0 and a TER cap of Inf.
# NOTE(review): presumably these bounds filter nothing out, matching the
# "full universe" description in the text; confirm against
# fetch_justetf_screener().
metadata <- fetch_justetf_screener(min_aum_gbp = 0, max_ter = Inf)
# A tibble: 10 × 4
   ticker name                                     isin         currency
   <chr>  <chr>                                    <chr>        <chr>
 1 VUSA.L Vanguard S&P 500 UCITS ETF               IE00B3XXRP09 GBP
 2 CSPX.L iShares Core S&P 500 UCITS ETF           IE00B5BMR087 GBP
 3 INRG.L iShares Global Clean Energy UCITS ETF    IE00B1XNHC34 GBP
 4 EQQQ.L Invesco EQQQ Nasdaq-100 UCITS ETF        IE0032077012 GBP
 5 VWRL.L Vanguard FTSE All-World UCITS ETF        IE00B3RBWM25 GBP
 6 ISF.L  iShares Core FTSE 100 UCITS ETF          IE0005042456 GBP
 7 VMID.L Vanguard FTSE 250 UCITS ETF              IE00BKX55Q28 GBP
 8 VFEM.L Vanguard FTSE Emerging Markets UCITS ETF IE00B3VVMM84 GBP
 9 VEVE.L Vanguard FTSE Developed World UCITS ETF  IE00BKX55T58 GBP
10 VGOV.L Vanguard UK Gilt UCITS ETF               IE00B42WWV65 GBP     

JustETF Screener API

In addition to the cached snapshot, we can query the JustETF screener directly to find ETFs meeting specific criteria (e.g., AUM > £200m, TER < 0.75%). Browser builds use the cached snapshot because cross-site requests are blocked by CORS. The snapshot is built from a curated ETF universe list and incremental Yahoo price downloads (only missing dates are fetched). See docs/wiki/ETF_Data_Sources.md for details.

Show Code
# Query the screener with the example thresholds from the text above and show
# the first rows of the result.
screener_results <- fetch_justetf_screener(
  min_aum_gbp = 200,
  max_ter = 0.75
)
print(utils::head(screener_results))

Price History (Yahoo Finance)

We fetch daily price history from Yahoo Finance using quantmod. For the snapshot, we attempt a single download per ticker with a short timeout and store the results for use in WebR/Shinylive.

Show Code
Show Code
# Use an existing `is_ci` flag when one is defined; otherwise treat as FALSE.
is_ci <- isTRUE(get0("is_ci", ifnotfound = FALSE))
if (!is_ci) {
  if (
    data_loaded &&
      !is.null(history) &&
      requireNamespace("ggplot2", quietly = TRUE)
  ) {
    # One facet per ticker with its own y-axis; the colour legend is hidden
    # because the facet titles already identify each series.
    print(
      ggplot(history, aes(x = date, y = close, color = ticker)) +
        geom_line() +
        facet_wrap(~ticker, scales = "free_y") +
        labs(title = "ETF Price History (LSE)", y = "Close Price (GBP)") +
        theme_minimal() +
        theme(legend.position = "none")
    )
    print(history)
    # The per-ticker summary is optional; show its first 10 rows if present.
    if (!is.null(history_summary)) {
      print(head(history_summary, 10))
    }
  }
} else {
  message("Skipping history plot in CI.")
}

Combined Analysis

We join the datasets and parse the AUM strings to analyze the relationship between Fund Size, Liquidity, and Fees.

Show Code
# Build `combined`: universe joined to screener metadata (by ISIN) and to the
# average daily traded volume (by ticker), plus numeric AUM and TER columns
# used by the plots that follow. Prints a notice when inputs are unavailable.
if (data_loaded && !is.null(history) && !is.null(metadata)) {
  # Mean daily volume per ticker; missing volumes are ignored.
  avg_vol <- history %>%
    group_by(ticker) %>%
    summarise(avg_daily_vol = mean(volume, na.rm = TRUE))

  if ("isin" %in% colnames(universe) && "isin" %in% colnames(metadata)) {
    combined <- universe %>%
      inner_join(metadata, by = "isin")

    # Joining on `isin` alone suffixes every other column the two tables
    # share (`ticker.x`, `name.y`, ...). Collapse each set back into a single
    # column, preferring the first non-missing value, so that `ticker` can be
    # used as a join key and `name`/`currency` are still present for display.
    suffixed_cols <- list(
      ticker = c("ticker.x", "ticker.y", "ticker_meta"),
      name = c("name.x", "name.y"),
      currency = c("currency.x", "currency.y")
    )
    for (shared_col in names(suffixed_cols)) {
      if (shared_col %in% names(combined)) {
        next
      }
      candidates <- intersect(suffixed_cols[[shared_col]], names(combined))
      if (length(candidates) > 0) {
        combined <- combined %>%
          dplyr::mutate(
            !!shared_col := dplyr::coalesce(!!!rlang::syms(candidates))
          ) %>%
          dplyr::select(-dplyr::all_of(candidates))
      }
    }

    combined <- combined %>%
      inner_join(avg_vol, by = "ticker")

    # AUM: parse the text column when stringr is installed and at least one
    # value is present. Otherwise add NA placeholders so the columns the plots
    # rely on always exist.
    # NOTE(review): parse_aum() presumably returns aum_amount, aum_units and
    # total_amount columns, mirroring the placeholders below; confirm.
    can_parse_aum <- requireNamespace("stringr", quietly = TRUE) &&
      "aum_text" %in% names(combined) &&
      any(!is.na(combined$aum_text))
    if (can_parse_aum) {
      combined <- bind_cols(combined, parse_aum(combined$aum_text))
    } else {
      combined <- combined %>%
        dplyr::mutate(
          aum_amount = NA_real_,
          aum_units = NA_character_,
          total_amount = NA_real_
        )
    }

    # TER: extract the number and the percent sign from the text column, or
    # fall back to NA placeholders when the column or the parsers are missing.
    can_parse_ter <- requireNamespace("readr", quietly = TRUE) &&
      requireNamespace("stringr", quietly = TRUE) &&
      "ter_text" %in% names(combined)
    if (can_parse_ter) {
      combined <- combined %>%
        mutate(
          ter_val = readr::parse_number(ter_text),
          ter_units = stringr::str_extract(ter_text, "[%]")
        )
    } else {
      combined <- combined %>%
        mutate(
          ter_val = NA_real_,
          ter_units = NA_character_
        )
    }

    # Show only the columns that actually exist. This must stay the last
    # expression so the table is the value of the chunk and gets rendered.
    cols_to_show <- intersect(
      c(
        "ticker", "name", "currency", "aum_amount", "aum_units",
        "avg_daily_vol", "ter_val"
      ),
      colnames(combined)
    )
    combined %>%
      select(all_of(cols_to_show)) %>%
      head() %>%
      knitr::kable()
  }
} else {
  print("Combined data not available for this build.")
}
ticker aum_amount aum_units avg_daily_vol ter_val
VUSA.L NA NA 255370.72 NA
CSPX.L NA NA 96111.22 NA
INRG.L NA NA 201541.56 NA
EQQQ.L NA NA 21350.41 NA
VWRL.L NA NA 43351.49 NA
ISF.L NA NA 5722954.06 NA
Show Code
# Scatter plot of fund size against average daily volume on log-log axes.
# `total_amount` is only present when the AUM step of the combined analysis
# ran, so check for the column instead of letting ggplot fail on a missing
# variable.
if (exists("combined") && requireNamespace("ggplot2", quietly = TRUE)) {
  if ("total_amount" %in% colnames(combined)) {
    print(
      ggplot(combined, aes(x = total_amount, y = avg_daily_vol)) +
        geom_point() +
        scale_x_log10(
          labels = scales::label_currency(
            prefix = "£",
            scale_cut = scales::cut_short_scale()
          )
        ) +
        scale_y_log10(
          labels = scales::label_number(
            scale_cut = scales::cut_short_scale()
          )
        ) +
        labs(
          title = "ETF Size vs Liquidity",
          x = "Assets Under Management (AUM in GBP)",
          y = "Average Daily Volume"
        ) +
        theme_minimal()
    )
  } else {
    print("AUM data not available for plotting (total_amount column missing).")
  }
}

Show Code
# Scatter plot of fund size against TER, with a log-scaled size axis.
# Each precondition is checked separately so the printed notice names the
# actual reason the plot was skipped.
if (!exists("combined")) {
  print("Combined data not available for plotting.")
} else if (!"ter_val" %in% colnames(combined)) {
  print("TER data not available for plotting (ter_val column missing).")
} else if (!"total_amount" %in% colnames(combined)) {
  # The x aesthetic needs this column; without the check ggplot would error.
  print("AUM data not available for plotting (total_amount column missing).")
} else if (!requireNamespace("ggplot2", quietly = TRUE)) {
  # Previously reported as a missing ter_val column, which was misleading.
  print("ggplot2 is not installed; skipping the TER plot.")
} else {
  print(
    ggplot(combined, aes(x = total_amount, y = ter_val)) +
      geom_point() +
      scale_x_log10(
        labels = scales::label_currency(
          prefix = "£",
          scale_cut = scales::cut_short_scale()
        )
      ) +
      labs(
        title = "ETF Size vs Cost (TER)",
        x = "Assets Under Management (AUM in GBP)",
        y = "Total Expense Ratio (%)"
      ) +
      theme_minimal()
  )
}

Reproducibility & Metadata

Session Info

Click to expand Session Info
Show Code
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.3 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C

time zone: UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] stringr_1.6.0      targets_1.11.4     dplyr_1.1.4        ggplot2_4.0.1
[5] etfdata_0.0.0.9000

loaded via a namespace (and not attached):
 [1] rappdirs_0.3.3     utf8_1.2.6         generics_0.1.4     xml2_1.5.1
 [5] stringi_1.8.7      lattice_0.22-7     hms_1.1.4          digest_0.6.39
 [9] magrittr_2.0.4     RColorBrewer_1.1-3 evaluate_1.0.5     grid_4.5.2
[13] timechange_0.3.0   fastmap_1.2.0      jsonlite_2.0.0     processx_3.8.6
[17] backports_1.5.0    secretbase_1.0.5   ps_1.9.1           httr_1.4.7
[21] rvest_1.0.5        purrr_1.2.0        scales_1.4.0       codetools_0.2-20
[25] httr2_1.2.2        cli_3.6.5          rlang_1.1.6        withr_3.0.2
[29] yaml_2.3.12        otel_0.2.0         tools_4.5.2        tzdb_0.5.0
[33] base64url_1.4      curl_7.0.0         vctrs_0.6.5        logger_0.4.1
[37] R6_2.6.1           zoo_1.8-15         lifecycle_1.0.4    lubridate_1.9.4
[41] snakecase_0.11.1   janitor_2.2.1      callr_3.7.6        pkgconfig_2.0.3
[45] pillar_1.11.1      gtable_0.3.6       data.table_1.17.8  glue_1.8.0
[49] quantmod_0.4.28    xfun_0.55          tibble_3.3.0       tidyselect_1.2.1
[53] knitr_1.51         farver_2.1.2       igraph_2.2.1       htmltools_0.5.9
[57] rmarkdown_2.30     xts_0.14.1         readr_2.1.6        compiler_4.5.2
[61] prettyunits_1.2.0  S7_0.2.1           TTR_0.24.4        

Data Structures

Click to expand Data Structures
Show Code
# Print a labelled 10-row preview of each snapshot table, in a fixed order.
if (data_loaded) {
  invisible(Map(
    function(label, data) {
      print(label)
      print(head(data, 10))
    },
    c("Universe:", "Metadata:", "History:"),
    list(universe, metadata, history)
  ))
}
[1] "Universe:"
# A tibble: 10 × 4
   ticker name                                     isin         currency
   <chr>  <chr>                                    <chr>        <chr>
 1 VUSA.L Vanguard S&P 500 UCITS ETF               IE00B3XXRP09 GBP
 2 CSPX.L iShares Core S&P 500 UCITS ETF           IE00B5BMR087 GBP
 3 INRG.L iShares Global Clean Energy UCITS ETF    IE00B1XNHC34 GBP
 4 EQQQ.L Invesco EQQQ Nasdaq-100 UCITS ETF        IE0032077012 GBP
 5 VWRL.L Vanguard FTSE All-World UCITS ETF        IE00B3RBWM25 GBP
 6 ISF.L  iShares Core FTSE 100 UCITS ETF          IE0005042456 GBP
 7 VMID.L Vanguard FTSE 250 UCITS ETF              IE00BKX55Q28 GBP
 8 VFEM.L Vanguard FTSE Emerging Markets UCITS ETF IE00B3VVMM84 GBP
 9 VEVE.L Vanguard FTSE Developed World UCITS ETF  IE00BKX55T58 GBP
10 VGOV.L Vanguard UK Gilt UCITS ETF               IE00B42WWV65 GBP
[1] "Metadata:"
# A tibble: 10 × 4
   ticker name                                     isin         currency
   <chr>  <chr>                                    <chr>        <chr>
 1 VUSA.L Vanguard S&P 500 UCITS ETF               IE00B3XXRP09 GBP
 2 CSPX.L iShares Core S&P 500 UCITS ETF           IE00B5BMR087 GBP
 3 INRG.L iShares Global Clean Energy UCITS ETF    IE00B1XNHC34 GBP
 4 EQQQ.L Invesco EQQQ Nasdaq-100 UCITS ETF        IE0032077012 GBP
 5 VWRL.L Vanguard FTSE All-World UCITS ETF        IE00B3RBWM25 GBP
 6 ISF.L  iShares Core FTSE 100 UCITS ETF          IE0005042456 GBP
 7 VMID.L Vanguard FTSE 250 UCITS ETF              IE00BKX55Q28 GBP
 8 VFEM.L Vanguard FTSE Emerging Markets UCITS ETF IE00B3VVMM84 GBP
 9 VEVE.L Vanguard FTSE Developed World UCITS ETF  IE00BKX55T58 GBP
10 VGOV.L Vanguard UK Gilt UCITS ETF               IE00B42WWV65 GBP
[1] "History:"
# A tibble: 10 × 8
   ticker date        open  high   low close volume adjusted
   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
 1 IGLT.L 2008-01-02  10.0  10.0  10.0  10.0      0     7.63
 2 IGLT.L 2008-01-03  10.0  10.0  10.0  10.0      0     7.62
 3 IGLT.L 2008-01-04  10.1  10.1  10.1  10.1      0     7.64
 4 IGLT.L 2008-01-07  10.1  10.1  10.1  10.1      0     7.64
 5 IGLT.L 2008-01-08  10.0  10.0  10.0  10.0      0     7.61
 6 IGLT.L 2008-01-09  10.1  10.1  10.1  10.1      0     7.64
 7 IGLT.L 2008-01-10  10.1  10.1  10.1  10.1      0     7.64
 8 IGLT.L 2008-01-11  10.0  10.0  10.0  10.0      0     7.61
 9 IGLT.L 2008-01-14  10.0  10.0  10.0  10.0      0     7.62
10 IGLT.L 2008-01-15  10.0  10.0  10.0  10.0      0     7.63

Targets Metadata

Click to expand Targets Metadata
Targets Pipeline Metadata (Top 5)
name bytes_formatted seconds time type
vignette_snapshot 27.1 MB 7.682 2025-12-22 21:15:52 stem
history_cache_file 27.1 MB 7.679 2025-12-22 21:15:35 stem
history_cache 27.1 MB 59.824 2025-12-22 21:15:27 stem
history 27.1 MB 0.000 2025-12-22 21:15:43 stem
universe_curated 837 B 0.104 2025-12-22 21:14:19 stem

Pipeline Network

Pipeline Manifest (CI Fallback)
name command
screener_raw fetch_justetf_screener(min_aum_gbp = 0, max_ter = Inf)
universe_curated read_curated_universe()
universe {
 screener_clean <- select_universe_cols_local(screener_raw)
 curated_clean <- select_universe_cols_local(universe_curated)
 if (nrow(screener_clean) == 0) {
     curated_clean
 }
 else {
     dplyr::bind_rows(curated_clean, screener_clean) %>% dplyr::distinct()
 }
}
tickers unique(na.omit(universe$ticker))
history_cache update_history_cache(tickers = tickers, cache_path = "inst/extdata/history_cache.rds", start_date = as.Date("2000-01-01"), end_date = Sys.Date(), timeout_seconds = 10)
history_cache_file {
 saveRDS(history_cache$history, "inst/extdata/history_cache.rds", version = 2)
 saveRDS(history_cache$summary, "inst/extdata/history_summary.rds", version = 2)
 c("inst/extdata/history_cache.rds", "inst/extdata/history_summary.rds")
}
history history_cache$history
history_summary history_cache$summary
vignette_snapshot {
 if (!dir.exists("inst/extdata"))
     dir.create("inst/extdata", recursive = TRUE)
 readr::write_csv(universe, "inst/extdata/etf_universe.csv")
 saveRDS(list(universe = universe, metadata = if (nrow(screener_raw) > 0) screener_raw else universe, history = history, history_summary = history_summary, generated_at = Sys.time(), source = if (nrow(screener_raw) > 0) "justetf+curated" else "curated"), "inst/extdata/vignette_data.rds", version = 2)
 "inst/extdata/vignette_data.rds"
}