Select exactly the data you need using path patterns
consolidated/production/date=*/hour=*/msg=trade/inst=*/exch=*/sym=BTCUSDT/**
consolidated/production/date=2026-06-*/hour=*/msg=trade/**
consolidated/production/date=*/hour=*/msg=*/inst=*/exch=binance/**
consolidated/production/date=2026-06-24/hour=14/msg=trade/**
consolidated/production/date=*/hour=*/msg=fundingrate/inst=linear_perp/**
consolidated/production/date=2026-06-*/hour=*/msg=bbo/inst=spot/exch=binance/sym=BTC*/**
Query directly without downloading
import duckdb
con = duckdb.connect()

# Query June 2026 Binance BTCUSDT trades from the consolidated production data.
# The glob is assembled from adjacent Python literals so it can be wrapped
# here without placing newline characters inside the SQL string literal
# (a newline inside the quoted SQL path would become part of the path).
TRADES_GLOB = (
    "s3://quantum-edge-data/consolidated/production/"
    "date=2026-06-*/hour=*/msg=trade/inst=*/"
    "exch=binance/sym=BTCUSDT/**/*.parquet"
)

# hive_partitioning = true asks DuckDB to expose the key=value path segments
# (date, hour, msg, inst, exch, sym) as columns of the result.
df = con.execute(f"""
    SELECT *
    FROM read_parquet('{TRADES_GLOB}', hive_partitioning = true)
""").df()
print(f"Rows: {len(df):,}")
Lazy evaluation for large datasets
import polars as pl
# Lazy scan over every object matched by the wildcard pattern
source_glob = (
    "s3://quantum-edge-data/consolidated/production/"
    "date=2026-06-*/hour=*/msg=trade/inst=*/exch=binance/**"
)
df = pl.scan_parquet(source_glob)

# Keep only BTCUSDT rows, then aggregate per date; the plan is built lazily
# and the query runs when .collect() is called.
# NOTE(review): "date" is presumably the hive partition key taken from the
# path — confirm whether scan_parquet needs hive_partitioning=True for a glob.
btc_trades = df.filter(pl.col("symbol") == "BTCUSDT")
daily_aggregations = [
    pl.col("volume").sum(),
    pl.col("price").mean(),
]
result = btc_trades.group_by("date").agg(daily_aggregations).collect()
print(result)
Download files matching pattern
# First, get your API token from /tokens page
TOKEN="your_api_token_here"
# Ask the API for matching files, then download each returned file.
# The command below fetches one file by its URL-encoded path (%2F is "/", %3D is "=").
wget --header="Authorization: Bearer $TOKEN" \
"https://api.quantum-edge.app/api/download/\
consolidated%2Fproduction%2Fdate%3D2026-06-24%2Fhour%3D14%2Fmsg%3Dtrade%2Finst%3Dlinear_perp%2Fexch%3Dbinance%2Fsym%3DBTCUSDT%2Fhourly.parquet"
Programmatic access with filtering
import requests
import pandas as pd
from pathlib import Path
TOKEN = "your_api_token"
API_BASE = "https://api.quantum-edge.app"

# List files matching pattern
resp = requests.get(
    f"{API_BASE}/api/datasets/trade/files",
    headers={"Authorization": f"Bearer {TOKEN}"},
    params={
        "date": "2026-06-24",
        "exchange": "binance",
        "symbol": "BTCUSDT",
    },
    timeout=30,  # without an explicit timeout, requests can wait indefinitely
)
# Raise on 4xx/5xx responses rather than trying to parse an error body.
resp.raise_for_status()
files = resp.json()["files"]
print(f"Found {len(files)} files")

# Download first file; stop with a clear message when nothing matched
# instead of failing with an IndexError on files[0].
if not files:
    raise SystemExit("No files matched the query")
# NOTE(review): assumes downloadUrl can be read without the Authorization
# header (e.g. a pre-signed URL) — confirm against the API.
file_url = files[0]["downloadUrl"]
df = pd.read_parquet(file_url)
print(df.head())