Skip to main content

mts1b_foundation.market_data — full reference

Wire format for market data + the universal panel shape for factor input.

Quote

class Quote(BaseModel):
    # Bid/ask quote for one symbol as reported by one venue.
    symbol: Symbol

    # Prices are Decimal to avoid binary floating-point rounding.
    bid: Decimal
    ask: Decimal

    # Sizes are optional on construction and default to 0.
    bid_size: int = 0
    ask_size: int = 0

    venue: str
    timestamp: datetime

Example

from datetime import datetime, timezone
from decimal import Decimal
from mts1b_foundation.market_data import Quote
from mts1b_foundation.symbology import Symbol

# Two-sided AAPL quote, 2 cents wide, stamped with the current UTC time.
q = Quote(
    symbol=Symbol("AAPL"),
    bid=Decimal("180.49"),
    ask=Decimal("180.51"),
    bid_size=100,
    ask_size=200,
    venue="NYSE",
    timestamp=datetime.now(timezone.utc),
)

# Useful properties
def mid(q: Quote) -> Decimal:
    """Return the midpoint between the quote's bid and ask."""
    bid_plus_ask = q.bid + q.ask
    return bid_plus_ask / 2

def spread_bps(q: Quote) -> float:
    """Return the bid/ask spread relative to the mid, in basis points."""
    width = q.ask - q.bid
    relative_width = width / mid(q)
    # Convert to float first, then scale the fraction to basis points.
    return float(relative_width) * 10_000


print(mid(q)) # 180.50 (a Decimal)
print(spread_bps(q)) # ~1.11 (basis points, as a float)

Sanity-checking

def is_sane(q: Quote) -> bool:
    """Return True when the quote passes basic plausibility checks.

    A quote is rejected when either price is non-positive, when the bid is
    above the ask (crossed quote), or when the spread exceeds 10% of the bid.
    """
    bid, ask = q.bid, q.ask
    if bid <= 0 or ask <= 0:
        return False
    if bid > ask:  # crossed quote
        return False
    # > 10% spread = suspicious
    too_wide = (ask - bid) / bid > 0.1
    return not too_wide

Bar

class Bar(BaseModel):
    # OHLCV bar for one symbol over one interval.
    symbol: Symbol
    interval: str  # "1m", "5m", "1h", "1d", ...

    # Price fields.
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal

    # Optional activity fields; volume defaults to zero, the others to None.
    volume: Decimal = Decimal("0")
    vwap: Decimal | None = None
    n_trades: int | None = None

    timestamp: datetime

Examples

# Daily AAPL bar for Friday 2026-05-22.
bar = Bar(
    symbol=Symbol("AAPL"),
    interval="1d",
    open=Decimal("180.10"),
    high=Decimal("182.50"),
    low=Decimal("179.80"),
    close=Decimal("181.25"),
    volume=Decimal("52_400_000"),
    vwap=Decimal("181.02"),
    n_trades=148_322,
    # Close-aligned timestamp: the 16:00 New York session close is 20:00 UTC
    # in May (EDT). 16:00 UTC would fall mid-session, and 2026-05-23 is a
    # Saturday, so neither matches a "1d" bar stamped at the session close.
    timestamp=datetime(2026, 5, 22, 20, 0, tzinfo=timezone.utc),
)

Interval conventions

| Interval | Meaning |
| --- | --- |
| "1m", "5m", "15m", "30m" | minutes |
| "1h", "4h" | hours |
| "1d" | daily (close-aligned to session close) |
| "1w" | weekly (close-aligned to Friday close) |

Bar timestamp convention

The timestamp is the end of the bar (close-aligned). For a 1m bar covering 09:30-09:31, the timestamp is 09:31. This avoids look-ahead at the bar boundary.

Trade

class Trade(BaseModel):
    # A single executed trade (one tick).
    trade_id: str
    symbol: Symbol

    price: Decimal
    quantity: Decimal

    # "buy" or "sell" — the taker side, if known
    side: str | None = None

    venue: str
    timestamp: datetime

Used for tick-by-tick analysis. `side` is the taker's side, i.e. the side that initiated the trade ("buy" or "sell"); it is None when the taker side is not known.

# One 100-share AAPL print where the buyer was the taker.
trade = Trade(
    trade_id="t-abc",
    symbol=Symbol("AAPL"),
    price=Decimal("180.50"),
    quantity=Decimal("100"),
    side="buy",  # taker bought
    venue="NYSE",
    timestamp=datetime.now(timezone.utc),
)

MarketSnapshot

Composite snapshot for one symbol — combines latest quote + bar + open interest.

class MarketSnapshot(BaseModel):
    # Per-symbol composite; every component except symbol and timestamp is optional.
    symbol: Symbol

    quote: Quote | None = None
    last_bar: Bar | None = None
    open_interest: Decimal | None = None  # futures / options

    timestamp: datetime
# Futures snapshot carrying a quote and open interest; last_bar is left as None.
snap = MarketSnapshot(
    symbol=Symbol("ES 202609"),
    quote=Quote(
        symbol=Symbol("ES 202609"),
        bid=Decimal("5680.50"),
        ask=Decimal("5680.75"),
        venue="CME",
        timestamp=datetime.now(timezone.utc),
    ),
    open_interest=Decimal("2_134_000"),
    timestamp=datetime.now(timezone.utc),
)

UniversePanel

The universal panel shape for factor input. (T, A) table — time × asset.

from dataclasses import field


@dataclass
class UniversePanel:
    """(T, A) time × asset panel used as factor input.

    ``close``, ``dates``, ``symbols`` and ``asset_class`` are required. Every
    other column is optional and defaults to ``None``; ``metadata`` defaults
    to a fresh empty dict per instance.
    """

    close: Any  # (T, A) np.ndarray or cp.ndarray
    dates: Any  # (T,) datetime64[D]
    symbols: list[str]  # (A,) symbol strings
    asset_class: str
    high: Any | None = None
    low: Any | None = None
    open: Any | None = None
    volume: Any | None = None
    market_cap: Any | None = None
    sector: Any | None = None
    country: Any | None = None
    # A bare ``{}`` default is rejected by @dataclass (ValueError at class
    # creation) because it would be shared by every instance; default_factory
    # builds a separate dict for each panel.
    metadata: dict = field(default_factory=dict)

Construction

import numpy as np
from mts1b_foundation.market_data import UniversePanel


# Three dates × three symbols: rows of `close` are dates, columns are symbols.
panel = UniversePanel(
    close=np.array(
        [
            [180.10, 410.20, 525.50],  # day 0
            [181.25, 412.30, 528.10],  # day 1
            [182.40, 415.60, 530.00],  # day 2
        ]
    ),
    dates=np.array(
        ["2026-05-21", "2026-05-22", "2026-05-23"],
        dtype="datetime64[D]",
    ),
    symbols=["AAPL", "MSFT", "GOOG"],
    asset_class="equities",
)

print(panel.shape)  # (3, 3)

Shape contract

close.shape == (len(dates), len(symbols)). The constructor verifies this:

try:
    UniversePanel(
        close=np.zeros((3, 4)),
        # datetime64 ranges need an explicit start and stop. A bare
        # np.arange(3, dtype="datetime64[D]") raises its own ValueError, which
        # the handler below would print instead of the shape-mismatch message.
        dates=np.arange("2026-05-21", "2026-05-24", dtype="datetime64[D]"),
        symbols=["AAPL", "MSFT"],  # only 2 symbols but close has 4 cols
        asset_class="equities",
    )
except ValueError as e:
    print(e)
    # close shape (3, 4) doesn't match (len(dates)=3, len(symbols)=2)

Factor-friendly slicing

# Cross-sectional z-score of the most recent close across the universe
last_close = panel.close[-1] # (A,) vector
z = (last_close - last_close.mean()) / last_close.std()


# 21-day momentum: close[-22] sits 21 rows before close[-1], so the panel
# needs at least 22 rows
mom = panel.close[-1] / panel.close[-22] - 1 # (A,) returns
z_mom = (mom - mom.mean()) / mom.std()


# Rolling vol: per-asset std of the last 21 log returns, scaled by sqrt(252)
# (annualization factor — assumes daily bars)
log_ret = np.log(panel.close[1:] / panel.close[:-1])
vol_21d = log_ret[-21:].std(axis=0) * np.sqrt(252)

CPU + GPU dispatch

import numpy as np
import cupy as cp

# Same panel, different backend
# (schematic: each `...` stands for data / remaining arguments to fill in;
# the two lines are not runnable as written)
cpu_panel = UniversePanel(close=np.array(...), ...)
gpu_panel = UniversePanel(close=cp.array(...), ...)

# Factor doesn't care:
def f_my_factor(panel: UniversePanel, /, h: int = 21):
    """Cross-sectional z-score of the trailing ``h``-bar return.

    Args:
        panel: Panel whose ``close`` is a (T, A) NumPy or CuPy array with at
            least ``h + 1`` rows.
        h: Look-back horizon in bars.

    Returns:
        (A,) array on the same backend as ``panel.close``; mean and standard
        deviation are computed ignoring NaNs.
    """
    # Use the array module that matches the panel's backend.
    xp = np if isinstance(panel.close, np.ndarray) else cp
    # An h-bar return spans h intervals, so the base row is h + 1 rows from
    # the end (close[-h] would give only an (h - 1)-bar return).
    ret = panel.close[-1] / panel.close[-(h + 1)] - 1
    return (ret - xp.nanmean(ret)) / xp.nanstd(ret)


# Both work: the same factor code runs on either backend
f_my_factor(cpu_panel)
f_my_factor(gpu_panel) # 10-100x faster on GPU (NOTE(review): presumably depends on panel size — confirm with a benchmark)

Auxiliary columns (sector, country, market_cap)

# Panel with auxiliary columns alongside the prices (`...` marks data to fill in).
panel = UniversePanel(
    close=np.array(...),
    dates=np.array(...),
    symbols=["AAPL", "MSFT", "JPM", "XOM"],
    asset_class="equities",
    # Same four caps repeated on each of 252 rows -> (252, 4).
    market_cap=np.array([[3e12, 2.5e12, 0.5e12, 0.6e12]] * 252),
    # One label per symbol -> (4,).
    sector=np.array(["Tech", "Tech", "Financial", "Energy"]),
    country=np.array(["US", "US", "US", "US"]),
)


# Sector-neutral momentum
def f_sector_neutral_momentum(panel, h: int = 21):
    """Z-score the trailing ``h``-bar return within each sector.

    Args:
        panel: Panel with a (T, A) ``close`` array of at least ``h + 1`` rows
            and an (A,) ``sector`` label array.
        h: Look-back horizon in bars.

    Returns:
        (A,) array of within-sector z-scores. Assets in a sector with a single
        member, or in a sector whose returns are all identical, keep a score
        of 0 because no dispersion exists to normalize by.
    """
    # An h-bar return spans h intervals, so the base row is h + 1 rows back.
    mom = panel.close[-1] / panel.close[-(h + 1)] - 1
    out = np.zeros_like(mom)
    for sec in np.unique(panel.sector):
        mask = panel.sector == sec
        if mask.sum() < 2:
            continue
        group = mom[mask]
        dispersion = group.std()
        # Identical returns would divide by zero; leave those scores at 0.
        if dispersion == 0:
            continue
        out[mask] = (group - group.mean()) / dispersion
    return out

Building a UniversePanel from the data lake

from mts1b_datalake import lake
from mts1b_foundation.market_data import UniversePanel

# Build the panel from the data lake, requesting only the listed columns.
panel = lake.build_panel(
    universe="us-large-cap",
    interval="daily",
    start="2014-01-01",
    end="2024-01-01",
    include=[
        "close",
        "high",
        "low",
        "volume",
        "market_cap",
        "sector",
    ],
)

Sanity checks for a UniversePanel

def validate_panel(panel: UniversePanel) -> list[str]:
    """Return a description of every sanity check the panel fails.

    An empty list means all checks passed. Messages appear in check order:
    all-NaN dates, non-positive prices, duplicate symbols, non-increasing
    dates.
    """
    prices = panel.close
    tickers = panel.symbols
    # Each entry pairs "did this check fail?" with the message to report.
    checks = (
        (np.isnan(prices).all(axis=1).any(), "at least one date has all NaN closes"),
        ((prices <= 0).any(), "non-positive prices present"),
        (len(set(tickers)) != len(tickers), "duplicate symbols in panel.symbols"),
        (not np.all(np.diff(panel.dates) > 0), "dates not strictly increasing"),
    )
    return [message for failed, message in checks if failed]


# Fail fast when any sanity check reports a problem.
if issues := validate_panel(panel):
    raise ValueError(f"panel sanity: {issues}")

See also