Run Record Reference
Structure of persisted run records
Location
Run records are stored as JSON files in `.regtrace/runs/`. When the SQLite
database is enabled, records are also persisted in the `runs` table.
Filename convention
`<run_id>.json`

`run_id` is formatted as `run_YYYYMMDD_<random>`, where `<random>` is a
6-character alphanumeric suffix that avoids collisions.
Schema
// Persisted record of a single run: its outcome and trigger, the tool, judge
// and config identity, the evaluated golden set, aggregate scores,
// per-test-case results, and the regression analysis.
interface RunRecord {
  run_id: string; // Unique run identifier (run_YYYYMMDD_<random>)
  timestamp: string; // ISO 8601 timestamp
  status: "passed" | "failed" | "errored"; // Run outcome
  trigger: "cli" | "ci" | "watch"; // What triggered the run
  duration_ms: number; // Duration in milliseconds
  regtrace_version: string; // Regtrace version used
  judge_provider: string; // LLM provider name
  judge_model: string; // LLM model name
  config_hash: string; // SHA-256 of config at run time
  branch?: string; // Git branch where the run was executed (optional)
  golden_set_name: string; // Name of the evaluated set
  golden_set_version: string; // Version of the evaluated set
  golden_set_file_hash: string; // SHA-256 of golden set file
  suite_score: number; // Aggregate score (0.0–1.0)
  metric_summary: Record<string, { // Per-metric aggregation
    score: number; // Average score across cases
    pass_rate: number; // Fraction of cases passed
  }>;
  test_case_results: TestCaseResult[]; // Per-test-case results
  regression: RegressionBlock; // Regression analysis
}

Test case result
// Result of evaluating one golden-set test case within a run; these are the
// elements of RunRecord.test_case_results.
interface TestCaseResult {
  test_case_id: string; // ID from the golden set
  input: string; // Input sent to the LLM
  actual_output: string; // LLM response evaluated
  overall_passed: boolean; // All metrics passed
  severity: "pass" | "warn" | "fail"; // Severity of the outcome
  metric_results: Record<string, MetricResult>; // Per-metric results
  regression_delta?: Record<string, number>; // Per-metric deltas (optional)
}

Metric result
// Result of evaluating one metric for a single test case; these are the
// values of TestCaseResult.metric_results.
interface MetricResult {
  metric_name: string; // Metric name, e.g. "factuality", "format"
  score: number; // Score in the range 0.0–1.0
  confidence: number; // 0.0–1.0 (1.0 for deterministic)
  passed: boolean; // True when score >= threshold
  threshold: number; // Threshold applied
  explanation: string; // Human-readable reasoning
  evaluation_type: "deterministic" | "llm_judged"; // How the score was computed
  token_cost: number; // Estimated token cost (0 for deterministic)
  details?: AssertionDetail[]; // Optional per-assertion breakdown of sub-checks
}
## Assertion detail
```typescript
// One sub-check within a metric evaluation; these are the elements of
// MetricResult.details. Only `check` and `passed` are always present.
interface AssertionDetail {
  check: string; // Hierarchical check name (e.g. "json_path.$.amount", "format.length")
  passed: boolean; // Whether this specific assertion passed
  expected?: string; // Expected value (truncated to 80 chars)
  actual?: string; // Actual value (truncated to 80 chars)
  message?: string; // Human-readable description of the assertion
}
```

## Regression block
```typescript
// Comparison of a run against a baseline run; stored as RunRecord.regression.
// NOTE(review): the two baseline_* fields are nullable — presumably null when
// no baseline run was available; confirm against the writer.
interface RegressionBlock {
  baseline_run_id: string | null; // Baseline run used for comparison
  baseline_golden_set_version: string | null; // Golden set version at baseline
  current_golden_set_version: string; // Golden set version at this run
  version_change_detected: boolean; // Version changed since baseline
  suite_delta: number; // Negative = regression, positive = improvement
  regression_status: "clean" | "warning" | "critical"; // Regression severity
  test_cases_excluded: string[]; // IDs excluded due to version change
  metric_deltas: Record<string, number>; // Per-metric average delta
  metric_tolerances_applied?: Record<string, number>; // Per-metric tolerance values used (optional)
}