diff --git a/benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py b/benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py
new file mode 100644
index 00000000000..4844d7ed53d
--- /dev/null
+++ b/benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py
@@ -0,0 +1,549 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "matplotlib",
+# ]
+# ///
+
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+"""Sweep bits-vs-distortion for TurboQuant and plot the curves.
+
+Calls `vector-search-bench distortion` for each (dataset, bits) combination, parses the
+table from stdout, and plots reconstruction MSE and pairwise cosine-error curves with
+mean/median/max shown on a log-scaled y-axis.
+
+Each `--dataset` value may optionally pin a train layout with a colon, e.g.
+`--dataset cohere-small-100k:single`, for datasets that host more than one layout.
+
+Usage:
+  uv run benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py \\
+      --dataset sift-small-500k
+  uv run benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py \\
+      --dataset sift-small-500k --dataset glove-small-100k --samples 8192
+  uv run benchmarks/vector-search-bench/scripts/plot-turboquant-distortion.py \\
+      --dataset cohere-small-100k:single --bits 1 2 3 4 5 6 7 8 \\
+      --output /tmp/distortion.png
+"""
+
+import argparse
+import math
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+from matplotlib.lines import Line2D
+from matplotlib.ticker import MaxNLocator
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+DEFAULT_BINARY = REPO_ROOT / "target" / "release" / "vector-search-bench"
+
+METRIC_NAMES = [
+    "reconstruction MSE mean",
+    "reconstruction MSE median",
+    "reconstruction MSE max",
+    "decoded cosine err mean",
+    "decoded cosine err median",
+    "decoded cosine err max",
+]
+
+
+@dataclass(frozen=True)
+class DatasetTarget:
+    """One dataset to sweep, with the layout the bench should use for it."""
+
+    name: str
+    layout: str | None  # `None` means let the bench auto-pick.
+
+
+@dataclass
+class Run:
+    target: DatasetTarget
+    dim: int
+    bits: int
+    values: dict[str, float]
+
+    @property
+    def dataset(self) -> str:
+        return self.target.name
+
+
+DIM_RE = re.compile(r"dim=(\d+)")
+
+
+def parse_dataset_arg(spec: str, default_layout: str | None) -> DatasetTarget:
+    """Split a `name[:layout]` CLI value. `default_layout` fills in when no `:` is given."""
+    if ":" in spec:
+        name, layout = spec.split(":", 1)
+        return DatasetTarget(name=name, layout=layout or None)
+    return DatasetTarget(name=spec, layout=default_layout)
+
+
+def parse_dim(stdout: str) -> int:
+    """Pull `dim=N` out of the `## ...` header line."""
+    match = DIM_RE.search(stdout)
+    if not match:
+        raise RuntimeError(f"could not find dim=N in header:\n{stdout}")
+    return int(match.group(1))
+
+
+def parse_table(stdout: str) -> dict[str, float]:
+    """Pull `metric -> value` rows out of the tabled stdout."""
+    row_re = re.compile(r"│\s*(.+?)\s*│\s*([^│]+?)\s*│")
+    values: dict[str, float] = {}
+    for line in stdout.splitlines():
+        match = row_re.match(line)
+        if not match:
+            continue
+        metric, value = match.group(1).strip(), match.group(2).strip()
+        if metric in METRIC_NAMES:
+            values[metric] = float(value)
+    missing = [m for m in METRIC_NAMES if m not in values]
+    if missing:
+        raise RuntimeError(f"could not parse metrics {missing} from:\n{stdout}")
+    return values
+
+
+def run_one(
+    binary: Path,
+    target: DatasetTarget,
+    bits: int,
+    samples: int,
+    seed: int,
+    rounds: int,
+) -> Run:
+    cmd = [
+        str(binary),
+        "distortion",
+        "--dataset",
+        target.name,
+        "--bits",
+        str(bits),
+        "--samples",
+        str(samples),
+        "--seed",
+        str(seed),
+        "--rounds",
+        str(rounds),
+    ]
+    if target.layout:
+        cmd.extend(["--layout", target.layout])
+    layout_tag = f" layout={target.layout}" if target.layout else ""
+    print(f"  running {target.name}{layout_tag} @ bits={bits} ...", file=sys.stderr)
+    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+    return Run(
+        target=target,
+        dim=parse_dim(result.stdout),
+        bits=bits,
+        values=parse_table(result.stdout),
+    )
+
+
+def mse_bound_stage1(bits: int) -> float:
+    """Paper's normalized-MSE upper bound for TurboQuant_mse (Stage 1).
+
+    From the Stage 1 theorem (`main.tex`, line 272): for a unit-norm vector `x` quantized
+    to `b` bits per coordinate, `E[||x - x'||^2] <= (sqrt(3)*pi/2) / 4^b`. Vortex ships
+    this stage, so the bound applies to the `reconstruction MSE mean` curve directly.
+    """
+    return (math.sqrt(3.0) * math.pi / 2.0) / (4.0**bits)
+
+
+def mse_bound_stage2(bits: int) -> float:
+    """Paper's normalized-MSE upper bound for TurboQuant_prod (Stage 2 = MSE + QJL).
+
+    Stage 2 spends 1 bit per coordinate on the QJL residual correction, so the residual
+    MSE is the Stage-1 MSE evaluated at `b-1` bits, i.e. 4x looser than Stage 1.
+    """
+    return (math.sqrt(3.0) * math.pi / 2.0) / (4.0 ** (bits - 1))
+
+
+def compression_ratio(bits: int, dim: int) -> float:
+    """Theoretical TurboQuant compression ratio vs f32 storage.
+
+    Per the `vortex_tensor::encodings::turboquant` module docs, each vector is stored
+    as `padded_dim * bits / 8` bytes of quantized codes plus one f32 stored norm
+    (4 bytes), where `padded_dim` is the next power of two at least `dim`. The ratio is
+    nonlinear in `bits` because of POT padding and the per-vector norm overhead.
+    """
+    padded_dim = 1 << (dim - 1).bit_length() if dim > 1 else 1
+    per_vector_bytes = padded_dim * bits / 8.0 + 4.0
+    original_bytes = dim * 4.0
+    return original_bytes / per_vector_bytes
+
+
+def cosine_bound(bits: int, dim: int) -> float:
+    """Paper's Stage-2 inner-product bound, rendered as an absolute-error envelope.
+
+    From the Stage 2 theorem (`main.tex`, line 288): for unit y and an `x` quantized via
+    TurboQuant_prod (Stage 2, MSE + QJL residual), `E[|<y, x> - <y, x'>|^2] <=
+    sqrt(3)*pi^2/d * 4^(-b)`. Taking sqrt gives an upper envelope on the RMS error per
+    bit width, and by Jensen also on the mean abs error.
+
+    Caveat: Vortex currently implements only Stage 1 (no QJL residual correction). The
+    Stage 1 inner-product error is biased and can sit *above* this Stage-2 envelope.
+    """
+    return math.pi * (3.0**0.25) / math.sqrt(dim) / (2.0**bits)
+
+
+DATASET_PALETTE = [
+    "#1f77b4",  # blue
+    "#d62728",  # red
+    "#2ca02c",  # green
+    "#9467bd",  # purple
+    "#ff7f0e",  # orange
+    "#17becf",  # teal
+    "#e377c2",  # pink
+    "#8c564b",  # brown
+    "#7f7f7f",  # grey
+    "#bcbd22",  # olive
+]
+
+STAT_STYLES = [
+    # (metric_suffix, label, linestyle, linewidth, marker)
+    ("mean", "mean", "-", 2.4, "o"),
+    ("max", "max", ":", 1.4, None),
+]
+
+
+def plot(runs: list[Run], args: argparse.Namespace) -> None:
+    by_dataset: dict[str, list[Run]] = {}
+    for r in runs:
+        by_dataset.setdefault(r.dataset, []).append(r)
+    for ds_runs in by_dataset.values():
+        ds_runs.sort(key=lambda r: r.bits)
+
+    plt.rcParams.update(
+        {
+            "font.size": 11,
+            "axes.titlesize": 13,
+            "axes.titleweight": "semibold",
+            "axes.labelsize": 11,
+            "axes.spines.top": False,
+            "axes.spines.right": False,
+            "axes.grid": True,
+            "grid.alpha": 0.25,
+            "grid.linewidth": 0.6,
+            "legend.frameon": False,
+        }
+    )
+
+    fig, axes = plt.subplots(1, 3, figsize=(20, 6.5), constrained_layout=True)
+    fig.suptitle(
+        f"TurboQuant distortion vs bits per coordinate"
+        f"     (samples={args.samples:,}, seed={args.seed}, rounds={args.rounds})",
+        fontsize=14,
+        fontweight="semibold",
+    )
+
+    dataset_colors = {ds: DATASET_PALETTE[i % len(DATASET_PALETTE)] for i, ds in enumerate(by_dataset)}
+    dataset_dims = {ds: ds_runs[0].dim for ds, ds_runs in by_dataset.items()}
+
+    plot_panel(
+        axes[0],
+        by_dataset,
+        dataset_colors,
+        metric_prefix="reconstruction MSE",
+        title="Reconstruction MSE   (per vector, normalized)",
+        ylabel=r"$\|x - x^\prime\|^2 / \|x\|^2$",
+    )
+    bits_axis = sorted({r.bits for r in runs})
+    axes[0].plot(
+        bits_axis,
+        [mse_bound_stage1(b) for b in bits_axis],
+        color="#222222",
+        linestyle=(0, (4, 2, 1, 2)),
+        linewidth=1.6,
+        zorder=0,
+    )
+
+    plot_panel(
+        axes[1],
+        by_dataset,
+        dataset_colors,
+        metric_prefix="decoded cosine err",
+        title=r"Pairwise cosine error   $|\cos(x_i, x_j) - \cos(x_i^\prime, x_j^\prime)|$",
+        ylabel="absolute error",
+    )
+    for dataset, ds_runs in by_dataset.items():
+        color = dataset_colors[dataset]
+        d = ds_runs[0].dim
+        bits = sorted({r.bits for r in ds_runs})
+        axes[1].plot(
+            bits,
+            [cosine_bound(b, d) for b in bits],
+            color=color,
+            linestyle=(0, (4, 2, 1, 2)),
+            linewidth=1.2,
+            alpha=0.6,
+            zorder=0,
+        )
+
+    plot_compression_panel(axes[2], by_dataset, dataset_colors)
+
+    add_legends(fig, axes, dataset_colors, dataset_dims)
+    fig.text(
+        0.5,
+        -0.015,
+        "Cosine bound is the paper's Stage-2 (TurboQuant_prod, MSE + QJL residual) "
+        "envelope; Vortex currently ships Stage 1 only, so empirical curves may sit "
+        "above it.  Compression ratio is theoretical "
+        "(padded_dim * bits / 8 + 4 bytes per vector), excludes per-shard centroid "
+        "tables and file metadata.",
+        ha="center",
+        fontsize=9,
+        color="#555555",
+        wrap=True,
+    )
+
+    if args.output:
+        fig.savefig(args.output, dpi=140, bbox_inches="tight")
+        print(f"saved {args.output}", file=sys.stderr)
+    else:
+        plt.show()
+
+
+def plot_panel(
+    ax,
+    by_dataset: dict[str, list[Run]],
+    dataset_colors: dict[str, str],
+    metric_prefix: str,
+    title: str,
+    ylabel: str,
+) -> None:
+    for dataset, ds_runs in by_dataset.items():
+        color = dataset_colors[dataset]
+        bits = [r.bits for r in ds_runs]
+        for stat_key, _label, linestyle, linewidth, marker in STAT_STYLES:
+            metric = f"{metric_prefix} {stat_key}"
+            ys = [r.values[metric] for r in ds_runs]
+            ax.plot(
+                bits,
+                ys,
+                color=color,
+                linestyle=linestyle,
+                linewidth=linewidth,
+                marker=marker,
+                markersize=6,
+                markerfacecolor=color,
+                markeredgecolor="white",
+                markeredgewidth=0.8,
+                alpha=0.95 if marker else 0.75,
+            )
+    ax.set_yscale("log")
+    ax.set_xlabel("bits per coordinate")
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+    ax.grid(True, which="major", linewidth=0.7, alpha=0.45)
+    ax.grid(True, which="minor", linewidth=0.4, alpha=0.22)
+    ax.minorticks_on()
+
+
+def plot_compression_panel(
+    ax,
+    by_dataset: dict[str, list[Run]],
+    dataset_colors: dict[str, str],
+) -> None:
+    bits_axis = sorted({r.bits for runs in by_dataset.values() for r in runs})
+    for dataset, ds_runs in by_dataset.items():
+        color = dataset_colors[dataset]
+        d = ds_runs[0].dim
+        padded = 1 << (d - 1).bit_length() if d > 1 else 1
+        suffix = f"  (padded {padded})" if padded != d else "  (no padding)"
+        ax.plot(
+            bits_axis,
+            [compression_ratio(b, d) for b in bits_axis],
+            color=color,
+            linestyle="-",
+            linewidth=2.4,
+            marker="o",
+            markersize=6,
+            markerfacecolor=color,
+            markeredgecolor="white",
+            markeredgewidth=0.8,
+            label=f"{dataset}{suffix}",
+        )
+    ax.set_xlabel("bits per coordinate")
+    ax.set_ylabel(r"ratio vs f32  (=  $4d \,/\, (\mathrm{padded}\!\cdot\! b/8 + 4)$)")
+    ax.set_title("Compression ratio   (theoretical)")
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+    ax.grid(True, which="major", linewidth=0.7, alpha=0.45)
+    ax.grid(True, which="minor", linewidth=0.4, alpha=0.22)
+    ax.minorticks_on()
+    ax.legend(
+        title="dataset",
+        loc="upper right",
+        fontsize=9,
+        title_fontsize=10,
+    )
+
+
+def add_legends(
+    fig,
+    axes,
+    dataset_colors: dict[str, str],
+    dataset_dims: dict[str, int],
+) -> None:
+    dataset_handles = [
+        Line2D(
+            [],
+            [],
+            color=color,
+            linewidth=2.4,
+            marker="o",
+            markersize=6,
+            markerfacecolor=color,
+            markeredgecolor="white",
+            markeredgewidth=0.8,
+            label=f"{dataset}  (d = {dataset_dims[dataset]})",
+        )
+        for dataset, color in dataset_colors.items()
+    ]
+    stat_handles = [
+        Line2D(
+            [],
+            [],
+            color="#333333",
+            linestyle=linestyle,
+            linewidth=linewidth,
+            marker=marker,
+            markersize=6 if marker else 0,
+            markerfacecolor="#333333",
+            markeredgecolor="white",
+            markeredgewidth=0.8,
+            label=label,
+        )
+        for _, label, linestyle, linewidth, marker in STAT_STYLES
+    ]
+    mse_bound_handle_s1 = Line2D(
+        [],
+        [],
+        color="#222222",
+        linestyle=(0, (4, 2, 1, 2)),
+        linewidth=1.6,
+        label=r"paper bound:  $D_{\mathrm{mse}} \leq \frac{\sqrt{3}\,\pi}{2}\, 4^{-b}$",
+    )
+    cosine_bound_handle = Line2D(
+        [],
+        [],
+        color="#444444",
+        linestyle=(0, (4, 2, 1, 2)),
+        linewidth=1.2,
+        alpha=0.6,
+        label=(
+            r"paper Stage-2 bound:  "
+            r"$\sqrt{D_{\mathrm{prod}}} \leq \frac{\pi\,3^{1/4}}{\sqrt{d}}\, 2^{-b}$"
+        ),
+    )
+
+    axes[0].legend(
+        handles=dataset_handles + [mse_bound_handle_s1],
+        title="dataset / bound",
+        loc="upper right",
+        fontsize=10,
+        title_fontsize=10,
+    )
+    axes[1].legend(
+        handles=stat_handles + [cosine_bound_handle],
+        title="statistic / bound",
+        loc="upper right",
+        fontsize=10,
+        title_fontsize=10,
+    )
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "--dataset",
+        action="append",
+        required=True,
+        help=(
+            "Dataset to sweep (repeat to compare multiple). Optionally suffix "
+            "`:layout` to pin a specific train layout for that dataset, e.g. "
+            "`--dataset cohere-small-100k:single`. If omitted, the bench picks "
+            "the dataset's only layout, or errors if there are several."
+        ),
+    )
+    parser.add_argument(
+        "--layout",
+        default=None,
+        help=("Default train layout applied to any `--dataset` entry that doesn't pin its own with `:layout`."),
+    )
+    parser.add_argument("--samples", type=int, default=65536)
+    parser.add_argument(
+        "--bits",
+        type=int,
+        nargs="+",
+        default=[1, 2, 3, 4, 5, 6, 7, 8],
+        help="Bit widths to sweep (default: 1..=8).",
+    )
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--rounds", type=int, default=3)
+    parser.add_argument(
+        "--binary",
+        type=Path,
+        default=DEFAULT_BINARY,
+        help=f"Path to vector-search-bench (default: {DEFAULT_BINARY}).",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=None,
+        help="If set, save the chart to this path instead of opening a window.",
+    )
+    args = parser.parse_args()
+
+    print("building vector-search-bench (release) ...", file=sys.stderr)
+    subprocess.run(
+        ["cargo", "build", "-p", "vector-search-bench", "--release"],
+        cwd=REPO_ROOT,
+        check=True,
+    )
+
+    if not args.binary.exists():
+        sys.exit(f"binary not found at {args.binary} after build")
+
+    targets = [parse_dataset_arg(spec, args.layout) for spec in args.dataset]
+
+    runs: list[Run] = []
+    for target in targets:
+        layout_tag = f" (layout={target.layout})" if target.layout else ""
+        print(
+            f"sweeping {target.name}{layout_tag} over bits {args.bits} ...",
+            file=sys.stderr,
+        )
+        for bits in args.bits:
+            runs.append(
+                run_one(
+                    args.binary,
+                    target,
+                    bits,
+                    args.samples,
+                    args.seed,
+                    args.rounds,
+                )
+            )
+
+    print_summary(runs)
+    plot(runs, args)
+
+
+def print_summary(runs: list[Run]) -> None:
+    print()
+    print("Summary (one row per (dataset, bits)):")
+    header = ["dataset", "dim", "bits"] + METRIC_NAMES
+    widths = [max(len(h), 14) for h in header]
+    print("  " + "  ".join(h.ljust(w) for h, w in zip(header, widths)))
+    for r in runs:
+        cells = [r.dataset, str(r.dim), str(r.bits)] + [f"{r.values[m]:.3e}" for m in METRIC_NAMES]
+        print("  " + "  ".join(c.ljust(w) for c, w in zip(cells, widths)))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarks/vector-search-bench/src/distortion.rs b/benchmarks/vector-search-bench/src/distortion.rs
new file mode 100644
index 00000000000..b1e7717389f
--- /dev/null
+++ b/benchmarks/vector-search-bench/src/distortion.rs
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! TurboQuant distortion measurement on real vector datasets.
+//!
+//! Reports per-vector normalized reconstruction error (`||x - x'||^2 / ||x||^2`) and pairwise
+//! cosine-similarity error (`|cos(x_i, x_j) - cos(x'_i, x'_j)|`) after a full encode and decode
+//! roundtrip through the [`vortex_tensor::encodings::turboquant`] scheme. This is the same
+//! TurboQuant implementation the search subcommand stores on disk via
+//! [`BtrBlocksCompressorBuilder::with_turboquant`](vortex_btrblocks::BtrBlocksCompressorBuilder).
+
+use std::io::Write;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::bail;
+use tabled::settings::Style;
+use vortex::array::ArrayRef;
+use vortex::array::ExecutionCtx;
+use vortex::array::IntoArray;
+use vortex::array::VortexSessionExecute;
+use vortex::array::arrays::ExtensionArray;
+use vortex::array::arrays::FixedSizeListArray;
+use vortex::array::arrays::PrimitiveArray;
+use vortex::array::arrays::ScalarFnArray;
+use vortex::array::arrays::Struct;
+use vortex::array::arrays::StructArray;
+use vortex::array::arrays::extension::ExtensionArrayExt;
+use vortex::array::arrays::fixed_size_list::FixedSizeListArrayExt;
+use vortex::array::arrays::struct_::StructArrayExt;
+use vortex_bench::conversions::parquet_to_vortex_chunks;
+use vortex_bench::vector_dataset;
+use vortex_bench::vector_dataset::TrainLayout;
+use vortex_bench::vector_dataset::VectorDataset;
+use vortex_tensor::encodings::turboquant::TurboQuantConfig;
+use vortex_tensor::encodings::turboquant::turboquant_encode;
+use vortex_tensor::scalar_fns::cosine_similarity::CosineSimilarity;
+
+use crate::SESSION;
+use crate::ingest::transform_chunk;
+
+/// Inputs to a distortion run.
+#[derive(Debug, Clone)]
+pub struct DistortionConfig {
+    /// Dataset to load vectors from.
+    pub dataset: VectorDataset,
+    /// Train-split layout (used to locate the local parquet shards).
+    pub layout: TrainLayout,
+    /// Bits per quantized coordinate.
+    pub bits: u8,
+    /// Seed for the SORF rotation.
+    pub seed: u64,
+    /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF transform.
+    pub rounds: u8,
+    /// Number of base vectors to sample from the first train shard.
+    pub samples: usize,
+}
+
+/// Mean, median, and max of a sample of distortion measurements.
+#[derive(Debug, Clone)]
+pub struct DistortionStats {
+    /// Arithmetic mean.
+    pub mean: f32,
+    /// Median (mid element after a partial sort).
+    pub median: f32,
+    /// Maximum.
+    pub max: f32,
+}
+
+/// Per-dataset distortion report ready to render as markdown.
+#[derive(Debug, Clone)]
+pub struct DistortionReport {
+    /// Dataset the vectors came from.
+    pub dataset: VectorDataset,
+    /// Train-split layout used to locate the shard.
+    pub layout: TrainLayout,
+    /// Vector dimensionality.
+    pub dim: u32,
+    /// Bits per quantized coordinate.
+    pub bits: u8,
+    /// Seed for the SORF rotation.
+    pub seed: u64,
+    /// Number of SORF rounds.
+    pub rounds: u8,
+    /// Number of base vectors sampled.
+    pub samples: usize,
+    /// Per-vector normalized squared L2 reconstruction error.
+    pub reconstruction: DistortionStats,
+    /// Pairwise cosine-similarity error after decoding both sides.
+    pub decoded_cosine: DistortionStats,
+}
+
+/// Compute reconstruction error and cosine-similarity error for a TurboQuant roundtrip.
+pub async fn run_distortion(config: &DistortionConfig) -> Result<DistortionReport> {
+    let dataset = config.dataset;
+    let layout = config.layout;
+
+    let paths = vector_dataset::download(dataset, layout)
+        .await
+        .with_context(|| format!("download {}", dataset.name()))?;
+    let train_path = paths
+        .train_files
+        .first()
+        .with_context(|| format!("dataset {} has no train shards", dataset.name()))?
+        .clone();
+
+    let mut ctx = SESSION.create_execution_ctx();
+
+    let chunked = parquet_to_vortex_chunks(train_path).await?;
+    let struct_array: StructArray = chunked.into_array().execute(&mut ctx)?;
+    let transformed = transform_chunk(struct_array.into_array(), &mut ctx)?;
+    let emb_full = transformed
+        .as_opt::<Struct>()
+        .with_context(|| {
+            format!(
+                "transform_chunk did not return a Struct, got {}",
+                transformed.dtype()
+            )
+        })?
+        .unmasked_field_by_name("emb")
+        .context("transformed chunk missing `emb` field")?
+        .clone();
+
+    let n = config.samples.min(emb_full.len());
+    if n < 2 {
+        bail!(
+            "distortion: need at least 2 sampled vectors for cosine pairs, got {n} (dataset {})",
+            dataset.name(),
+        );
+    }
+    let emb = emb_full.slice(0..n)?;
+
+    let original = extract_flat_f32(&emb, &mut ctx)?;
+    let dim = pairs_per_row(&original, n)?;
+
+    let tq_config = TurboQuantConfig {
+        bit_width: config.bits,
+        seed: config.seed,
+        num_rounds: config.rounds,
+    };
+    let encoded = turboquant_encode(emb.clone(), &tq_config, &mut ctx)?;
+    let decoded_ext: ExtensionArray = encoded.execute(&mut ctx)?;
+    let decoded = decoded_ext.into_array();
+    let decoded_flat = extract_flat_f32(&decoded, &mut ctx)?;
+
+    let reconstruction = stats(&reconstruction_errors(&original, &decoded_flat, dim, n));
+
+    let half = n / 2;
+    let true_cosines = compute_cosines(emb.slice(0..half)?, emb.slice(half..2 * half)?, &mut ctx)?;
+    let decoded_cosines = compute_cosines(
+        decoded.slice(0..half)?,
+        decoded.slice(half..2 * half)?,
+        &mut ctx,
+    )?;
+    let decoded_cosine = stats(&abs_diff(&true_cosines, &decoded_cosines));
+
+    Ok(DistortionReport {
+        dataset,
+        layout,
+        dim: u32::try_from(dim).context("dim must fit in u32")?,
+        bits: config.bits,
+        seed: config.seed,
+        rounds: config.rounds,
+        samples: n,
+        reconstruction,
+        decoded_cosine,
+    })
+}
+
+/// Extract a flat `f32` slice from a `Vector<f32, dim>` extension array.
+fn extract_flat_f32(array: &ArrayRef, ctx: &mut ExecutionCtx) -> Result<Vec<f32>> {
+    let ext: ExtensionArray = array.clone().execute(ctx)?;
+    let fsl: FixedSizeListArray = ext.storage_array().clone().execute(ctx)?;
+    let elements: PrimitiveArray = fsl.elements().clone().execute(ctx)?;
+    Ok(elements.as_slice::<f32>().to_vec())
+}
+
+/// Compute one cosine per row over two equal-length tensor-like arrays.
+fn compute_cosines(lhs: ArrayRef, rhs: ArrayRef, ctx: &mut ExecutionCtx) -> Result<Vec<f32>> {
+    let len = lhs.len();
+    let sfn: ScalarFnArray = CosineSimilarity::try_new_array(lhs, rhs, len)?;
+    let prim: PrimitiveArray = sfn.into_array().execute(ctx)?;
+    Ok(prim.as_slice::<f32>().to_vec())
+}
+
+fn pairs_per_row(flat: &[f32], num_rows: usize) -> Result<usize> {
+    if num_rows == 0 {
+        bail!("distortion: cannot derive dim from zero rows");
+    }
+    if !flat.len().is_multiple_of(num_rows) {
+        bail!(
+            "distortion: flat element count {} not divisible by row count {num_rows}",
+            flat.len(),
+        );
+    }
+    Ok(flat.len() / num_rows)
+}
+
+/// Per-vector normalized reconstruction MSE. Rows whose original squared norm is below `1e-10`
+/// are dropped because their normalized error is numerically undefined.
+fn reconstruction_errors(
+    original: &[f32],
+    reconstructed: &[f32],
+    dim: usize,
+    num_rows: usize,
+) -> Vec<f32> {
+    let mut out = Vec::with_capacity(num_rows);
+    for row in 0..num_rows {
+        let start = row * dim;
+        let end = start + dim;
+        let orig = &original[start..end];
+        let recon = &reconstructed[start..end];
+        let norm_sq: f32 = orig.iter().map(|&v| v * v).sum();
+        if norm_sq < 1e-10 {
+            continue;
+        }
+        let err_sq: f32 = orig
+            .iter()
+            .zip(recon.iter())
+            .map(|(&a, &b)| (a - b) * (a - b))
+            .sum();
+        out.push(err_sq / norm_sq);
+    }
+    out
+}
+
+fn abs_diff(lhs: &[f32], rhs: &[f32]) -> Vec<f32> {
+    lhs.iter()
+        .zip(rhs.iter())
+        .map(|(&a, &b)| (a - b).abs())
+        .collect()
+}
+
+fn stats(samples: &[f32]) -> DistortionStats {
+    if samples.is_empty() {
+        return DistortionStats {
+            mean: f32::NAN,
+            median: f32::NAN,
+            max: f32::NAN,
+        };
+    }
+
+    let sum: f64 = samples.iter().map(|&v| f64::from(v)).sum();
+    #[expect(
+        clippy::cast_possible_truncation,
+        reason = "casting an f64 mean back to f32 is intentional and matches the input precision"
+    )]
+    let mean = (sum / samples.len() as f64) as f32;
+
+    let mut sorted = samples.to_vec();
+    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+    let mid = sorted.len() / 2;
+    let median = if sorted.len() % 2 == 1 {
+        sorted[mid]
+    } else {
+        0.5 * (sorted[mid - 1] + sorted[mid])
+    };
+
+    let max = samples.iter().copied().fold(f32::NEG_INFINITY, f32::max);
+
+    DistortionStats { mean, median, max }
+}
+
+impl DistortionReport {
+    /// Render the report as a markdown header line followed by a tabled table.
+    pub fn render(&self, writer: &mut dyn Write) -> Result<()> {
+        writeln!(
+            writer,
+            "## {} | dim={} | layout={} | bits={} | samples={} | seed={} | rounds={}",
+            self.dataset.name(),
+            self.dim,
+            self.layout.label(),
+            self.bits,
+            self.samples,
+            self.seed,
+            self.rounds,
+        )?;
+
+        let rows: &[(&str, f32)] = &[
+            ("reconstruction MSE mean", self.reconstruction.mean),
+            ("reconstruction MSE median", self.reconstruction.median),
+            ("reconstruction MSE max", self.reconstruction.max),
+            ("decoded cosine err mean", self.decoded_cosine.mean),
+            ("decoded cosine err median", self.decoded_cosine.median),
+            ("decoded cosine err max", self.decoded_cosine.max),
+        ];
+
+        let mut builder = tabled::builder::Builder::new();
+        builder.push_record(["metric", "value"]);
+        for &(metric, value) in rows {
+            builder.push_record([metric.to_owned(), format_metric(value)]);
+        }
+        let mut table = builder.build();
+        table.with(Style::modern());
+        writeln!(writer, "{table}")?;
+        Ok(())
+    }
+}
+
+fn format_metric(value: f32) -> String {
+    if value.is_nan() {
+        "nan".to_owned()
+    } else if value == 0.0 {
+        "0".to_owned()
+    } else if value.abs() < 1e-3 || value.abs() >= 1e4 {
+        format!("{value:.3e}")
+    } else {
+        format!("{value:.6}")
+    }
+}
diff --git a/benchmarks/vector-search-bench/src/lib.rs b/benchmarks/vector-search-bench/src/lib.rs
index 643cbb5bd0d..76b24390d09 100644
--- a/benchmarks/vector-search-bench/src/lib.rs
+++ b/benchmarks/vector-search-bench/src/lib.rs
@@ -5,6 +5,7 @@
 
 pub mod compression;
 pub mod display;
+pub mod distortion;
 pub mod expression;
 pub mod ingest;
 pub mod prepare;
@@ -13,9 +14,12 @@ pub mod scan;
 
 use std::sync::LazyLock;
 
+use anyhow::Result;
 use vortex::VortexSessionDefault;
 use vortex::io::session::RuntimeSessionExt;
 use vortex::session::VortexSession;
+use vortex_bench::vector_dataset::TrainLayout;
+use vortex_bench::vector_dataset::VectorDataset;
 
 pub static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
     // SAFETY: called from inside the LazyLock initializer, before any other access to
@@ -26,3 +30,38 @@ pub static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
     vortex_tensor::initialize(&session);
     session
 });
+
+/// Resolve a dataset's [`TrainLayout`].
+///
+/// Every benchmark has different sets of possible dataset layouts available. The user **must**
+/// provide one if there are multiple layouts. But if a dataset only has 1 layout, we can choose
+/// that for them as the default.
+pub fn resolve_layout(
+    dataset: VectorDataset,
+    requested: Option<TrainLayout>,
+) -> Result<TrainLayout> {
+    let layouts = dataset.layouts();
+
+    match requested {
+        Some(layout) => {
+            dataset.validate_layout(layout)?;
+            Ok(layout)
+        }
+        None => {
+            if layouts.len() == 1 {
+                Ok(layouts[0].layout())
+            } else {
+                let allowed = layouts
+                    .iter()
+                    .map(|s| s.layout().label())
+                    .collect::<Vec<_>>()
+                    .join(", ");
+                anyhow::bail!(
+                    "dataset {} hosts multiple layouts ([{}]): pass --layout to pick one",
+                    dataset.name(),
+                    allowed,
+                );
+            }
+        }
+    }
+}
diff --git a/benchmarks/vector-search-bench/src/main.rs b/benchmarks/vector-search-bench/src/main.rs
index 440de142bef..307c050d221 100644
--- a/benchmarks/vector-search-bench/src/main.rs
+++ b/benchmarks/vector-search-bench/src/main.rs
@@ -1,15 +1,20 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! `vector-search-bench` — on-disk cosine-similarity scan benchmark.
+//! `vector-search-bench` benchmarks for cosine-similarity scan and TurboQuant distortion.
 //!
 //! ```sh
-//! cargo run -p vector-search-bench --release -- \
+//! cargo run -p vector-search-bench --release -- search \
 //!     --dataset cohere-large-10m \
 //!     --layout partitioned \
 //!     --flavors vortex-uncompressed,vortex-turboquant \
 //!     --iterations 3 \
 //!     --threshold 0.8
+//!
+//! cargo run -p vector-search-bench --release -- distortion \
+//!     --dataset sift-small-500k \
+//!     --bits 4 \
+//!     --samples 4096
 //! ```
 
 use std::path::PathBuf;
@@ -17,13 +22,17 @@ use std::path::PathBuf;
 use anyhow::Context;
 use anyhow::Result;
 use clap::Parser;
+use clap::Subcommand;
 use vector_search_bench::compression::ALL_VECTOR_FLAVORS;
 use vector_search_bench::compression::VectorFlavor;
 use vector_search_bench::display::DatasetReport;
 use vector_search_bench::display::render;
+use vector_search_bench::distortion::DistortionConfig;
+use vector_search_bench::distortion::run_distortion;
 use vector_search_bench::prepare::CompressedVortexDataset;
 use vector_search_bench::prepare::prepare_all;
 use vector_search_bench::query::get_random_query_vector;
+use vector_search_bench::resolve_layout;
 use vector_search_bench::scan::ScanConfig;
 use vector_search_bench::scan::ScanTiming;
 use vector_search_bench::scan::run_search_scan;
@@ -35,7 +44,21 @@ use vortex_bench::vector_dataset::VectorDataset;
 
 #[derive(Parser, Debug)]
 #[command(version, about, long_about = None)]
-struct Args {
+struct Cli {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand, Debug)]
+enum Command {
+    /// On-disk cosine-similarity scan latency benchmark.
+    Search(SearchArgs),
+    /// TurboQuant distortion measurement: reconstruction error and cosine error.
+    Distortion(DistortionArgs),
+}
+
+#[derive(Parser, Debug)]
+struct SearchArgs {
     /// Dataset to benchmark. Single dataset per CLI invocation by design — large datasets
     /// are intentionally babysat one at a time.
     #[arg(long, value_enum)]
@@ -86,9 +109,55 @@ struct Args {
     tracing: bool,
 }
 
+#[derive(Parser, Debug)]
+struct DistortionArgs {
+    /// Dataset to load vectors from. One dataset per invocation.
+    #[arg(long, value_enum)]
+    dataset: VectorDataset,
+
+    /// Train-split layout. Required when the dataset publishes more than one layout.
+    #[arg(long, value_enum)]
+    layout: Option<TrainLayout>,
+
+    /// Bits per quantized coordinate.
+    #[arg(long, default_value_t = 4)]
+    bits: u8,
+
+    /// Seed for the SORF rotation.
+    #[arg(long, default_value_t = 42)]
+    seed: u64,
+
+    /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF transform.
+    #[arg(long, default_value_t = 3)]
+    rounds: u8,
+
+    /// Number of base vectors to sample from the first train shard (first N rows).
+    #[arg(long, default_value_t = 65536)]
+    samples: usize,
+
+    /// Optional path to write the rendered table to instead of stdout.
+    #[arg(long)]
+    output_path: Option<PathBuf>,
+
+    /// Emit verbose tracing.
+    #[arg(short, long)]
+    verbose: bool,
+
+    /// Enable perfetto tracing output.
+    #[arg(long)]
+    tracing: bool,
+}
+
 #[tokio::main]
 async fn main() -> Result<()> {
-    let args = Args::parse();
+    let cli = Cli::parse();
+    match cli.command {
+        Command::Search(args) => run_search(args).await,
+        Command::Distortion(args) => run_distortion_cmd(args).await,
+    }
+}
+
+async fn run_search(args: SearchArgs) -> Result<()> {
     setup_logging_and_tracing(args.verbose, args.tracing)?;
 
     let dataset = args.dataset;
@@ -105,12 +174,10 @@ async fn main() -> Result<()> {
         anyhow::bail!("no flavors selected, please pass at least one to --flavors");
     }
 
-    // Load the source embeddings parquet files.
     let datasets_paths = vector_dataset::download(dataset, layout)
         .await
         .with_context(|| format!("download {}", dataset.name()))?;
 
-    // Load all vortex files needed, compressing new ones if needed.
     let prepared = prepare_all(dataset, layout, &datasets_paths, &args.flavors).await?;
 
     let query_vector = get_random_query_vector(
@@ -131,14 +198,12 @@ async fn main() -> Result<()> {
         threshold: args.threshold,
     };
 
-    // Run all scans and record how long each takes.
     let mut scan_timings: Vec<ScanTiming> = Vec::with_capacity(prepared.len());
     for prep in &prepared {
         let timing = run_search_scan(prep, &query_vector.query, &scan_config).await?;
         scan_timings.push(timing);
     }
 
-    // Collect the benchmark results.
     let pairs: Vec<(VectorFlavor, &CompressedVortexDataset, &ScanTiming)> = prepared
         .iter()
         .zip(scan_timings.iter())
@@ -149,8 +214,6 @@ async fn main() -> Result<()> {
         vortex_results: &pairs,
     };
 
-    // Emit v3 JSONL if requested. The records carry the per-scan dimensions that
-    // ScanTiming itself does not (dataset, layout, threshold).
     if let Some(path) = args.gh_json_v3.as_ref() {
         let records: Vec<v3::V3Record> = scan_timings
             .iter()
@@ -179,7 +242,6 @@ async fn main() -> Result<()> {
         v3::write_jsonl_to_path(path, &records)?;
     }
 
-    // Print the results.
     if let Some(path) = args.output_path {
         let mut file =
             std::fs::File::create(&path).with_context(|| format!("create {}", path.display()))?;
@@ -193,32 +255,30 @@ async fn main() -> Result<()> {
     Ok(())
 }
 
-/// Every benchmark has different sets of possible dataset layouts available. The user **must**
-/// provide one if there are multiple layouts. But if a dataset only has 1 layout, we can choose
-/// that for them as the default.
-fn resolve_layout(dataset: VectorDataset, requested: Option<TrainLayout>) -> Result<TrainLayout> {
-    let layouts = dataset.layouts();
-
-    match requested {
-        Some(layout) => {
-            dataset.validate_layout(layout)?;
-            Ok(layout)
-        }
-        None => {
-            if layouts.len() == 1 {
-                Ok(layouts[0].layout())
-            } else {
-                let allowed = layouts
-                    .iter()
-                    .map(|s| s.layout().label())
-                    .collect::<Vec<_>>()
-                    .join(", ");
-                anyhow::bail!(
-                    "dataset {} hosts multiple layouts ([{}]): pass --layout to pick one",
-                    dataset.name(),
-                    allowed,
-                );
-            }
-        }
+async fn run_distortion_cmd(args: DistortionArgs) -> Result<()> {
+    setup_logging_and_tracing(args.verbose, args.tracing)?;
+
+    let layout = resolve_layout(args.dataset, args.layout)?;
+    let config = DistortionConfig {
+        dataset: args.dataset,
+        layout,
+        bits: args.bits,
+        seed: args.seed,
+        rounds: args.rounds,
+        samples: args.samples,
+    };
+
+    let report = run_distortion(&config).await?;
+
+    if let Some(path) = args.output_path {
+        let mut file =
+            std::fs::File::create(&path).with_context(|| format!("create {}", path.display()))?;
+        report.render(&mut file)?;
+    } else {
+        let stdout = std::io::stdout();
+        let mut handle = stdout.lock();
+        report.render(&mut handle)?;
     }
+
+    Ok(())
 }