@@ -118,35 +118,88 @@ def check_url(url, retries=2, delay=2):
     return url, False
 
 
-def check_links_in_file(file_path):
-    urls = extract_urls(file_path)
-    resolved_urls = [resolve_relative_url(file_path, url) for url in urls]
-    broken_urls = []
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        futures = {executor.submit(check_url, url): url for url in resolved_urls}
+def extract_all_urls_from_files(files):
+    """
+    Extract all URLs from all files, returning a dict of {file_path: [urls]}.
+    """
+    file_urls = {}
+    skipped_files = ["doc/blog/"]
+
+    for file_path in files:
+        if any(file_path.startswith(skipped) for skipped in skipped_files):
+            continue
+        urls = extract_urls(file_path)
+        resolved_urls = [resolve_relative_url(file_path, url) for url in urls]
+        if resolved_urls:
+            file_urls[file_path] = resolved_urls
+
+    return file_urls
+
+
+def check_all_links_parallel(file_urls, max_workers=20):
+    """
+    Check all URLs across all files in parallel with a shared thread pool.
+
+    Args:
+        file_urls: Dict of {file_path: [urls]}
+        max_workers: Max concurrent HTTP requests across ALL files
+
+    Returns:
+        Dict of {file_path: [broken_urls]}
+    """
+    all_broken_urls = {}
+
+    # Create a mapping of url -> file_path for tracking which file each URL came from
+    url_to_files = {}
+    for file_path, urls in file_urls.items():
+        for url in urls:
+            if url not in url_to_files:
+                url_to_files[url] = []
+            url_to_files[url].append(file_path)
+
+    # Check all unique URLs in parallel
+    url_results = {}
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = {executor.submit(check_url, url): url for url in url_to_files.keys()}
         for future in as_completed(futures):
-            url, is_valid = future.result()
-            if not is_valid:
-                broken_urls.append(url)
-    return broken_urls
+            url = futures[future]
+            _, is_valid = future.result()
+            url_results[url] = is_valid
+
+    # Map broken URLs back to their files
+    for url, is_valid in url_results.items():
+        if not is_valid:
+            for file_path in url_to_files[url]:
+                if file_path not in all_broken_urls:
+                    all_broken_urls[file_path] = []
+                all_broken_urls[file_path].append(url)
+
+    return all_broken_urls
 
 
 if __name__ == "__main__":
     files = sys.argv[1:]
-    all_broken_urls = {}
-    skipped_files = ["doc/blog/"]
-    for file_path in files:
-        if any(file_path.startswith(skipped) for skipped in skipped_files):
-            continue
-        print(f"Checking links in {file_path}")
-        broken_urls = check_links_in_file(file_path)
-        if broken_urls:
-            all_broken_urls[file_path] = broken_urls
+
+    print(f"Extracting URLs from {len(files)} file(s)...")
+    file_urls = extract_all_urls_from_files(files)
+
+    if not file_urls:
+        print("No URLs found to check.")
+        sys.exit(0)
+
+    total_urls = sum(len(urls) for urls in file_urls.values())
+    unique_urls = len(set(url for urls in file_urls.values() for url in urls))
+    print(f"Checking {unique_urls} unique URL(s) across {len(file_urls)} file(s) (total: {total_urls})...")
+
+    all_broken_urls = check_all_links_parallel(file_urls, max_workers=30)
+
     if all_broken_urls:
+        print("\n" + "=" * 80)
         for file_path, urls in all_broken_urls.items():
             print(f"Broken links in {file_path}:")
             for url in urls:
                 print(f" - {url}")
+        print("=" * 80)
         sys.exit(1)
     else:
         print("No broken links found.")