Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
6f078dd
Initial index implementation
FrancescAlted Apr 1, 2026
37ec18c
More bench, and moderate index improvements
FrancescAlted Apr 1, 2026
43c73f0
Implement chunk-aware and reduced-order NDArray indexing
FrancescAlted Apr 2, 2026
241d125
New get_1d_span_numpy for reading single blocks
FrancescAlted Apr 2, 2026
43e8e67
Use lazychunks for avoiding a full chunk load
FrancescAlted Apr 2, 2026
bf4771c
Speed up indexed point queries and benchmark the public query path
FrancescAlted Apr 2, 2026
cb2a0cd
New (preliminary) algorithm for light indexes
FrancescAlted Apr 2, 2026
c1e69ab
Rework light indexes around coarse block-local order
FrancescAlted Apr 2, 2026
61b2338
Add dtype-aware indexing benchmarks and lossy float light indexes
FrancescAlted Apr 2, 2026
3cf0df5
Add missing bench for pytables indexing
FrancescAlted Apr 2, 2026
60c1668
Make index builders out-of-core by default
FrancescAlted Apr 2, 2026
dc0a189
Add ordered access and append maintenance to NDArray indexes
FrancescAlted Apr 2, 2026
c429829
Polish the single-target NDArray indexing model
FrancescAlted Apr 2, 2026
9a95bdb
Add materialized expression indexes for NDArray
FrancescAlted Apr 2, 2026
c1c8925
Add append-run maintenance for full indexes
FrancescAlted Apr 2, 2026
7027fef
Improve full-index selective lookup with L1 and L2 caches
FrancescAlted Apr 3, 2026
da8785e
Add bounded run fallback and document compact_index API
FrancescAlted Apr 3, 2026
6b80e29
Add tutorial on indexes
FrancescAlted Apr 3, 2026
1dd1f25
Docstrings for LazyExpr.explain()
FrancescAlted Apr 3, 2026
2e4785e
Rework light/medium indexes to chunk-local OPSI-style layout
FrancescAlted Apr 4, 2026
2751776
Fix OOC light/medium append rebuilds
FrancescAlted Apr 4, 2026
e401e7a
Tune medium nav density by optlevel
FrancescAlted Apr 4, 2026
87d0b96
Release compaction memmaps before unlink on Windows
FrancescAlted Apr 4, 2026
7fb696d
Split indexing accelerators into indexing_ext and widen dtype coverage
FrancescAlted Apr 4, 2026
94cb5e6
Fix stale in-memory index store reuse
FrancescAlted Apr 4, 2026
d1c3636
Replace full OOC temp runs with Blosc2 scratch arrays
FrancescAlted Apr 5, 2026
426dce9
Accelerate indexed queries with threaded chunk batching
FrancescAlted Apr 5, 2026
d3e9cc8
Add configurable chunk/block geometry to index query bench
FrancescAlted Apr 5, 2026
9498a1a
New geometry for blocks in medium indexes
FrancescAlted Apr 5, 2026
53d7651
Release OOC temp memmaps before Windows cleanup
FrancescAlted Apr 5, 2026
ba9cdc1
Remove memmap staging from light/medium index builds
FrancescAlted Apr 6, 2026
a9a7f15
Rework chunk-local index builds around intra-chunk sorting
FrancescAlted Apr 6, 2026
5666fdb
Stream benchmark array generation and rename random to permuted
FrancescAlted Apr 6, 2026
5f24620
New --kind option for selecting the kind of the index
FrancescAlted Apr 6, 2026
8946c85
Better table formatting
FrancescAlted Apr 6, 2026
7c8765f
Honor cparams in create_index()
FrancescAlted Apr 7, 2026
65e6fcb
Document index build kwargs and compression controls
FrancescAlted Apr 7, 2026
e489527
Reduce chunk size on macos to make index sorting times reasonable
FrancescAlted Apr 7, 2026
b218dbe
Make more common defaults
FrancescAlted Apr 7, 2026
d90b684
Some API cleanup
FrancescAlted Apr 7, 2026
99fc8d0
Document will_use_index and add tests
FrancescAlted Apr 7, 2026
f4ecadf
Fix Windows file-locking error in rebuild_index test
FrancescAlted Apr 8, 2026
ce11d99
Fixing windows/mmap issues (I)
FrancescAlted Apr 8, 2026
72da1c8
Use latest miniexpr sources
FrancescAlted Apr 8, 2026
90f586d
Clamp indexing Python threads to 1 on wasm32
FrancescAlted Apr 8, 2026
8933439
Avoid mmap on main array in light index worker on Windows
FrancescAlted Apr 8, 2026
9656f28
Use latest miniexpr sources
FrancescAlted Apr 8, 2026
1a021a8
Reduce Cython indexing helper overhead
FrancescAlted Apr 8, 2026
f9a0616
Disable mmap for all index I/O on Windows
FrancescAlted Apr 8, 2026
4804726
Use latest miniexpr sources
FrancescAlted Apr 8, 2026
ebde7c1
Clamp wasm NDArray thread defaults and skip executor-only indexing tests
FrancescAlted Apr 8, 2026
7db48be
Use latest miniexpr sources
FrancescAlted Apr 8, 2026
692c0e3
Enable broader Cython optimizations in indexing_ext helpers
FrancescAlted Apr 8, 2026
3e06322
New Index class to unify access for array in index sidecar
FrancescAlted Apr 9, 2026
d91d4f4
Raise on direct assignment to NDArray.fields entries
FrancescAlted Apr 9, 2026
4da9547
Speed up exact row gathering for scattered index hits
FrancescAlted Apr 9, 2026
0c5711c
Comparison with DuckDB and moved benchmarks to bench/indexing
FrancescAlted Apr 9, 2026
fbb0d55
New version of indexes tutorial
FrancescAlted Apr 9, 2026
40fc4ab
Honor copy-inducing kwargs in asarray for NDArray inputs
FrancescAlted Apr 9, 2026
c5bbc40
Add persistent query-result cache for indexed lookups
FrancescAlted Apr 9, 2026
b112f3a
Fix query-result cache isolation and non-exact warm reuse
FrancescAlted Apr 9, 2026
038b6f6
Implement FIFO pruning for persistent query cache
FrancescAlted Apr 9, 2026
8062d4a
Use cached mmap handles in evaluate_full_query for zero-copy block reads
FrancescAlted Apr 9, 2026
38bc290
Update comparison with DuckDB on a MacMini w/ M4 Pro, 24GB RAM
FrancescAlted Apr 9, 2026
af8a205
Use latest c-blosc2 sources
FrancescAlted Apr 9, 2026
5b82cf9
Use nbytes instead of cbytes as the main metric for cache accounting
FrancescAlted Apr 10, 2026
49bbc9a
Simplify query cache accounting and overflow policy
FrancescAlted Apr 10, 2026
9856f2f
Refine DuckDB query benchmark scan and table reporting
FrancescAlted Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Repository Guidelines

## Project Structure & Module Organization
The Python package lives in `src/blosc2/`, including the C/Cython extension sources
(`blosc2_ext.*`) and core modules such as `core.py`, `ndarray.py`, and `schunk.py`.
Tests are under `tests/`, with additional doctests enabled for select modules per
`pytest.ini`. Documentation sources are in `doc/` and build output lands in `html/`.
Examples are in `examples/`, and performance/benchmark scripts live in `bench/`.

## Build, Test, and Development Commands
- `pip install .` builds the bundled C-Blosc2 and installs the package.
- `pip install -e .` installs in editable mode for local development.
- `CMAKE_PREFIX_PATH=/usr/local USE_SYSTEM_BLOSC2=1 pip install -e .` builds
against a separately installed C-Blosc2.
- `pytest` runs the default test suite (excludes `heavy` and `network` markers).
- `pytest -m "heavy"` runs long-running tests.
- `pytest -m "network"` runs tests requiring network access.
- `cd doc && rm -rf ../html _build && python -m sphinx . ../html` builds docs.

## Coding Style & Naming Conventions
Use Ruff for formatting and linting (line length 109). Enable pre-commit hooks:
`python -m pip install pre-commit` then `pre-commit install`. Follow Python
conventions: 4-space indentation, `snake_case` for functions/variables, and
`PascalCase` for classes. Pytest discovery expects `tests/test_*.py` and
`test_*` functions. Do not use leading underscores in module-level helper
function names when those helpers are imported from other modules; reserve
leading underscores for file-local implementation details. Avoid leading
underscores in core module filenames under `src/blosc2/`; prefer non-underscored
module names unless there is a strong reason to keep a module private.

For documentation and tutorial query examples, prefer the shortest idiom that
matches the intended result type. Use `expr[:]` or `arr[mask][:]` when showing
values, use `expr.compute()` when materializing an `NDArray`, and use
`expr.compute(_use_index=False)` when demonstrating scan-vs-index behavior.
Avoid `expr.compute()[:]` unless a NumPy array is specifically required.

## Testing Guidelines
Pytest is required; warnings are treated as errors. The default configuration
adds `--doctest-modules`, so keep doctest examples in `src/blosc2/core.py`,
`src/blosc2/ndarray.py`, and `src/blosc2/schunk.py` accurate. Use markers `heavy` and
`network` for slow or network-dependent tests.

## Commit & Pull Request Guidelines
Recent commit messages are short, imperative sentences (e.g., “Add …”, “Fix …”)
without ticket prefixes. For pull requests: branch from `main`, add tests for
behavior changes, update docs for API changes, ensure the test suite passes,
and avoid introducing new compiler warnings. Link issues when applicable and
include clear reproduction steps for bug fixes.
16 changes: 13 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,20 @@ add_custom_command(
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/blosc2_ext.pyx"
VERBATIM)

# Cythonize src/blosc2/indexing_ext.pyx into indexing_ext.c.
# The .pyx file is the only declared dependency, so this rule re-runs whenever
# that source changes.
add_custom_command(
  OUTPUT indexing_ext.c
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/indexing_ext.pyx"
  COMMAND
    Python::Interpreter -m cython
    "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/indexing_ext.pyx"
    --output-file indexing_ext.c
  VERBATIM
)

# Turn each generated C source into a Python extension module (WITH_SOABI
# requests the interpreter's ABI tag in the file name) and link it against
# NumPy.

# Main extension module
Python_add_library(blosc2_ext MODULE blosc2_ext.c WITH_SOABI)
target_link_libraries(blosc2_ext PRIVATE Python::NumPy)

# Indexing helpers extension module
Python_add_library(indexing_ext MODULE indexing_ext.c WITH_SOABI)
target_link_libraries(indexing_ext PRIVATE Python::NumPy)

# Fetch and build miniexpr library
include(FetchContent)
Expand All @@ -63,7 +72,7 @@ endif()

FetchContent_Declare(miniexpr
GIT_REPOSITORY https://github.com/Blosc/miniexpr.git
GIT_TAG feadbc633a887bafd84b2fbc370ef2962d01b7ee
GIT_TAG f2faef741c4c507bf6a03167c72ce7f92c6f0ae8
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../miniexpr
)
FetchContent_MakeAvailable(miniexpr)
Expand All @@ -72,6 +81,7 @@ FetchContent_MakeAvailable(miniexpr)
# Both extension modules are compiled as C11.
target_compile_features(blosc2_ext PRIVATE c_std_11)
target_compile_features(indexing_ext PRIVATE c_std_11)

# Only blosc2_ext is linked against the static miniexpr library here.
target_link_libraries(blosc2_ext PRIVATE miniexpr_static)
if(WIN32 AND CMAKE_C_COMPILER_ID STREQUAL "Clang")
execute_process(
COMMAND "${CMAKE_C_COMPILER}" -print-resource-dir
Expand Down Expand Up @@ -119,7 +129,7 @@ else()
include(FetchContent)
FetchContent_Declare(blosc2
GIT_REPOSITORY https://github.com/Blosc/c-blosc2
GIT_TAG b32256fc1287b6e24c22f09ac202265c7054e2bc
GIT_TAG 0568990388e6201240b170947d4c2199572f795d
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../c-blosc2
)
FetchContent_MakeAvailable(blosc2)
Expand Down Expand Up @@ -148,7 +158,7 @@ endif()

# Python extension -> site-packages/blosc2
install(
TARGETS blosc2_ext
TARGETS blosc2_ext indexing_ext
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2
)

Expand Down
Loading
Loading