Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions openeo_driver/save_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ class WorkspaceExport:
merge: Optional[str]


def clean_parquet_column_name(name: str):
# Could not find official SPEC for this, but cleaning just for consistency
# https://geoparquet.org/releases/v1.0.0/#column-metadata
return re.sub(r"[()/\\]", "_", name)


def get_temp_file(suffix="", prefix="openeo-pydrvr-"):
# TODO: make sure temp files are cleaned up when read
_, filename = tempfile.mkstemp(suffix=suffix, prefix=prefix)
Expand Down Expand Up @@ -631,7 +637,9 @@ def to_geoparquet(self, destination: Optional[str] = None) -> str:
# TODO: avoid accessing _geometries to combine _regions and CSV
gdf = self._regions._geometries

gpd.GeoDataFrame(stats.join(gdf, on='feature_index')).to_parquet(filename)
(gpd.GeoDataFrame(stats.join(gdf, on='feature_index'))
.rename(columns=clean_parquet_column_name)
.to_parquet(filename))
return filename

def to_driver_vector_cube(self) -> DriverVectorCube:
Expand Down Expand Up @@ -837,7 +845,7 @@ def to_geoparquet(self, destination: Optional[str] = None) -> str:

(gdf
.join(stats.set_index('feature_index'), on='feature_index')
.rename(columns=lambda col_name: re.sub(r"\W", "_", col_name)) # adhere to naming restriction [A-Za-z0-9_]
.rename(columns=clean_parquet_column_name)
.to_parquet(filename))
# TODO: Is naming restriction required for parquet files?

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
feature_index,avg(band_0),avg_band_0_,avg(band_2)
1,4645.719597475695,4865.467252259935,5177.803342998465
0,4646.262612301313,4865.926572218383,5178.517363510712
16 changes: 16 additions & 0 deletions tests/test_save_result_parquet.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from openeo_driver.datacube import DriverVectorCube
from openeo_driver.save_result import AggregatePolygonSpatialResult
from .data import get_path
Expand Down Expand Up @@ -27,6 +29,20 @@ def test_save_aggregate_polygon_spatial_result(tmp_path):
}


def test_save_column_name(tmp_path):
csv_dir = get_path("aggregate_spatial_spatial_cube_name_collision")

vector_cube = DriverVectorCube(gpd.read_file(str(get_path("geojson/FeatureCollection02.json"))))

output_file = tmp_path / "test.parquet"

spatial_result = AggregatePolygonSpatialResult(csv_dir, regions=vector_cube, format="Parquet")

with pytest.raises(ValueError) as exc_info:
spatial_result.to_geoparquet(destination=str(output_file))
assert "Duplicate column names found" in str(exc_info.value)


def test_write_driver_vector_cube_to_parquet(tmp_path):
vector_cube = DriverVectorCube(gpd.read_file(str(get_path("geojson/FeatureCollection02.json"))))
vector_cube.write_assets(tmp_path / "dummy", format="Parquet")
Expand Down