Skip to content

Commit 939c0be

Browse files
Merge pull request #18 from Geocodio/fix/nested-field-parsing
fix(parser): support nested field structures with backward compatibility
2 parents a71fbbb + ee94771 commit 939c0be

File tree

3 files changed

+256
-55
lines changed

3 files changed

+256
-55
lines changed

src/geocodio/client.py

Lines changed: 164 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@
2222
Location, GeocodioFields, Timezone, CongressionalDistrict,
2323
CensusData, ACSSurveyData, StateLegislativeDistrict, SchoolDistrict,
2424
Demographics, Economics, Families, Housing, Social,
25-
FederalRiding, ProvincialRiding, StatisticsCanadaData, ListResponse, PaginatedResponse
25+
FederalRiding, ProvincialRiding, StatisticsCanadaData, ListResponse, PaginatedResponse,
26+
ZIP4Data, FFIECData
2627
)
2728
from geocodio.exceptions import InvalidRequestError, AuthenticationError, GeocodioServerError, BadRequestError
2829

@@ -402,7 +403,15 @@ def _parse_list_response(response_json: dict, response: httpx.Response = None) -
402403
http_response=response,
403404
)
404405

406+
405407
def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None:
408+
"""
409+
Parse fields data from API response.
410+
411+
Supports both nested and flat field structures for backward compatibility:
412+
- Nested: census: {2010: {...}, 2020: {...}}, acs: {demographics: {...}}
413+
- Flat: census2010: {...}, acs-demographics: {...}
414+
"""
406415
if not fields_data:
407416
return None
408417

@@ -436,30 +445,84 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None:
436445
for district in fields_data["stateleg-next"]
437446
]
438447

448+
# School districts - support both nested dict and flat list formats
439449
school_districts = None
440-
if "school" in fields_data:
441-
school_districts = [
442-
SchoolDistrict.from_api(district)
443-
for district in fields_data["school"]
444-
]
445-
446-
# Dynamically parse all census fields (e.g., census2010, census2020, census2024, etc.)
447-
# This supports any census year returned by the API
448-
from dataclasses import fields as dataclass_fields
449-
valid_field_names = {f.name for f in dataclass_fields(GeocodioFields)}
450450

451-
census_fields = {}
451+
# Check for nested dict format: school_districts: {elementary: {...}, secondary: {...}}
452+
if "school_districts" in fields_data:
453+
school_data = fields_data["school_districts"]
454+
if isinstance(school_data, dict):
455+
# Nested dict format - iterate over dict values
456+
school_districts = [
457+
SchoolDistrict.from_api(district)
458+
for district in school_data.values()
459+
]
460+
elif isinstance(school_data, list):
461+
# List format (backward compatibility)
462+
school_districts = [
463+
SchoolDistrict.from_api(district)
464+
for district in school_data
465+
]
466+
467+
# Also check for flat list format: school: [...]
468+
elif "school" in fields_data:
469+
school_data = fields_data["school"]
470+
if isinstance(school_data, dict):
471+
# Dict format
472+
school_districts = [
473+
SchoolDistrict.from_api(district)
474+
for district in school_data.values()
475+
]
476+
elif isinstance(school_data, list):
477+
# List format
478+
school_districts = [
479+
SchoolDistrict.from_api(district)
480+
for district in school_data
481+
]
482+
483+
# Census fields - support both nested and flat structures
484+
# Store in dict for dynamic access (fields.census2020, fields.census2031, etc.)
485+
census_data_dict = {}
486+
487+
def parse_census_data(data: dict) -> dict:
488+
"""
489+
Parse census data and map new field names to old field names for backward compatibility.
490+
491+
API used to send: block, blockgroup, tract
492+
API now sends: block_code, block_group, tract_code
493+
494+
We populate both so existing code using old names continues to work.
495+
"""
496+
parsed = dict(data) # Copy original data
497+
498+
# Map new field names to old field names if old names not present
499+
if "block_code" in data and "block" not in data:
500+
parsed["block"] = data["block_code"]
501+
if "block_group" in data and "blockgroup" not in data:
502+
parsed["blockgroup"] = data["block_group"]
503+
if "tract_code" in data and "tract" not in data:
504+
parsed["tract"] = data["tract_code"]
505+
506+
return parsed
507+
508+
# Check for nested census structure: census: {2010: {...}, 2020: {...}}
509+
if "census" in fields_data and isinstance(fields_data["census"], dict):
510+
for year, census_data in fields_data["census"].items():
511+
field_name = f"census{year}"
512+
# Map new field names to old for backward compatibility
513+
parsed_data = parse_census_data(census_data)
514+
census_data_dict[field_name] = CensusData.from_api(parsed_data)
515+
516+
# Also check for flat structure: census2010: {...}, census2020: {...}
517+
# This ensures backward compatibility if API sends both formats
452518
for key in fields_data:
453-
if key.startswith("census") and key[6:].isdigit(): # e.g., "census2024"
454-
# Only include if it's a defined field in GeocodioFields
455-
if key in valid_field_names:
456-
census_fields[key] = CensusData.from_api(fields_data[key])
457-
458-
acs = (
459-
ACSSurveyData.from_api(fields_data["acs"])
460-
if "acs" in fields_data else None
461-
)
519+
if key.startswith("census") and key[6:].isdigit() and key not in census_data_dict:
520+
# Map new field names to old for backward compatibility
521+
parsed_data = parse_census_data(fields_data[key])
522+
census_data_dict[key] = CensusData.from_api(parsed_data)
462523

524+
# Parse flat ACS structure for backward compatibility
525+
# These will be merged with nested structure later if both exist
463526
demographics = (
464527
Demographics.from_api(fields_data["acs-demographics"])
465528
if "acs-demographics" in fields_data else None
@@ -485,6 +548,58 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None:
485548
if "acs-social" in fields_data else None
486549
)
487550

551+
# ACS fields - support both nested and flat structures
552+
acs_fields = {}
553+
acs = None
554+
555+
# Check for ACS field
556+
if "acs" in fields_data and isinstance(fields_data["acs"], dict):
557+
acs_data = fields_data["acs"]
558+
559+
# Check if this is nested ACS structure (contains metric keys)
560+
# or simple ACS structure (contains population, households, etc.)
561+
acs_metric_keys = {"demographics", "economics", "families", "housing", "social"}
562+
563+
if any(key in acs_data for key in acs_metric_keys):
564+
# Nested structure: acs: {demographics: {...}, economics: {...}}
565+
acs_metric_map = {
566+
"demographics": Demographics,
567+
"economics": Economics,
568+
"families": Families,
569+
"housing": Housing,
570+
"social": Social,
571+
}
572+
573+
for metric, model_class in acs_metric_map.items():
574+
if metric in acs_data:
575+
acs_fields[metric] = model_class.from_api(acs_data[metric])
576+
else:
577+
# Simple structure: acs: {population: ..., households: ..., median_income: ...}
578+
acs = ACSSurveyData.from_api(acs_data)
579+
580+
# Also preserve flat structure parsing for backward compatibility
581+
if demographics and "demographics" not in acs_fields:
582+
acs_fields["demographics"] = demographics
583+
if economics and "economics" not in acs_fields:
584+
acs_fields["economics"] = economics
585+
if families and "families" not in acs_fields:
586+
acs_fields["families"] = families
587+
if housing and "housing" not in acs_fields:
588+
acs_fields["housing"] = housing
589+
if social and "social" not in acs_fields:
590+
acs_fields["social"] = social
591+
592+
# ZIP4 and FFIEC data
593+
zip4 = (
594+
ZIP4Data.from_api(fields_data["zip4"])
595+
if "zip4" in fields_data else None
596+
)
597+
598+
ffiec = (
599+
FFIECData.from_api(fields_data["ffiec"])
600+
if "ffiec" in fields_data else None
601+
)
602+
488603
# Canadian fields
489604
riding = (
490605
FederalRiding.from_api(fields_data["riding"])
@@ -506,23 +621,45 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None:
506621
if "statcan" in fields_data else None
507622
)
508623

624+
# Collect all known field keys that were parsed
625+
parsed_keys = {
626+
"timezone", "cd", "congressional_districts",
627+
"stateleg", "stateleg-next",
628+
"school", "school_districts", # Both school formats
629+
"census", # Nested census structure
630+
"acs", # Nested ACS structure
631+
"acs-demographics", "acs-economics", "acs-families", "acs-housing", "acs-social",
632+
"zip4", "ffiec",
633+
"riding", "provriding", "provriding-next",
634+
"statcan",
635+
}
636+
# Add flat census keys that were parsed (census2000, census2020, etc.)
637+
# All census years are now stored in _census dict for dynamic access
638+
parsed_keys.update(census_data_dict.keys())
639+
640+
# Extras - capture any fields not explicitly handled
641+
# This is now mainly for truly unknown API fields (not census years)
642+
extras = {
643+
k: v for k, v in fields_data.items()
644+
if k not in parsed_keys
645+
}
646+
509647
return GeocodioFields(
510648
timezone=timezone,
511649
congressional_districts=congressional_districts,
512650
state_legislative_districts=state_legislative_districts,
513651
state_legislative_districts_next=state_legislative_districts_next,
514652
school_districts=school_districts,
515653
acs=acs,
516-
demographics=demographics,
517-
economics=economics,
518-
families=families,
519-
housing=housing,
520-
social=social,
654+
zip4=zip4,
655+
ffiec=ffiec,
521656
riding=riding,
522657
provriding=provriding,
523658
provriding_next=provriding_next,
524659
statcan=statcan,
525-
**census_fields, # Dynamically include all census year fields
660+
extras=extras,
661+
_census=census_data_dict, # All census years stored here
662+
**acs_fields, # Dynamically include all ACS metric fields
526663
)
527664

528665
# @TODO add a "keep_trying" parameter to download() to keep trying until the list is processed.

0 commit comments

Comments
 (0)