Skip to content

Commit f819089

Browse files
authored
Merge pull request #171 from FEWS-NET/HEA-751/CD07-AttributeError--int-object-has-no-attribute-map
Handle int when unexpectedly included in the labels, e.g. CD07 has 0 i…
2 parents b08eed7 + 2448e7d commit f819089

File tree

3 files changed

+117
-7
lines changed

3 files changed

+117
-7
lines changed

pipelines/utils.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,21 @@ def prepare_lookup(data: str | list[str] | pd.Series | pd.DataFrame) -> pd.Serie
8888
"""
8989
Prepare a Series or DataFrame for lookup operations by converting to lowercase strings and stripping whitespace.
9090
"""
91-
if isinstance(data, str):
92-
result = pd.DataFrame([data])
91+
if isinstance(data, pd.DataFrame):
92+
result = data
9393
elif isinstance(data, (list, pd.Series)):
9494
result = pd.DataFrame(data)
9595
else:
96-
result = data
96+
# Handle other types (like str, int, float)
97+
result = pd.DataFrame([data])
98+
9799
result = result.map(str).map(str.strip).map(str.lower).replace(r"\s+", " ", regex=True)
98-
if isinstance(data, str):
99-
result = result.iloc[0, 0]
100+
101+
if isinstance(data, pd.DataFrame):
102+
return result
100103
elif isinstance(data, (list, pd.Series)):
101-
result = result.iloc[:, 0]
102-
return result
104+
return result.iloc[:, 0]
105+
return result.iloc[0, 0]
103106

104107

105108
def verbose_pivot(df: pd.DataFrame, values: str | list[str], index: str | list[str], columns: str | list[str]):

pipelines_tests/test_utils/__init__.py

Whitespace-only changes.
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import pandas as pd
2+
from django.test import TestCase
3+
from pipelines.utils import prepare_lookup
4+
5+
6+
class PrepareLookupTestCase(TestCase):
7+
8+
def test_prepare_lookup_with_premitive_type_input(self):
9+
# empty string
10+
result = prepare_lookup("")
11+
self.assertEqual(result, "")
12+
13+
# a simple string
14+
result = prepare_lookup("nbr Mois")
15+
self.assertEqual(result, "nbr mois")
16+
17+
# an int
18+
result = prepare_lookup(0)
19+
self.assertEqual(result, "0")
20+
21+
# a float
22+
result = prepare_lookup(7.55)
23+
self.assertEqual(result, "7.55")
24+
25+
# a simple string with spaces
26+
result = prepare_lookup(" nbr Mois ")
27+
self.assertEqual(result, "nbr mois")
28+
29+
# string with multiple internal spaces
30+
result = prepare_lookup("Autre revenu (ex. crédit)")
31+
self.assertEqual(result, "autre revenu (ex. crédit)")
32+
33+
def test_prepare_lookup_with_list_input(self):
34+
# list with single element
35+
result = prepare_lookup(["water"])
36+
self.assertIsInstance(result, pd.Series)
37+
self.assertEqual(result[0], "water")
38+
39+
# list with multiple elements
40+
result = prepare_lookup(["Water", "inputs", "Social serv."])
41+
self.assertIsInstance(result, pd.Series)
42+
pd.testing.assert_series_equal(result, pd.Series(["water", "inputs", "social serv."], name=0))
43+
44+
def test_prepare_lookup_with_series_input(self):
45+
# simple series
46+
data = pd.Series(["Camel number owned", "Cattle number owned"])
47+
result = prepare_lookup(data)
48+
self.assertIsInstance(result, pd.Series)
49+
pd.testing.assert_series_equal(result, pd.Series(["camel number owned", "cattle number owned"], name=0))
50+
51+
# test with irregular spaces in elements
52+
data = pd.Series(["Camel number owned ", " cattle number Owned"])
53+
result = prepare_lookup(data)
54+
pd.testing.assert_series_equal(result, pd.Series(["camel number owned", "cattle number owned"], name=0))
55+
56+
# test with numeric elements
57+
data = pd.Series([123, 456])
58+
result = prepare_lookup(data)
59+
pd.testing.assert_series_equal(result, pd.Series(["123", "456"], name=0))
60+
61+
def test_prepare_lookup_with_dataframe_input(self):
62+
# single column dataframe
63+
data = pd.DataFrame({"lables": ["Livestock products"]})
64+
result = prepare_lookup(data)
65+
self.assertIsInstance(result, pd.DataFrame)
66+
pd.testing.assert_frame_equal(result, pd.DataFrame({"lables": ["livestock products"]}))
67+
68+
# multiple columns dataframe
69+
data = pd.DataFrame({"lables": ["Livestock products"], "another": ["Payment in kind "]})
70+
result = prepare_lookup(data)
71+
assert isinstance(result, pd.DataFrame)
72+
expected = pd.DataFrame({"lables": ["livestock products"], "another": ["payment in kind"]})
73+
pd.testing.assert_frame_equal(result, expected)
74+
75+
# numeric values
76+
data = pd.DataFrame({"column1": [123, 456], "column2": [78.9, 1011.12]})
77+
result = prepare_lookup(data)
78+
expected = pd.DataFrame({"column1": ["123", "456"], "column2": ["78.9", "1011.12"]})
79+
pd.testing.assert_frame_equal(result, expected)
80+
81+
# empty df
82+
data = pd.DataFrame()
83+
result = prepare_lookup(data)
84+
self.assertIsInstance(result, pd.DataFrame)
85+
self.assertTrue(result.empty)
86+
87+
# test that dataframe preserves structure
88+
data = pd.DataFrame(
89+
{
90+
"label": ["Cowpeas: kg produced", "Sorghum: kg produced"],
91+
"product": ["Cowpeas", "Sorghum"],
92+
"unit": ["kg", "kg"],
93+
}
94+
)
95+
result = prepare_lookup(data)
96+
self.assertEqual(result.shape, data.shape)
97+
self.assertEqual(list(result.columns), list(data.columns))
98+
99+
def test_prepare_lookup_with_special_characters(self):
100+
result = prepare_lookup("Autre nourriture: Poisson 2(sec)!@#$%")
101+
self.assertEqual(result, "autre nourriture: poisson 2(sec)!@#$%")
102+
# with tabs
103+
result = prepare_lookup("Autre nourriture: \tPoisson")
104+
self.assertEqual(result, "autre nourriture: poisson")
105+
# some unicode characters
106+
result = prepare_lookup("Revenu (Espèces)")
107+
self.assertEqual(result, "revenu (espèces)")

0 commit comments

Comments
 (0)