Skip to content

Commit 92e0fb4

Browse files
authored
Merge pull request #200 from American-Institutes-for-Research/HEA-820/nom-de-mesure-locale-regex
HEA-820: nom de mesure locale regex
2 parents ca92c46 + 5e8b3a0 commit 92e0fb4

File tree

3 files changed

+46
-20
lines changed

3 files changed

+46
-20
lines changed

pipelines/assets/livelihood_activity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def get_livelihood_activity_regexes() -> list:
243243
"nbr_pattern": r"(?:n[bo]?r?e?|no)\.?",
244244
"vendu_pattern": r"(?:quantité )?vendu(?:e|s|ss|es|ses)?",
245245
"separator_pattern": r" ?[:-]?",
246+
"name_of_local_measure_pattern": r"(?:name of (?:meas(?:ure)?\.?)|nom(?: (?:de la mesure(?: locale)?|de mesure locale|du mesure|d'unité|mesure locale|unité de mesure))?)",
246247
}
247248
# Compile the regexes
248249
compiled_regexes = []

pipelines/assets/livelihood_activity_regexes.json

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@
122122
"times_per_month"
123123
],
124124
[
125-
"name of measure",
125+
"{name_of_local_measure_pattern}",
126126
null,
127127
false,
128128
"name_of_local_measure"
@@ -223,12 +223,6 @@
223223
false,
224224
"times_per_month"
225225
],
226-
[
227-
"(?:nom de la mesure|nom du mesure|nom d'unité|nom mesure locale)",
228-
null,
229-
false,
230-
"name_of_local_measure"
231-
],
232226
[
233227
"(?:poids? de la mesure|poids? du mesure|poids? d'unité)",
234228
null,
@@ -500,7 +494,19 @@
500494
"quantity_sold"
501495
],
502496
[
503-
"{product_pattern}: name of meas\\.",
497+
"autre culture(?: de rente)?{separator_pattern} (?:type|nom)",
498+
null,
499+
true,
500+
"product__name"
501+
],
502+
[
503+
"(?:autre nouriture de base|autre legumineuse|autre culture)?{separator_pattern} ?\\(?{product_pattern}\\)?{separator_pattern} ?{name_of_local_measure_pattern}",
504+
null,
505+
true,
506+
"name_of_local_measure"
507+
],
508+
[
509+
"{product_pattern}{separator_pattern} {name_of_local_measure_pattern}:?\\.?",
504510
null,
505511
true,
506512
"name_of_local_measure"
@@ -757,12 +763,6 @@
757763
false,
758764
"number_of_local_measures"
759765
],
760-
[
761-
"autre culture(?: de rente)?{separator_pattern} (?:type|nom)",
762-
null,
763-
true,
764-
"product__name"
765-
],
766766
[
767767
"autre culture{separator_pattern} \\(?{product_pattern}\\)?(?: type)?",
768768
null,
@@ -847,12 +847,6 @@
847847
true,
848848
"expenditure"
849849
],
850-
[
851-
"(?:autre nouriture de base|autre legumineuse|autre culture)?{separator_pattern} ?\\(?{product_pattern}\\)?{separator_pattern} ?(?:nom de la mesure locale?|nom mesure locale|nom du mesure|nom unité de mesure|nom)",
852-
null,
853-
true,
854-
"name_of_local_measure"
855-
],
856850
[
857851
"{product_pattern} achetée?: quantité ?\\(?{unit_of_measure_pattern}\\)?",
858852
null,

pipelines_tests/test_assets/test_livelihood_activity_regexes.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,4 +940,35 @@
940940
"product_id": "petty trade",
941941
"attribute": "people_per_household"
942942
}
943+
,
944+
"huile: nom du mesure": {
945+
"is_start": true,
946+
"product_id": "huile",
947+
"attribute": "name_of_local_measure"
948+
},
949+
"sel - nom de la mesure locale": {
950+
"is_start": true,
951+
"product_id": "sel",
952+
"attribute": "name_of_local_measure"
953+
},
954+
"sucre: nom mesure locale": {
955+
"is_start": true,
956+
"product_id": "sucre",
957+
"attribute": "name_of_local_measure"
958+
},
959+
"eau pour humains: nom de mesure locale:.": {
960+
"is_start": true,
961+
"product_id": "eau pour humains",
962+
"attribute": "name_of_local_measure"
963+
},
964+
"eau pour betail: nom de la mesure locale": {
965+
"is_start": true,
966+
"product_id": "eau pour betail",
967+
"attribute": "name_of_local_measure"
968+
},
969+
"farine de ble - nom de la mesure locale": {
970+
"is_start": true,
971+
"product_id": "farine de ble",
972+
"attribute": "name_of_local_measure"
973+
}
943974
}

0 commit comments

Comments
 (0)