Skip to content

Commit c54b6e4

Browse files
committed
remove patient_id as merge col
1 parent 12a5916 commit c54b6e4

File tree

2 files changed

+6
-15
lines changed

2 files changed

+6
-15
lines changed

geniesp/bpc_redcap_export_mapping.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -634,18 +634,18 @@ def replace_cpt_seq_date(
634634
merge_cols = ["SAMPLE_ID"]
635635

636636
elif cpt_seq_date_replacement_type == "derived_variable":
637-
# there should only be a unique seq_date per patient id and sample id
637+
# there should be exactly one seq_date per sample id, because
638+
# cpt_seq_date is a field unique to the clinical sample data
638639
replacement_data = replacement_data[
639-
["cpt_genie_sample_id", "record_id", "cpt_seq_date"]
640+
["cpt_genie_sample_id", "cpt_seq_date"]
640641
].drop_duplicates()
641642

642643
# rename to match input data
643644
replacement_data.rename(columns = {
644645
"cpt_genie_sample_id" : "SAMPLE_ID",
645-
"record_id": "PATIENT_ID",
646646
"cpt_seq_date" : "CPT_SEQ_DATE"
647647
}, inplace = True)
648-
merge_cols = ["SAMPLE_ID", "PATIENT_ID"]
648+
merge_cols = ["SAMPLE_ID"]
649649
else:
650650
raise ValueError(f"cpt_seq_date_replacement_type: {cpt_seq_date_replacement_type} invalid!")
651651

tests/test_bpc_redcap_export_mapping.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,6 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
198198
[
199199
(pd.DataFrame(
200200
{
201-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
202201
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
203202
"CPT_SEQ_DATE": ["2014", "2015"],
204203
}
@@ -212,14 +211,12 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
212211
),
213212
pd.DataFrame(
214213
{
215-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
216214
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
217215
"CPT_SEQ_DATE": [None, None],
218216
}
219217
)),
220218
(pd.DataFrame(
221219
{
222-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
223220
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
224221
"CPT_SEQ_DATE": ["2014", "2015"],
225222
}
@@ -233,14 +230,12 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
233230
),
234231
pd.DataFrame(
235232
{
236-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
237233
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
238234
"CPT_SEQ_DATE": ["2017", None],
239235
}
240236
)),
241237
(pd.DataFrame(
242238
{
243-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
244239
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
245240
"CPT_SEQ_DATE": ["2014", "2015"],
246241
}
@@ -254,14 +249,12 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
254249
),
255250
pd.DataFrame(
256251
{
257-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
258252
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
259253
"CPT_SEQ_DATE": ["2017", "2018"],
260254
}
261255
)),
262256
(pd.DataFrame(
263257
{
264-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
265258
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
266259
"CPT_SEQ_DATE": ["2014", "2015"],
267260
}
@@ -275,14 +268,12 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
275268
),
276269
pd.DataFrame(
277270
{
278-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
279271
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
280272
"CPT_SEQ_DATE": ["2014", "2015"],
281273
}
282274
)),
283275
(pd.DataFrame(
284276
{
285-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
286277
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
287278
"CPT_SEQ_DATE": ["2012", "2013"],
288279
}
@@ -296,7 +287,6 @@ def test_that_get_derived_variable_file_gets_file_correctly(mock_syn):
296287
),
297288
pd.DataFrame(
298289
{
299-
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
300290
"SAMPLE_ID": ["GENIE-1-1", "GENIE-1-2"],
301291
"CPT_SEQ_DATE": ["2014", "2015"],
302292
}
@@ -330,6 +320,7 @@ def test_that_replace_cpt_seq_date_replaces_correctly_with_derived_variable_repl
330320
),
331321
pd.DataFrame(
332322
{
323+
"PATIENT_ID": ["GENIE-1", "GENIE-1"],
333324
"SAMPLE_ID": ["GENIE-1-3", "GENIE-1-4"],
334325
"SEQ_YEAR": ["2017", "2018"],
335326
}
@@ -395,7 +386,7 @@ def test_that_replace_cpt_seq_date_replaces_correctly_with_derived_variable_repl
395386
}
396387
))
397388
],
398-
ids = ["none_replaced", "some_replaced", "all_replaced", "the_same"]
389+
ids = ["none_replaced_with_extra_cols", "some_replaced", "all_replaced", "the_same"]
399390
)
400391
def test_that_replace_cpt_seq_date_replaces_correctly_with_main_genie_replacement_type(input, clinical, expected):
401392
output = bpc_export.replace_cpt_seq_date(

0 commit comments

Comments
 (0)