Coverage for cc_modules/cc_redcap.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""camcops_server/cc_modules/cc_redcap.py
5===============================================================================
7 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Implements communication with REDCap.**
28- For general information about REDCap, see https://www.project-redcap.org/.
30- The API documentation is not provided there, but is available from
31 your local REDCap server. Pick a project. Choose "API" from the left-hand
32 menu. Follow the "REDCap API documentation" link.
34- We use PyCap (https://pycap.readthedocs.io/ or
35 https://github.com/redcap-tools/PyCap). See also
36 https://redcap-tools.github.io/projects/. PyCap is no longer being actively
37 developed though the author is still responding to issues and pull requests.
39We use an XML fieldmap to describe how the rows in CamCOPS task tables are
40translated into REDCap records. See :ref:`REDCap export <redcap>`.
42REDCap does not assign instance IDs for repeating instruments so we need to
43query the database in order to determine the next instance ID. It is possible
44to create a race condition if more than one client is trying to update the same
45record at the same time.
47"""
49from enum import Enum
50import io
51import logging
52from typing import (
53 Any,
54 Dict,
55 Iterable,
56 List,
57 Optional,
58 TYPE_CHECKING,
59 Union,
60)
61import xml.etree.cElementTree as ElementTree
63from asteval import Interpreter, make_symbol_table
64from cardinal_pythonlib.datetimefunc import format_datetime
65from cardinal_pythonlib.logs import BraceStyleAdapter
66from pandas import DataFrame
67from pandas.errors import EmptyDataError
68import redcap
70from camcops_server.cc_modules.cc_constants import (
71 ConfigParamExportRecipient,
72 DateFormat,
73)
74from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient
76if TYPE_CHECKING:
77 from camcops_server.cc_modules.cc_exportmodels import ExportedTaskRedcap
78 from camcops_server.cc_modules.cc_request import CamcopsRequest
79 from camcops_server.cc_modules.cc_task import Task
# Module-level logger for this file, wrapped in cardinal_pythonlib's
# BraceStyleAdapter.
log = BraceStyleAdapter(
    logging.getLogger(__name__)
)
# Error text used by RedcapTaskExporter.export_task() when the REDCap project
# is longitudinal but not every instrument in the fieldmap has an event name.
MISSING_EVENT_TAG_OR_ATTRIBUTE = (
    "The REDCap project has events but there is no 'event' tag in the "
    "fieldmap or an instrument is missing an 'event' attribute"
)
class RedcapExportException(Exception):
    """
    Raised by this module when a REDCap export cannot proceed: for example,
    when the fieldmap XML is missing or invalid, or when the REDCap API
    reports an error.
    """
class RedcapFieldmap(object):
    """
    Internal representation of the fieldmap XML file.
    This describes how the task fields should be translated to
    the REDCap record.

    Attributes set by the constructor:

    - ``filename``: name of the XML file that was parsed
    - ``patient``: attributes of the ``<patient>`` tag (has at least
      ``instrument`` and ``redcap_field``)
    - ``record``: attributes of the ``<record>`` tag (has at least
      ``instrument`` and ``redcap_field``)
    - ``fields``: ``{task: {redcap_field_name: formula}}``
    - ``files``: ``{task: {redcap_field_name: formula}}``
    - ``instruments``: ``{task: instrument_name}``
    - ``events``: ``{task: event_name}``; the event name is ``None`` if
      neither the instrument nor the top-level ``<event>`` tag supplies one
    """

    def __init__(self, filename: str) -> None:
        """
        Parses and validates the fieldmap.

        Args:
            filename:
                Name of an XML file telling CamCOPS how to map task fields
                to REDCap. See :ref:`REDCap export <redcap>`.

        Raises:
            :exc:`RedcapExportException` if the file cannot be found, cannot
            be parsed, or lacks a required tag or attribute.
        """
        self.filename = filename
        self.fields = {}  # type: Dict[str, Dict[str, str]]
        # ... {task: {name: formula}}
        self.files = {}  # type: Dict[str, Dict[str, str]]
        # ... {task: {name: formula}}
        self.instruments = {}  # type: Dict[str, str]
        # ... {task: instrument_name}
        self.events = {}  # type: Dict[str, str]
        # ... {task: event_name}

        parser = ElementTree.XMLParser(encoding="UTF-8")
        try:
            tree = ElementTree.parse(filename, parser=parser)
        except FileNotFoundError as e:
            raise RedcapExportException(
                f"Unable to open fieldmap file '{filename}'"
            ) from e
        except ElementTree.ParseError as e:
            raise RedcapExportException(
                f"There was a problem parsing {filename}: {str(e)}"
            ) from e

        root = tree.getroot()
        if root.tag != "fieldmap":
            raise RedcapExportException(
                (f"Expected the root tag to be 'fieldmap' instead of "
                 f"'{root.tag}' in {filename}")
            )

        patient_element = root.find("patient")
        if patient_element is None:
            raise RedcapExportException(
                f"'patient' is missing from {filename}"
            )

        self.patient = self._validate_and_return_attributes(
            patient_element, ("instrument", "redcap_field")
        )

        record_element = root.find("record")
        if record_element is None:
            raise RedcapExportException(
                f"'record' is missing from {filename}"
            )

        self.record = self._validate_and_return_attributes(
            record_element, ("instrument", "redcap_field")
        )

        # The top-level <event> tag is optional. If present, it provides the
        # event name for any instrument without its own 'event' attribute.
        default_event = None
        event_element = root.find("event")
        if event_element is not None:
            event_attributes = self._validate_and_return_attributes(
                event_element, ("name",)
            )
            default_event = event_attributes['name']

        instrument_elements = root.find("instruments")
        if instrument_elements is None:
            raise RedcapExportException(
                f"'instruments' tag is missing from {filename}"
            )

        for instrument_element in instrument_elements:
            instrument_attributes = self._validate_and_return_attributes(
                instrument_element, ("name", "task")
            )

            task = instrument_attributes["task"]
            self.instruments[task] = instrument_attributes["name"]
            self.events[task] = instrument_attributes.get("event",
                                                          default_event)
            self.fields[task] = self._read_formulas(instrument_element,
                                                    "fields")
            self.files[task] = self._read_formulas(instrument_element,
                                                   "files")

    def _read_formulas(self, instrument_element: ElementTree.Element,
                       container_tag: str) -> Dict[str, str]:
        """
        Reads the children of an instrument's ``<fields>`` or ``<files>``
        container and returns ``{name: formula}``. Returns an empty dictionary
        if the container tag is absent or has no children.

        Raises :exc:`RedcapExportException` if a child lacks ``name`` or
        ``formula``.
        """
        container = instrument_element.find(container_tag)
        # Compare with None explicitly: an Element with no children is falsy,
        # and testing an Element's truth value is deprecated.
        if container is None:
            return {}

        formulas = {}  # type: Dict[str, str]
        for child in container:
            attributes = self._validate_and_return_attributes(
                child, ("name", "formula")
            )
            formulas[attributes["name"]] = attributes["formula"]

        return formulas

    def _validate_and_return_attributes(
            self, element: ElementTree.Element,
            expected_attributes: Iterable[str]) -> Dict[str, str]:
        """
        Checks that all the expected attributes are present in the XML element
        (from the fieldmap XML file), or raises :exc:`RedcapExportException`.

        Returns:
            the element's attribute dictionary (which may also contain
            attributes beyond the expected ones)
        """
        # Materialize once, so that a one-shot iterable is not exhausted by
        # the membership check before the error message is built.
        expected = tuple(expected_attributes)
        attributes = element.attrib

        if not all(a in attributes for a in expected):
            raise RedcapExportException(
                (f"'{element.tag}' must have attributes: "
                 f"{', '.join(expected)} in {self.filename}")
            )

        return attributes

    def instrument_names(self) -> List[str]:
        """
        Returns the names of all REDCap instruments.
        """
        return list(self.instruments.values())
class RedcapTaskExporter(object):
    """
    Main entry point for task export to REDCap. Works out which record needs
    updating or creating. Creates the fieldmap and initiates upload.
    """
    def export_task(self,
                    req: "CamcopsRequest",
                    exported_task_redcap: "ExportedTaskRedcap") -> None:
        """
        Exports a specific task, then stores the resulting REDCap record ID,
        instrument name and instance ID on ``exported_task_redcap``.

        Args:
            req:
                a :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
            exported_task_redcap:
                a :class:`camcops_server.cc_modules.cc_exportmodels.ExportedTaskRedcap`

        Raises:
            :exc:`RedcapExportException` if the task is anonymous, the
            fieldmap is unusable for this task/project, or REDCap reports an
            error.
        """  # noqa
        exported_task = exported_task_redcap.exported_task
        recipient = exported_task.recipient
        task = exported_task.task

        if task.is_anonymous:
            raise RedcapExportException(
                f"Skipping anonymous task '{task.tablename}'"
            )

        which_idnum = recipient.primary_idnum
        # NOTE(review): assumes the patient has an ID number of this type
        # (idnum_object is dereferenced below without a None check) - confirm.
        idnum_object = task.patient.get_idnum_object(which_idnum)

        project = self.get_project(recipient)
        fieldmap = self.get_fieldmap(recipient)

        # A longitudinal project needs an event name for every instrument.
        if project.is_longitudinal():
            if not all(fieldmap.events.values()):
                raise RedcapExportException(MISSING_EVENT_TAG_OR_ATTRIBUTE)

        existing_records = self._get_existing_records(project, fieldmap)
        existing_record_id = self._get_existing_record_id(
            existing_records,
            fieldmap,
            idnum_object.idnum_value
        )

        if existing_record_id is None:
            uploader_class = RedcapNewRecordUploader
        else:
            uploader_class = RedcapUpdatedRecordUploader

        try:
            instrument_name = fieldmap.instruments[task.tablename]
        except KeyError as e:
            raise RedcapExportException(
                (f"Instrument for task '{task.tablename}' is missing from the "
                 f"fieldmap")
            ) from e

        record_id_fieldname = fieldmap.record["redcap_field"]

        next_instance_id = self._get_next_instance_id(existing_records,
                                                      instrument_name,
                                                      record_id_fieldname,
                                                      existing_record_id)

        uploader = uploader_class(req, project)

        new_record_id = uploader.upload(task, existing_record_id,
                                        next_instance_id,
                                        fieldmap, idnum_object.idnum_value)

        exported_task_redcap.redcap_record_id = new_record_id
        exported_task_redcap.redcap_instrument_name = instrument_name
        exported_task_redcap.redcap_instance_id = next_instance_id

    @staticmethod
    def _get_existing_records(project: "redcap.project.Project",
                              fieldmap: "RedcapFieldmap") -> "DataFrame":
        """
        Returns a Pandas data frame containing existing REDCap records for this
        project, for instruments we are interested in.

        Args:
            project:
                a :class:`redcap.project.Project`
            fieldmap:
                a :class:`RedcapFieldmap`

        Raises:
            :exc:`RedcapExportException` if REDCap reports an error
        """
        # Arguments to pandas read_csv()

        type_dict = {
            # otherwise pandas may infer as int or str
            fieldmap.record["redcap_field"]: str,
        }

        df_kwargs = {
            "index_col": None,  # don't index by record_id
            "dtype": type_dict,
        }

        forms = (fieldmap.instrument_names() +
                 [fieldmap.patient["instrument"]] +
                 [fieldmap.record["instrument"]])

        try:
            records = project.export_records(format="df", forms=forms,
                                             df_kwargs=df_kwargs)
        except EmptyDataError:
            # Should not happen, but in case of PyCap failing to catch this...
            return DataFrame()
        except redcap.RedcapError as e:
            raise RedcapExportException(str(e)) from e

        return records

    @staticmethod
    def _get_existing_record_id(records: "DataFrame",
                                fieldmap: "RedcapFieldmap",
                                idnum_value: int) -> Optional[str]:
        """
        Returns the ID of an existing record that matches a specific
        patient, if one can be found.

        Args:
            records:
                records retrieved from REDCap; Pandas data frame from
                :meth:`_get_existing_records`
            fieldmap:
                :class:`RedcapFieldmap`
            idnum_value:
                CamCOPS patient ID number

        Returns:
            REDCap record ID or ``None``

        Raises:
            :exc:`RedcapExportException` if the patient ID field named in the
            fieldmap is not a column of ``records``
        """

        if records.empty:
            return None

        patient_id_fieldname = fieldmap.patient["redcap_field"]

        if patient_id_fieldname not in records:
            raise RedcapExportException(
                (f"Field '{patient_id_fieldname}' does not exist in REDCap. "
                 f"Is the 'patient' tag in the fieldmap correct?")
            )

        matching = records[records[patient_id_fieldname] == idnum_value]

        if len(matching) == 0:
            return None

        # NOTE(review): takes the first column of the first matching row, so
        # this relies on the record ID being the first column of the export.
        return matching.iat[0, 0]

    @staticmethod
    def _get_next_instance_id(records: "DataFrame",
                              instrument: str,
                              record_id_fieldname: str,
                              existing_record_id: Optional[str]) -> int:
        """
        Returns the next REDCap instance ID to use for a particular
        instrument within a record (the previous highest instance ID plus 1,
        or 1 if none can be found).

        Args:
            records:
                records retrieved from REDCap; Pandas data frame from
                :meth:`_get_existing_records`
            instrument:
                instrument name
            record_id_fieldname:
                name of the REDCap field (data frame column) holding the
                record ID
            existing_record_id:
                ID of existing record, or ``None`` if there isn't one

        Raises:
            :exc:`RedcapExportException` if ``record_id_fieldname`` is not a
            column of ``records``
        """
        if existing_record_id is None:
            return 1

        if record_id_fieldname not in records:
            raise RedcapExportException(
                (f"Field '{record_id_fieldname}' does not exist in REDCap. "
                 f"Is the 'record' tag in the fieldmap correct?")
            )

        previous_instances = records[
            (records["redcap_repeat_instrument"] == instrument) &
            (records[record_id_fieldname] == existing_record_id)
        ]

        if len(previous_instances) == 0:
            return 1

        # Reduce only the instance column. Taking max() of the whole frame
        # would also try to reduce unrelated (possibly text) columns.
        highest_instance = previous_instances["redcap_repeat_instance"].max()

        return int(highest_instance + 1)

    def get_fieldmap(self, recipient: "ExportRecipient") -> "RedcapFieldmap":
        """
        Returns the relevant :class:`RedcapFieldmap`, parsed from the file
        named by :meth:`get_fieldmap_filename`.

        Args:
            recipient:
                an
                :class:`camcops_server.cc_modules.cc_exportmodels.ExportRecipient`
        """  # noqa
        fieldmap = RedcapFieldmap(self.get_fieldmap_filename(recipient))

        return fieldmap

    @staticmethod
    def get_fieldmap_filename(recipient: "ExportRecipient") -> str:
        """
        Returns the name of the XML file containing our fieldmap details, or
        raises :exc:`RedcapExportException` if the recipient's setting is
        ``None`` or an empty string.

        Args:
            recipient:
                an
                :class:`camcops_server.cc_modules.cc_exportmodels.ExportRecipient`
        """  # noqa
        filename = recipient.redcap_fieldmap_filename
        if filename is None:
            raise RedcapExportException(
                f"{ConfigParamExportRecipient.REDCAP_FIELDMAP_FILENAME} "
                f"is not set in the config file"
            )

        if filename == "":
            raise RedcapExportException(
                f"{ConfigParamExportRecipient.REDCAP_FIELDMAP_FILENAME} "
                f"is empty in the config file"
            )

        return filename

    @staticmethod
    def get_project(recipient: "ExportRecipient") -> "redcap.project.Project":
        """
        Returns the :class:`redcap.project.Project`, built from the
        recipient's REDCap API URL and key.

        Args:
            recipient:
                an
                :class:`camcops_server.cc_modules.cc_exportmodels.ExportRecipient`

        Raises:
            :exc:`RedcapExportException` if PyCap raises a
            :exc:`redcap.RedcapError`
        """  # noqa
        try:
            project = redcap.project.Project(
                recipient.redcap_api_url, recipient.redcap_api_key
            )
        except redcap.RedcapError as e:
            raise RedcapExportException(str(e)) from e

        return project
class RedcapRecordStatus(Enum):
    """
    Valid values for REDCap's per-instrument "Form Status -> Complete?"
    field. The integer value is what gets written to the
    ``<instrument>_complete`` field of an uploaded record.
    """
    INCOMPLETE = 0
    UNVERIFIED = 1
    COMPLETE = 2
class RedcapUploader(object):
    """
    Uploads records and files into REDCap, transforming the fields via the
    fieldmap.

    Abstract base class. Subclasses must provide :meth:`get_record_id`,
    :meth:`get_new_record_id`, :meth:`log_success`, ``return_content`` and
    ``force_auto_number``.

    Knows nothing about ExportedTaskRedcap, ExportedTask, ExportRecipient
    """
    def __init__(self,
                 req: "CamcopsRequest",
                 project: "redcap.project.Project") -> None:
        """

        Args:
            req:
                a :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
            project:
                a :class:`redcap.project.Project`
        """
        self.req = req
        self.project = project
        # Fetched once here; read by autonumbering_enabled.
        self.project_info = project.export_project_info()

    def get_record_id(self, existing_record_id: Optional[str]) -> str:
        """
        Returns the REDCap record ID to send when importing the record.

        Args:
            existing_record_id:
                REDCap ID of the existing record, if there is one
        """
        raise NotImplementedError("implement in subclass")

    @property
    def return_content(self) -> str:
        """
        The ``return_content`` argument to be passed to
        :meth:`redcap.project.Project.import_records`. Can be:

        - ``count`` [default] - the number of records imported
        - ``ids`` - a list of all record IDs that were imported
        - ``auto_ids`` = (used only when ``forceAutoNumber=true``) a list of
          pairs of all record IDs that were imported, includes the new ID
          created and the ID value that was sent in the API request
          (e.g., 323,10).

        Note (2020-01-27) that it can return e.g. ``15-30,0``, i.e. the ID
        values can be non-integer.
        """
        raise NotImplementedError("implement in subclass")

    @property
    def force_auto_number(self) -> bool:
        """
        Should we force auto-numbering of records in REDCap?
        """
        raise NotImplementedError("implement in subclass")

    def get_new_record_id(self, record_id: str, response: List[str]) -> str:
        """
        Returns the ID of the new (or updated) REDCap record.

        Args:
            record_id:
                the record ID that was sent to REDCap (as returned by
                :meth:`get_record_id`)
            response:
                response from :meth:`redcap.project.Project.import_records`
        """
        raise NotImplementedError("implement in subclass")

    @staticmethod
    def log_success(record_id: str) -> None:
        """
        Report upload success to the Python log.

        Args:
            record_id: REDCap record ID
        """
        raise NotImplementedError("implement in subclass")

    @property
    def autonumbering_enabled(self) -> bool:
        """
        Does this REDCap project have record autonumbering enabled?
        (Read from the project information fetched in the constructor.)
        """
        return self.project_info['record_autonumbering_enabled']

    def upload(self, task: "Task", existing_record_id: Optional[str],
               next_instance_id: int, fieldmap: "RedcapFieldmap",
               idnum_value: int) -> str:
        """
        Uploads a CamCOPS task to REDCap: first the task's record, then a
        record linking the REDCap record to the patient ID, then any files.

        Args:
            task:
                :class:`camcops_server.cc_modules.cc_task.Task` to be uploaded
            existing_record_id:
                REDCap ID of the existing record, if there is one
            next_instance_id:
                REDCap instance ID to be used for a repeating instrument
            fieldmap:
                :class:`RedcapFieldmap`
            idnum_value:
                CamCOPS patient ID number

        Returns:
            str: REDCap record ID of the record that was created or updated

        Raises:
            :exc:`RedcapExportException` if a fieldmap formula fails or
            REDCap reports an error
        """
        if task.is_complete():
            complete_status = RedcapRecordStatus.COMPLETE
        else:
            complete_status = RedcapRecordStatus.INCOMPLETE

        tablename = task.tablename
        instrument_name = fieldmap.instruments[tablename]
        record_id_fieldname = fieldmap.record["redcap_field"]
        event_name = fieldmap.events[tablename]

        record_id = self.get_record_id(existing_record_id)

        record = {
            record_id_fieldname: record_id,
            "redcap_repeat_instrument": instrument_name,
            # https://community.projectredcap.org/questions/74561/unexpected-behaviour-with-import-records-repeat-in.html  # noqa
            # REDCap won't create instance IDs automatically so we have to
            # assume no one else is writing to this record
            "redcap_repeat_instance": next_instance_id,
            f"{instrument_name}_complete": complete_status.value,
            "redcap_event_name": event_name,
        }

        self.transform_fields(record, task, fieldmap.fields[tablename])

        import_kwargs = {
            "return_content": self.return_content,
            "force_auto_number": self.force_auto_number,
        }

        response = self.upload_record(record, **import_kwargs)

        new_record_id = self.get_new_record_id(record_id, response)

        # We don't mark the patient record as complete - it could be part of
        # a larger form. We don't require it to be complete.
        patient_record = {
            record_id_fieldname: new_record_id,
            fieldmap.patient["redcap_field"]: idnum_value,
        }
        self.upload_record(patient_record)

        file_dict = {}  # type: Dict[str, Any]
        self.transform_fields(file_dict, task, fieldmap.files[tablename])

        self.upload_files(task,
                          new_record_id,
                          next_instance_id,
                          file_dict,
                          event=event_name)

        self.log_success(new_record_id)

        return new_record_id

    def upload_record(self, record: Dict[str, Any],
                      **kwargs) -> Union[Dict, List, str]:
        """
        Uploads a single REDCap record via the pycap
        :meth:`redcap.project.Project.import_records` function. Returns its
        response.

        Args:
            record:
                dictionary mapping REDCap field name to value
            kwargs:
                passed through to ``import_records``

        Raises:
            :exc:`RedcapExportException` if PyCap raises a
            :exc:`redcap.RedcapError`
        """
        try:
            response = self.project.import_records(
                [record],
                **kwargs
            )
        except redcap.RedcapError as e:
            raise RedcapExportException(str(e)) from e

        return response

    def upload_files(self, task: "Task", record_id: Union[int, str],
                     repeat_instance: int,
                     file_dict: Dict[str, bytes],
                     event: Optional[str] = None) -> None:
        """
        Uploads files attached to a task (e.g. a PDF of the CamCOPS task).

        Args:
            task:
                the :class:`camcops_server.cc_modules.cc_task.Task`
            record_id:
                the REDCap record ID
            repeat_instance:
                instance number for repeating instruments
            file_dict:
                dictionary mapping REDCap field name to file contents
            event:
                for longitudinal projects, specify the unique event here

        Raises:
            :exc:`RedcapExportException`
        """
        for fieldname, value in file_dict.items():
            filename = f"{task.tablename}_{record_id}_{fieldname}"

            with io.BytesIO(value) as file_obj:
                try:
                    self.project.import_file(
                        record_id, fieldname, filename, file_obj,
                        event=event,
                        repeat_instance=repeat_instance
                    )
                # ValueError if the field does not exist or is not
                # a file field
                except (redcap.RedcapError, ValueError) as e:
                    raise RedcapExportException(str(e)) from e

    def transform_fields(self, field_dict: Dict[str, Any], task: "Task",
                         formula_dict: Dict[str, str]) -> None:
        """
        Uses the definitions from the fieldmap XML to set up field values to be
        exported to REDCap.

        Args:
            field_dict:
                Exported field values go here (the dictionary is modified).
            task:
                the :class:`camcops_server.cc_modules.cc_task.Task`
            formula_dict:
                dictionary (from the XML information) mapping REDCap field
                name to a "formula". The formula is applied to extract data
                from the task in a flexible way.

        Raises:
            :exc:`RedcapExportException` if evaluating a formula produces an
            error
        """
        # The formula sees "task" plus whatever get_extra_symbols() offers.
        symbol_table = make_symbol_table(
            task=task,
            **self.get_extra_symbols()
        )
        interpreter = Interpreter(symtable=symbol_table)

        for redcap_field, formula in formula_dict.items():
            value = interpreter(formula, show_errors=True)
            if interpreter.error:
                message = "\n".join([e.msg for e in interpreter.error])
                raise RedcapExportException(
                    (
                        f"Fieldmap:\n"
                        f"Error in formula '{formula}': {message}\n"
                        f"Task: '{task.tablename}'\n"
                        f"REDCap field: '{redcap_field}'\n"
                    )
                )
            field_dict[redcap_field] = value

    def get_extra_symbols(self) -> Dict[str, Any]:
        """
        Returns a dictionary made available to the ``asteval`` interpreter.
        These become variables that the system administrator can refer to in
        their fieldmap XML; see :ref:`REDCap export <redcap>`.
        """
        return dict(
            format_datetime=format_datetime,
            DateFormat=DateFormat,
            request=self.req
        )
class RedcapNewRecordUploader(RedcapUploader):
    """
    Creates a new REDCap record.
    """

    @property
    def force_auto_number(self) -> bool:
        """
        Force auto-numbering exactly when the project has it enabled.
        """
        return self.autonumbering_enabled

    @property
    def return_content(self) -> str:
        """
        Ask REDCap for the generated IDs when autonumbering; otherwise just
        for a count.
        """
        if self.autonumbering_enabled:
            # import_records returns ["<redcap record id>, 0"]
            return "auto_ids"

        # import_records returns {'count': 1}
        return "count"

    # noinspection PyUnusedLocal
    def get_record_id(self, existing_record_id: Optional[str]) -> str:
        """
        Get the record ID to send to REDCap when importing records.
        ``existing_record_id`` is not used: this class creates a new record.
        """
        if self.autonumbering_enabled:
            # Is ignored but we still need to set this to something
            return "0"

        return self.project.generate_next_record_name()

    def get_new_record_id(self, record_id: str, response: List[str]) -> str:
        """
        For autonumbering, read the generated record ID from the
        response. Otherwise we already have it.

        Args:
            record_id:
                the record ID that was sent to REDCap
            response:
                response from :meth:`redcap.project.Project.import_records`;
                with autonumbering, its first item is a
                ``"<new id>,<sent id>"`` pair
        """
        if not self.autonumbering_enabled:
            return record_id

        id_pair = response[0]

        # Split once, from the right: the sent ID is the last item, and
        # everything before the final comma is the new record ID.
        new_record_id = id_pair.rsplit(",", 1)[0]

        return new_record_id

    @staticmethod
    def log_success(record_id: str) -> None:
        """
        Logs the creation of the record.
        """
        log.info(f"Created new REDCap record {record_id}")
class RedcapUpdatedRecordUploader(RedcapUploader):
    """
    Updates an existing REDCap record. The record ID is already known, so
    auto-numbering is never forced and the ID is passed through unchanged.
    """
    # Plain class attributes take the place of the base-class properties.
    force_auto_number = False
    # import_records returns {'count': 1}
    return_content = "count"

    # noinspection PyMethodMayBeStatic
    def get_record_id(self, existing_record_id: str) -> str:
        """
        The ID to send to REDCap is that of the existing record.
        """
        return existing_record_id

    # noinspection PyMethodMayBeStatic,PyUnusedLocal
    def get_new_record_id(self, old_record_id: str, response: Any) -> str:
        """
        An update does not change the record ID; the response is ignored.
        """
        return old_record_id

    @staticmethod
    def log_success(record_id: str) -> None:
        """
        Logs the update of the record.
        """
        log.info(f"Updated REDCap record {record_id}")