Source code for fhirpack.extraction.base
import json
from typing import Union
import time
import requests
from tqdm import tqdm
from dicomweb_client.api import DICOMwebClient
from fhirpy.lib import SyncFHIRResource
from fhirpy.lib import SyncFHIRReference
import fhirpack
from fhirpack.constants import CONFIG
# TODO build dynamically from metadata/capability statement
# Search parameters accepted per resource type. searchResources() rejects any
# search parameter whose name is not listed under the requested resource type.
# Names carrying a "__lt"/"__gt"/"__ge" suffix are listed verbatim because the
# check is an exact string comparison against the keys of the search dict.
SEARCH_PARAMS = {
    "Condition": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        "identifier",
        "patient",
        "subject",
        "recordedDate__lt",
        "recordedDate__gt",
        "recordedDate__ge",
    ],
    "EpisodeOfCare": [
        "_id",
        "_content",
        "_sort",
        "_include",
        "code",
        "identifier",
        "patient",
    ],
    "Encounter": [
        "_id",
        "_content",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
    ],
    "DiagnosticReport": [
        "_id",
        "_content",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
        "issued",
        "category",
        "issued__lt",
        "issued__gt",
        "issued__ge",
        "date__lt",
        "date__gt",
        "date__ge",
    ],
    "FamilyMemberHistory": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        "identifier",
        "patient",
    ],
    "MedicationAdministration": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
    ],
    "MedicationRequest": [
        "_id",
        "_content",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
    ],
    "Observation": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        # NOTE(review): three spellings of the count parameter are accepted;
        # presumably to cover different client conventions - confirm.
        "_count",
        "__count",
        "count",
        "identifier",
        "patient",
    ],
    "Patient": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        "identifier",
        "given",
        "family",
        "name",
        "link",
        "link:missing",
    ],
    "ImagingStudy": [
        "_content",
        "_id",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
        "endpoint:missing",
        "shipProcedureCode",
    ],
    "Procedure": [
        "_id",
        "_content",
        "_sort",
        "_include",
        "code",
        "identifier",
        "subject",
    ],
    "List": ["_id", "_content", "_sort", "_include", "code", "identifier"],
}
class BaseExtractorMixin:
    """Base extraction operations: references, resources, searches, files, binaries.

    The methods rely on members that are not defined in this mixin and must be
    provided by the class it is combined with: ``isFrame``, ``data``,
    ``client``, ``prepareOutput``, ``prepareOperationInput``, ``castOperand``,
    ``resourceTypeIs``, ``gatherSimplePaths`` and ``assign``.
    """

    def getReferences(
        self,
        input: Union[
            list[str],
            list[SyncFHIRReference],
            list[SyncFHIRResource],
        ] = None,
        params: dict = None,
        ignoreFrame: bool = False,
        raw: bool = False,
    ):
        """Return references taken from ``input`` or from the current frame.

        Args:
            input: Values to convert to references through
                ``prepareOperationInput``. Only allowed when the object is
                not a frame.
            params: Accepted for interface compatibility; currently unused.
            ignoreFrame: Accepted for interface compatibility; currently unused.
            raw: When true, return the references without passing them
                through ``prepareOutput``.

        Raises:
            NotImplementedError: If ``input`` is given while the object is a
                frame.
        """
        if input and self.isFrame:
            # TODO raise error references and isFrame not allowed
            # TODO raise in other similar methods
            raise NotImplementedError

        if input:
            input = self.prepareOperationInput(input, SyncFHIRReference)
        elif self.isFrame:
            input = self.data

        # raw callers get the unprocessed values; previously ``result`` was
        # unbound on this path and the call failed with UnboundLocalError.
        return input if raw else self.prepareOutput(input)

    def getResources(
        self,
        input: Union[
            list[str],
            list[SyncFHIRReference],
            list[SyncFHIRResource],
        ] = None,
        searchParams: dict = None,
        params: dict = None,
        resourceType: str = None,
        ignoreFrame: bool = False,
        raw: bool = False,
    ):
        """Resolve ``input`` (or the current frame's data) to resources.

        Args:
            input: Elements to cast to resources. When empty, the frame's
                ``data.values`` are used unless ``ignoreFrame`` is set.
            searchParams: Searching is not implemented here; passing a value
                without any other input raises ``NotImplementedError``.
            params: Accepted for interface compatibility; currently unused.
            resourceType: Resource type handed to ``castOperand`` and shown
                in the progress bar.
            ignoreFrame: When true, do not fall back to the frame's data.
            raw: When true, return the plain list instead of the value
                produced by ``prepareOutput``.

        Raises:
            NotImplementedError: If only ``searchParams`` is provided.
        """
        searchActive = searchParams is not None
        input = [] if input is None else input

        # len() rather than truthiness: the frame fallback below shows input
        # may be an array-like (``data.values``), not just a list.
        if not len(input):
            if self.isFrame and not ignoreFrame:
                input = self.data.values
            elif searchActive:
                raise NotImplementedError

        result = []
        for element in tqdm(input, desc=f"GET[{resourceType}]> ", leave=False):
            # castOperand's return value is spliced in with extend(), so it is
            # expected to be an iterable of resources.
            result.extend(self.castOperand(element, SyncFHIRResource, resourceType))

        return result if raw else self.prepareOutput(result)

    def searchResources(
        self,
        input: Union[
            list[str],
            list[SyncFHIRReference],
            list[SyncFHIRResource],
        ] = None,
        searchParams: dict = None,
        params: dict = None,
        resourceType: str = None,
        ignoreFrame: bool = True,
        raw: bool = False,
    ):
        """Search the server for resources of ``resourceType``.

        Args:
            input: Not supported; a non-empty value raises
                ``NotImplementedError``.
            searchParams: Search parameters; every key must be listed in
                ``SEARCH_PARAMS[resourceType]``.
            params: Accepted for interface compatibility; currently unused.
            resourceType: Resource type to search for.
            ignoreFrame: Must stay true when called on a frame; searching
                based on frame contents is not implemented.
            raw: When true, return the plain list of results instead of the
                value produced by ``prepareOutput``.

        Raises:
            Exception: If ``searchParams`` contains a key that is not allowed
                for ``resourceType``.
            NotImplementedError: If ``input`` is given, or if the object is a
                frame and ``ignoreFrame`` is false.
        """
        searchParams = {} if searchParams is None else searchParams
        input = [] if input is None else input

        if searchParams:
            invalidsearchParams = set(searchParams) - set(SEARCH_PARAMS[resourceType])
            if invalidsearchParams:
                raise Exception(f"non allowed search parameters {invalidsearchParams}")

        if len(input) or (self.isFrame and not ignoreFrame):
            raise NotImplementedError

        resourcePageSize = 100
        search = (
            self.client.resources(resourceType)
            .search(**searchParams)
            .limit(resourcePageSize)
        )

        result = []
        # Probe with a single-element page to skip counting and iterating
        # when nothing matches.
        if len(search.limit(1).fetch()):
            try:
                resourceCount = search.limit(1).fetch_raw().get("total", None)
                if not resourceCount:
                    resourceCount = search.count()
            except Exception:
                # server doesn't support _total parameter nor returns total
                # element in each request https://build.fhir.org/bundle.html#searchset
                # The total only feeds the progress bar, so carry on without it
                # instead of discarding the search results.
                resourceCount = None

            result.extend(
                tqdm(
                    search,
                    desc=f"SEARCH[{resourceType}]> ",
                    total=resourceCount,
                    leave=False,
                )
            )

        return result if raw else self.prepareOutput(result, resourceType)

    def getAbsolutePaths(
        self,
        paths: list[str],
        input: Union[
            list[str],
            list[SyncFHIRReference],
            list[SyncFHIRResource],
        ] = None,
        searchParams: dict = None,
        params: dict = None,
    ):
        """Collect values at absolute paths for the patients in the frame.

        Each path has the form ``"<ResourceType>.<relative.path>"``. For every
        resource type mentioned, the resources whose ``subject`` is one of the
        frame's patients are searched and the relative paths are gathered
        with ``gatherSimplePaths``.

        Args:
            paths: Absolute paths to gather.
            input: Not supported; a non-empty value raises
                ``NotImplementedError``.
            searchParams: Extra search parameters applied to every search.
                The dict is not modified.
            params: Accepted for interface compatibility; currently unused.

        Returns:
            dict mapping resource type to the gathered result, whose columns
            are prefixed with ``"<ResourceType>."``.

        Raises:
            NotImplementedError: If ``input`` is given or the current
                resource type is not patient.
            KeyError: If a path starts with an unsupported resource type.
        """
        if input:
            raise NotImplementedError
        if not self.resourceTypeIs("patient"):
            raise NotImplementedError

        # Work on a copy so the caller's dict does not gain a "subject" key.
        searchParams = {} if searchParams is None else dict(searchParams)
        searchParams["subject"] = ",".join([e.id for e in self.data])

        # TODO move allowed absolute paths allowed for patients somewhere else
        # these relative paths are allowed because they reference a subject or patient
        # Not enabled: BiologicallyDerivedProduct, DocumentReference,
        # FamilyMemberHistory, Media, Medication, MedicationAdministration,
        # Organization, Patient, Practitioner, Specimen, Substance.
        allowedTypes = (
            "Appointment",
            "CarePlan",
            "ClinicalImpression",
            "Condition",
            "DiagnosticReport",
            "Encounter",
            "EpisodeOfCare",
            "ImagingStudy",
            "Immunization",
            "List",
            "MedicationRequest",
            "MedicationStatement",
            "Observation",
            "Procedure",
            "QuestionnaireResponse",
            "ServiceRequest",
        )
        relativePaths = {allowedType: [] for allowedType in allowedTypes}

        for absolutePath in sorted(paths):
            root, *relative = absolutePath.split(".")
            if root not in relativePaths:
                raise KeyError(
                    f"unsupported resource type in absolute path: {absolutePath}"
                )
            relativePaths[root].append(relative)

        finalResults = {}
        for resourceType, relpaths in relativePaths.items():
            if not relpaths:
                continue

            result = fhirpack.PACK().searchResources(
                resourceType=resourceType, searchParams=searchParams
            )

            # TODO handle multiple filters
            filteredResults = result.gatherSimplePaths([".".join(e) for e in relpaths])
            filteredResults.columns = [
                resourceType + "." + key for key in filteredResults.columns
            ]
            finalResults[resourceType] = filteredResults

        return finalResults

    def getURLBytes(
        self,
        input: list[str] = None,
        operateOnCol: str = "data",
        resultInCol: str = None,
        params: dict = None,
    ):
        """Download the content behind each URL of the current frame.

        Args:
            input: Not supported; a non-empty value raises
                ``NotImplementedError``.
            operateOnCol: Column holding the URLs. When falsy and the frame
                holds DiagnosticReport resources, ``presentedForm.url`` is
                gathered instead.
            resultInCol: When set, the downloads are attached to the frame
                under this column via ``assign``; otherwise they are passed
                to ``prepareOutput`` as ``"Binary"``.
            params: Accepted for interface compatibility; currently unused.

        Returns:
            The frame with the new column, or the prepared output. Each
            download is a ``bytearray``, or ``None`` when the server answered
            with an error status.

        Raises:
            NotImplementedError: If ``input`` is given, or if no column is
                selected and the frame is not a DiagnosticReport frame.
        """
        input = [] if input is None else input

        if input:
            raise NotImplementedError
        if self.isFrame:
            if operateOnCol:
                input = self[operateOnCol].values
            elif self.resourceTypeIs("DiagnosticReport"):
                input = self.gatherSimplePaths(["presentedForm.url"])
            else:
                raise NotImplementedError

        results = []
        for url in input:
            # The context manager releases the streamed connection even when
            # the body is not read to the end.
            with requests.get(
                url,
                headers=self.client._build_request_headers(),
                stream=True,
            ) as response:
                if not response.ok:
                    # TODO log to execution.log
                    data = None
                else:
                    data = bytearray()
                    for block in response.iter_content(1024):
                        if not block:
                            break
                        data.extend(block)
            # NOTE(review): the original indentation of this pause was lost;
            # it is assumed to apply once per URL - confirm.
            time.sleep(0.5)
            results.append(data)

        if resultInCol:
            result = self.assign(**{resultInCol: results})
        else:
            result = self.prepareOutput(results, "Binary")

        return result

    def getFromFiles(self, input: list[str]):
        """Creates a Frame object from json files containing fhir resources.

        Each file may contain a single resource, a list of resources, or a
        Bundle whose entries carry the resources.

        Args:
            input: Paths of the JSON files to read.

        Raises:
            TypeError: If the loaded resources are not all of the same
                resource type.
        """
        pathsData = []
        for iPath in input:
            with open(iPath, "r", encoding="utf-8") as f:
                rawJson = json.load(f)

            if isinstance(rawJson, list):
                pathsData.extend(rawJson)
            elif rawJson["resourceType"] == "Bundle":
                # A Bundle without an "entry" element contributes nothing.
                pathsData.extend(r["resource"] for r in rawJson.get("entry", []))
            else:
                pathsData.append(rawJson)

        if pathsData:
            expectedType = pathsData[0]["resourceType"]
            if any(e["resourceType"] != expectedType for e in pathsData):
                raise TypeError("All resources have to be of the same type.")

        result = [
            SyncFHIRResource(self.client, e["resourceType"], **e) for e in pathsData
        ]
        return self.prepareOutput(result)

    def getDICOMInstances(
        self,
        input: list[str] = None,
        operateOnCol: str = "data",
        resultInCol: str = None,
        params: dict = None,
        inPlace: bool = False,
    ):
        """List the DICOM instances of every series row of an ImagingStudy frame.

        For each row, a ``DICOMwebClient`` is created for the row's
        ``endpoint`` and ``iter_series(study, series)`` is collected into a
        list. Requests are authorised with the bearer token stored under
        ``EXTRACTION_BASE_TOKEN_DICOM`` in ``CONFIG``.

        Args:
            input: Not supported; a non-empty value raises
                ``NotImplementedError``.
            operateOnCol: Accepted for interface compatibility; currently unused.
            resultInCol: Accepted for interface compatibility; currently unused.
            params: Accepted for interface compatibility; currently unused.
            inPlace: When true, store the instance lists in ``self.data`` and
                return ``self``; otherwise return ``prepareOutput(result)``.

        Raises:
            NotImplementedError: If ``input`` is given or the frame does not
                hold ImagingStudy resources.
        """
        input = [] if input is None else input

        if input:
            raise NotImplementedError
        if self.isFrame:
            if not self.resourceTypeIs("ImagingStudy"):
                raise NotImplementedError
            input = self

        headers = {
            "Authorization": f"Bearer {CONFIG.get('EXTRACTION_BASE_TOKEN_DICOM')}"
        }

        result = []
        # itertuples() yields the row index first, followed by the selected
        # columns in the order requested.
        for _, series, study, endpoint in input[
            ["series", "study", "endpoint"]
        ].itertuples():
            client = DICOMwebClient(endpoint, headers=headers)
            result.append(list(client.iter_series(study, series)))

        if inPlace:
            self.data = result
            return self

        return self.prepareOutput(result)