#!/usr/bin/env python
# crate_anon/crateweb/research/views.py
"""
===============================================================================
Copyright (C) 2015-2018 Rudolf Cardinal (rudolf@pobox.com).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <http://www.gnu.org/licenses/>.
===============================================================================
"""
import datetime
# from functools import lru_cache
import json
import logging
# import pprint
from typing import Any, Dict, List, Type, Union
from cardinal_pythonlib.dbfunc import get_fieldnames_from_cursor
from cardinal_pythonlib.django.function_cache import django_cache_function
from cardinal_pythonlib.django.serve import file_response
from cardinal_pythonlib.exceptions import recover_info_from_exception
from cardinal_pythonlib.hash import hash64
from cardinal_pythonlib.logs import BraceStyleAdapter
from cardinal_pythonlib.sqlalchemy.dialect import SqlaDialectName
from django import forms
from django.conf import settings
from django.contrib.auth.decorators import user_passes_test
from django.core.exceptions import (
ObjectDoesNotExist,
ValidationError,
)
# from django.db import connection
from django.db import DatabaseError
from django.db.models import Q, QuerySet
from django.http.response import HttpResponse, HttpResponseRedirect
from django.http.request import HttpRequest
from django.shortcuts import get_object_or_404, redirect, render
from django.template.loader import render_to_string
from django.urls import reverse
from django.utils.html import escape
from pyparsing import ParseException
from crate_anon.common.contenttypes import ContentType
from crate_anon.common.sql import (
ColumnId,
escape_sql_string_literal,
escape_sql_string_or_int_literal,
SQL_OPS_MULTIPLE_VALUES,
SQL_OPS_VALUE_UNNECESSARY,
TableId,
toggle_distinct,
WhereCondition,
)
from crate_anon.crateweb.core.utils import is_clinician, is_superuser, paginate
from crate_anon.crateweb.research.forms import (
AddHighlightForm,
AddQueryForm,
ClinicianAllTextFromPidForm,
DatabasePickerForm,
DEFAULT_MIN_TEXT_FIELD_LENGTH,
FieldPickerInfo,
ManualPeQueryForm,
PidLookupForm,
QueryBuilderForm,
RidLookupForm,
SQLHelperTextAnywhereForm,
)
from crate_anon.crateweb.research.html_functions import (
highlight_text,
HtmlElementCounter,
make_result_element,
make_collapsible_sql_query,
N_CSS_HIGHLIGHT_CLASSES,
prettify_sql_css,
prettify_sql_html,
prettify_sql_and_args,
)
from crate_anon.crateweb.research.models import (
Highlight,
PidLookup,
PatientExplorer,
PatientMultiQuery,
Query,
)
from crate_anon.crateweb.research.research_db_info import (
research_database_info,
PatientFieldPythonTypes,
SingleResearchDatabase,
)
from crate_anon.crateweb.userprofile.models import get_patients_per_page
from crate_anon.crateweb.research.sql_writer import (
add_to_select,
SelectElement,
)
log = BraceStyleAdapter(logging.getLogger(__name__))
# =============================================================================
# Helper functions
# =============================================================================
def query_context(request: HttpRequest) -> Dict[str, Any]:
    """
    Builds the template context entries describing the active query and the
    active patient explorer (PE) for this request.

    Args:
        request: the current request, passed to the model lookups

    Returns:
        a dict holding the active query/PE IDs (each possibly ``None``) and
        a boolean flag for each saying whether one is selected
    """
    # Try to minimize SQL here, as these calls will be used for EVERY
    # request.
    # This problem can be circumvented with a per-request cache; see
    # http://stackoverflow.com/questions/3151469/per-request-cache-in-django
    active_query_id = Query.get_active_query_id_or_none(request)
    active_pe_id = PatientExplorer.get_active_pe_id_or_none(request)
    return dict(
        query_selected=active_query_id is not None,
        current_query_id=active_query_id,
        pe_selected=active_pe_id is not None,
        current_pe_id=active_pe_id,
    )
def datetime_iso_for_filename() -> str:
    """
    Returns the current local date/time as a compact string suitable for
    embedding in a filename: ``YYYYMMDD_HHMMSS``.
    """
    timestamp_format = "%Y%m%d_%H%M%S"
    return datetime.datetime.now().strftime(timestamp_format)
# =============================================================================
# Errors
# =============================================================================
def generic_error(request: HttpRequest, error: str) -> HttpResponse:
    """
    Renders the generic error page.

    Args:
        request: the current request
        error: the message to pass to the template as ``error``

    Returns:
        the rendered ``generic_error.html`` response
    """
    return render(request, 'generic_error.html', dict(error=error))
# =============================================================================
# Queries
# =============================================================================
@django_cache_function(timeout=None)
# @lru_cache(maxsize=None)
def get_db_structure_json() -> str:
    """
    Returns a JSON description of the research database structure, as used
    by the query builder page.

    The result is a JSON list with one entry per schema that is eligible for
    the query builder and has at least one usable table. Each entry has keys
    ``database``, ``schema`` and ``tables``; each table has ``table`` and
    ``columns``; each column has ``colname``, ``coltype``, ``rawtype`` and
    ``comment``. Tables lacking the schema's TRID or RID column are omitted.

    The function is wrapped in ``django_cache_function(timeout=None)``.
    """
    colinfolist = research_database_info.get_colinfolist()
    if not colinfolist:
        log.warning("get_db_structure_json(): colinfolist is empty")
    info = []  # type: List[Dict[str, Any]]
    for dbinfo in research_database_info.dbinfolist:
        log.info("get_db_structure_json: schema {}".format(
            dbinfo.schema_identifier))
        if not dbinfo.eligible_for_query_builder:
            log.debug("Skipping schema={}: not eligible for query "
                      "builder".format(dbinfo.schema_identifier))
            continue
        # Group this schema's columns by table in a single pass, rather than
        # rescanning the whole schema once per table.
        columns_by_table = {}  # type: Dict[str, List[Any]]
        for colinfo in colinfolist:
            if (colinfo.table_catalog == dbinfo.database and
                    colinfo.table_schema == dbinfo.schema_name):
                columns_by_table.setdefault(
                    colinfo.table_name, []).append(colinfo)
        table_info = []  # type: List[Dict[str, Any]]
        for table in sorted(columns_by_table):
            table_cil = columns_by_table[table]
            column_names = {x.column_name for x in table_cil}
            if dbinfo.trid_field not in column_names:
                # This table doesn't contain a TRID, so we will skip it.
                log.debug("... skipping table {}: no TRID [{}]".format(
                    table, dbinfo.trid_field))
                continue
            if dbinfo.rid_field not in column_names:
                # This table doesn't contain a RID, so we will skip it.
                log.debug("... skipping table {}: no RID [{}]".format(
                    table, dbinfo.rid_field))
                continue
            column_info = [
                {
                    'colname': ci.column_name,
                    'coltype': ci.querybuilder_type,
                    'rawtype': ci.column_type,
                    'comment': ci.column_comment or '',
                }
                for ci in sorted(table_cil, key=lambda x: x.column_name)
            ]  # type: List[Dict[str, str]]
            if column_info:
                table_info.append({
                    'table': table,
                    'columns': column_info,
                })
                log.debug("... using table {}: {} columns".format(
                    table, len(column_info)))
        if table_info:
            info.append({
                'database': dbinfo.database,
                'schema': dbinfo.schema_name,
                'tables': table_info,
            })
    return json.dumps(info)
def query_build(request: HttpRequest) -> HttpResponse:
    """
    Assisted query builder, based on the data dictionary.

    On POST, either applies a "global" command to the user's SQL scratchpad
    (clear, toggle DISTINCT, save, run), or validates a ``QueryBuilderForm``
    and uses it to add a SELECT column, all columns of a table, or a WHERE
    condition to the scratchpad. The builder page is then (re)rendered.

    Args:
        request: the current request; ``request.user.profile`` holds the SQL
            scratchpad that is edited here

    Returns:
        the rendered ``query_build.html`` page, or the response from
        :func:`query_submit` for the "save"/"run" commands

    Raises:
        ValueError: if a valid form is POSTed without a recognized command
    """
    # NOTES FOR FIRST METHOD, with lots (and lots) of forms.
    # - In what follows, we want a normal template but we want to include a
    #   large chunk of raw HTML. I was doing this with
    #   {{ builder_html | safe }} within the template, but it was very slow
    #   (e.g. 500ms on my machine; 50s on the CPFT "sandpit" server,
    #   2016-06-28). The delay was genuinely in the template rendering, it
    #   seems, based on profiling and manual log calls.
    # - A simple string replacement, as below, was about 7% of the total time
    #   (e.g. 3300ms instead of 50s).
    # - Other alternatives might include the Jinja2 template system, which is
    #   apparently faster than the Django default, but we may not need further
    #   optimization.
    # - Another, potentially better, solution, is not to send dozens or
    #   hundreds of forms, but to write some Javascript to make this happen
    #   mostly on the client side. Might look better, too. (Yes, it does.)
    # NB: first "submit" button takes the Enter key, so place WHERE
    # before SELECT so users can hit enter in the WHERE value fields.
    # - If you provide the "request=request" argument to
    #   render_to_string it gives you the CSRF token.
    # - Another way is to ignore "request" and use render_to_string
    #   with a manually crafted context including 'csrf_token'.
    #   (This avoids the global context processors.)
    # - Note that the CSRF token prevents simple caching of the forms.
    # - But we can't cache anyway if we're going to have some forms
    #   (differentially) non-collapsed at the start, e.g. on form POST.
    # - Also harder work to do this HTML manually (rather than with
    #   template rendering), because the csrf_token ends up like:
    #   <input type='hidden' name='csrfmiddlewaretoken' value='RGN5UZnTVkLFAVNtXRpJwn5CclBRAdLr' />  # noqa
    profile = request.user.profile
    parse_error = ''
    default_database = research_database_info.get_default_database_name()
    default_schema = research_database_info.get_default_schema_name()
    with_database = research_database_info.uses_database_level()
    form = None
    if request.method == 'POST':
        grammar = research_database_info.grammar
        try:
            if 'global_clear' in request.POST:
                profile.sql_scratchpad = ''
                profile.save()
            elif 'global_toggle_distinct' in request.POST:
                profile.sql_scratchpad = toggle_distinct(
                    profile.sql_scratchpad, grammar=grammar)
                profile.save()
            elif 'global_save' in request.POST:
                return query_submit(request, profile.sql_scratchpad, run=False)
            elif 'global_run' in request.POST:
                return query_submit(request, profile.sql_scratchpad, run=True)
            else:
                form = QueryBuilderForm(request.POST, request.FILES)
                # An invalid form is not acted upon here; its errors are
                # passed to the page via 'form_errors' below.
                if form.is_valid():
                    database = (form.cleaned_data['database'] if with_database
                                else '')
                    schema = form.cleaned_data['schema']
                    table = form.cleaned_data['table']
                    column = form.cleaned_data['column']
                    column_id = ColumnId(db=database, schema=schema,
                                         table=table, column=column)
                    table_id = column_id.table_id
                    if 'submit_select' in request.POST:
                        profile.sql_scratchpad = add_to_select(
                            profile.sql_scratchpad,
                            select_elements=[
                                SelectElement(column_id=column_id)
                            ],
                            magic_join=True,
                            grammar=grammar
                        )
                    elif 'submit_select_star' in request.POST:
                        select_elements = [
                            SelectElement(column_id=c.column_id) for c in
                            research_database_info.all_columns(table_id)]
                        profile.sql_scratchpad = add_to_select(
                            profile.sql_scratchpad,
                            select_elements=select_elements,
                            magic_join=True,
                            grammar=grammar,
                        )
                    elif 'submit_where' in request.POST:
                        datatype = form.cleaned_data['datatype']
                        op = form.cleaned_data['where_op']
                        # Value
                        if op in SQL_OPS_MULTIPLE_VALUES:
                            value = form.file_values_list
                        elif op in SQL_OPS_VALUE_UNNECESSARY:
                            value = None
                        else:
                            value = form.get_cleaned_where_value()
                        # WHERE fragment
                        wherecond = WhereCondition(column_id=column_id,
                                                   op=op,
                                                   datatype=datatype,
                                                   value_or_values=value)
                        profile.sql_scratchpad = add_to_select(
                            profile.sql_scratchpad,
                            where_type="AND",
                            where_conditions=[wherecond],
                            magic_join=True,
                            grammar=grammar
                        )
                    else:
                        raise ValueError("Bad form command!")
                    profile.save()
        except ParseException as e:
            # Report SQL parsing problems on the page rather than failing.
            parse_error = str(e)
    if form is None:
        form = QueryBuilderForm()
    starting_values_dict = {
        'database': form.data.get('database', '') if with_database else '',
        'schema': form.data.get('schema', ''),
        'table': form.data.get('table', ''),
        'column': form.data.get('column', ''),
        'op': form.data.get('where_op', ''),
        'date_value': form.data.get('date_value', ''),
        # Impossible to set file_value programmatically. (See querybuilder.js.)
        'float_value': form.data.get('float_value', ''),
        'int_value': form.data.get('int_value', ''),
        'string_value': form.data.get('string_value', ''),
        'offer_where': bool(profile.sql_scratchpad),  # existing SELECT?
        'form_errors': "<br>".join("{}: {}".format(k, v)
                                   for k, v in form.errors.items()),
        'default_database': default_database,
        'default_schema': default_schema,
        'with_database': with_database,
    }
    context = {
        'nav_on_querybuilder': True,
        'sql': prettify_sql_html(profile.sql_scratchpad),
        'parse_error': parse_error,
        'database_structure': get_db_structure_json(),
        'starting_values': json.dumps(starting_values_dict),
        'sql_dialect': settings.RESEARCH_DB_DIALECT,
        'dialect_mysql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MYSQL,
        'dialect_mssql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MSSQL,
        'sql_highlight_css': prettify_sql_css(),
    }
    context.update(query_context(request))
    return render(request, 'query_build.html', context)
def get_all_queries(request: HttpRequest) -> QuerySet:
    """
    Returns the requesting user's non-deleted queries, ordered by ``active``
    (descending) and then by ``created`` (descending).
    """
    own_queries = Query.objects.filter(user=request.user, deleted=False)
    return own_queries.order_by('-active', '-created')
def get_identical_queries(request: HttpRequest, sql: str) -> List[Query]:
    """
    Returns those of the user's (non-deleted) queries whose SQL is exactly
    ``sql``.

    Candidates are found in the database by a 64-bit hash of the SQL, then
    confirmed by comparing the SQL text in Python.

    Args:
        request: the current request (identifies the user)
        sql: the SQL text to match

    Returns:
        a list of matching :class:`Query` objects (possibly empty)
    """
    # Why not simply: all_queries.filter(sql=sql)?
    #
    # - 2017-02-03: we had a problem here, in which the parameter was sent to
    #   SQL Server as type NTEXT, but the field "sql" is NVARCHAR(MAX), leading
    #   to "The data types nvarchar(max) and ntext are incompatible in the
    #   equal to operator."
    # - The Django field type TextField is converted to NVARCHAR(MAX) by
    #   django-pyodbc-azure, in sql_server/pyodbc/base.py, also at [1].
    # - That seems fine; NVARCHAR(MAX) seems more capable than NTEXT.
    #   NTEXT is deprecated.
    # - Error is reproducible with
    #       ... WHERE sql = CAST('hello' AS NTEXT) ...
    # - The order of the types in the error message matches the order in the
    #   SQL statement.
    # - A solution would be to cast the parameter as
    #   CAST(some_parameter AS NVARCHAR(MAX))
    # - Fixed by upgrading pyodbc from 3.1.1 to 4.0.3
    # - Added to FAQ
    # - WARNING: the problem came back with pyodbc==4.0.6, but not fixed again
    #   by downgrading to 4.0.3
    # - See also [2].
    # - An alternative solution would not be to compare on the long text, but
    #   store and compare on a hash of it.
    # - The problem is that either pyodbc or ODBC itself, somehow, is sending
    #   the string parameter as NTEXT.
    #   Similar Perl problem: [3].
    #
    # - In pyodbc, the key functions are:
    #       cursor.cpp: static PyObject* execute(...)
    #       -> params.cpp: bool PrepareAndBind(...)
    #           -> GetParameterInfo  // THIS ONE
    #               Parameter will be of type str.
    #               This will fail for PyBytes_Check [4].
    #               This will match for PyUnicode_Check [5].
    #               Thus:
    #               -> GetUnicodeInfo
    #               ... and depending on the string length of the
    #                   parameter, this returns either
    #                   SQL_WVARCHAR -> NVARCHAR on SQL Server [6], for short strings  # noqa
    #                   SQL_WLONGVARCHAR -> NTEXT on SQL Server [6], for long strings  # noqa
    #               ... and the length depends on
    #                   -> connection.h: cur->cnxn->GetMaxLength(info.ValueType);  # noqa
    #           -> BindParameter
    #       in cursor.cpp
    #
    # - Now we also have pyodbc docs: [7].
    #
    # - Anyway, the upshot is that there is some unpredictability in sending
    #   very long parameters... the intermittency would be explained by some
    #   dependency on string length.
    # - Empirically, it fails somewhere around 1,900 characters.
    #
    # - Could switch away from pyodbc, e.g. to Django-mssql [8, 9].
    #   But, as per the CRATE manual, there were version incompatibilities
    #   here. Tried again with v1.8, but it gave configuration errors
    #   (ADODB.Connection; Provider cannot be found. It may not be properly
    #   installed.) Anyway, pyodbc is good enough for SQLAlchemy.
    #
    # [1] https://github.com/michiya/django-pyodbc-azure/blob/azure-1.10/sql_server/pyodbc/base.py  # noqa
    # [2] https://github.com/mkleehammer/pyodbc/blob/master/tests2/informixtests.py  # noqa
    # [3] http://stackoverflow.com/questions/13090907
    # [4] https://docs.python.org/3/c-api/bytes.html
    # [5] https://docs.python.org/3/c-api/unicode.html
    # [6] https://documentation.progress.com/output/DataDirect/DataDirectCloud/index.html#page/queries/microsoft-sql-server-data-types.html  # noqa
    # [7] https://github.com/mkleehammer/pyodbc/wiki/Data-Types
    # [8] https://docs.djangoproject.com/en/1.10/ref/databases/#using-a-3rd-party-database-backend  # noqa
    # [9] https://django-mssql.readthedocs.io/en/latest/

    # Screw it, let's use a hash. We can use our hash64() function and
    # a Django BigIntegerField.
    sql_hash = hash64(sql)
    candidates = get_all_queries(request).filter(sql_hash=sql_hash)
    # Now eliminate any chance of errors via hash collisions by double-checking
    # the Python objects:
    return [candidate for candidate in candidates if candidate.sql == sql]
def query_submit(request: HttpRequest,
                 sql: str,
                 run: bool = False) -> HttpResponse:
    """
    Ancillary function to add a query, and redirect to the editing or
    run page.

    If the user already has a query with identical SQL, that query is
    activated instead of creating a duplicate.

    Args:
        request: the current request (identifies the user)
        sql: the SQL text of the query
        run: if true, redirect to the results view for the query; otherwise
            redirect to the query editing view

    Returns:
        a redirect response
    """
    identical_queries = get_identical_queries(request, sql)
    if identical_queries:
        query = identical_queries[0]
        query.activate()
    else:
        query = Query(sql=sql, raw=True, user=request.user,
                      active=True)
        query.save()
    # redirect to a new URL:
    if run:
        return redirect('results', query.id)
    return redirect('query')
def query_edit_select(request: HttpRequest) -> HttpResponse:
    """
    Edit or select SQL for current query.

    A valid POST either submits the SQL as a query ("add" or "run") or copies
    it to the user's SQL scratchpad and redirects to the query builder. Any
    other request (including a POST whose form is invalid) renders the page
    with a form pre-filled from the user's active query, if there is one,
    plus a paginated list of the user's queries.

    Args:
        request: the current request

    Returns:
        a redirect (after a valid POST) or the rendered
        ``query_edit_select.html`` page

    Raises:
        ValueError: if a valid form is POSTed without a recognized command
    """
    # if this is a POST request we need to process the form data
    if request.method == 'POST':
        # create a form instance and populate it with data from the request:
        form = AddQueryForm(request.POST)
        # check whether it's valid:
        if form.is_valid():
            cmd_run = 'submit_run' in request.POST
            cmd_add = 'submit_add' in request.POST
            cmd_builder = 'submit_builder' in request.POST
            # process the data in form.cleaned_data as required
            sql = form.cleaned_data['sql']
            if cmd_add or cmd_run:
                return query_submit(request, sql, cmd_run)
            elif cmd_builder:
                profile = request.user.profile
                profile.sql_scratchpad = sql
                profile.save()
                return redirect('build_query')
            else:
                raise ValueError("Bad command!")
    # if a GET (or any other method) we'll create a blank form
    values = {}
    all_queries = get_all_queries(request)
    active_queries = all_queries.filter(active=True)
    if active_queries:
        values['sql'] = active_queries[0].get_original_sql()
    form = AddQueryForm(values)
    queries = paginate(request, all_queries)
    profile = request.user.profile
    # One counter is shared across the page's queries when building their
    # collapsible HTML.
    element_counter = HtmlElementCounter()
    for q in queries:
        q.formatted_query_safe = make_collapsible_sql_query(
            q.get_original_sql(),
            element_counter=element_counter,
            collapse_at_n_lines=profile.collapse_at_n_lines,
        )
    context = {
        'form': form,
        'queries': queries,
        'nav_on_query': True,
        'dialect_mysql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MYSQL,
        'dialect_mssql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MSSQL,
        'sql_highlight_css': prettify_sql_css(),
    }
    context.update(query_context(request))
    return render(request, 'query_edit_select.html', context)
def query_activate(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    Activates one of the requesting user's queries, then redirects to the
    query editing view.

    Args:
        request: the current request
        query_id: ID of the query to activate

    Returns:
        a redirect to the ``query`` view; a 404 is raised if the user has no
        query with that ID
    """
    validate_blank_form(request)
    # Restrict the lookup to the requesting user's own queries (as the
    # count/results views do), so one user cannot act on another's query.
    query = get_object_or_404(
        Query, id=query_id, user=request.user)  # type: Query
    query.activate()
    return redirect('query')
def query_delete(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    Deletes one of the requesting user's queries (via
    ``Query.delete_if_permitted()``), then redirects to the query editing
    view.

    Args:
        request: the current request
        query_id: ID of the query to delete

    Returns:
        a redirect to the ``query`` view; a 404 is raised if the user has no
        query with that ID
    """
    validate_blank_form(request)
    # Restrict the lookup to the requesting user's own queries (as the
    # count/results views do), so one user cannot act on another's query.
    query = get_object_or_404(
        Query, id=query_id, user=request.user)  # type: Query
    query.delete_if_permitted()
    return redirect('query')
def no_query_selected(request: HttpRequest) -> HttpResponse:
    """
    Renders the page shown when no query has been selected.
    """
    context = query_context(request)
    return render(request, 'query_none_selected.html', context)
def query_count(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    View COUNT(*) from specific query.

    Args:
        request: the current request
        query_id: ID of the query, which must belong to the requesting user

    Returns:
        the row-count page, or an error page if no query was specified or
        the user has no query with that ID
    """
    if query_id is None:
        return no_query_selected(request)
    try:
        query_id = int(query_id)
        # ... conceivably might raise TypeError (from e.g. None), ValueError
        # (from e.g. "xyz"), but both should be filtered out by the URL parser
        query = Query.objects.get(id=query_id, user=request.user)
        # ... get() never returns None: it raises something derived from
        # ObjectDoesNotExist if nothing matches, or (in principle, if this
        # weren't a PK) MultipleObjectsReturned;
        # https://docs.djangoproject.com/en/1.9/ref/models/querysets/#django.db.models.query.QuerySet.get  # noqa
    except ObjectDoesNotExist:
        return render_bad_query_id(request, query_id)
    return render_resultcount(request, query)
def query_count_current(request: HttpRequest) -> HttpResponse:
    """
    View COUNT(*) from current query.

    Args:
        request: the current request, used to find the active query

    Returns:
        the row-count page, or the "no query selected" page if there is no
        active query
    """
    query = Query.get_active_query_or_none(request)
    if query is None:
        return no_query_selected(request)
    return render_resultcount(request, query)
def query_results(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    View results of chosen query, in tabular format.

    Display settings (collapse thresholds, line length) come from the user's
    profile, and the user's active highlights are applied.

    Args:
        request: the current request
        query_id: ID of the query, which must belong to the requesting user

    Returns:
        the results page, or an error page if no query was specified or the
        user has no query with that ID
    """
    if query_id is None:
        return no_query_selected(request)
    try:
        query_id = int(query_id)
        query = Query.objects.get(id=query_id, user=request.user)
    except ObjectDoesNotExist:
        return render_bad_query_id(request, query_id)
    profile = request.user.profile
    highlights = Highlight.get_active_highlights(request)
    return render_resultset(request, query, highlights,
                            collapse_at_len=profile.collapse_at_len,
                            collapse_at_n_lines=profile.collapse_at_n_lines,
                            line_length=profile.line_length)
def query_results_recordwise(request: HttpRequest,
                             query_id: str) -> HttpResponse:
    """
    View results of chosen query, in record-wise format (rendered by
    :func:`render_resultset_recordwise`).

    Display settings (collapse thresholds, line length) come from the user's
    profile, and the user's active highlights are applied.

    Args:
        request: the current request
        query_id: ID of the query, which must belong to the requesting user

    Returns:
        the results page, or an error page if no query was specified or the
        user has no query with that ID
    """
    if query_id is None:
        return no_query_selected(request)
    try:
        query_id = int(query_id)
        query = Query.objects.get(id=query_id, user=request.user)
    except ObjectDoesNotExist:
        return render_bad_query_id(request, query_id)
    profile = request.user.profile
    highlights = Highlight.get_active_highlights(request)
    return render_resultset_recordwise(
        request, query, highlights,
        collapse_at_len=profile.collapse_at_len,
        collapse_at_n_lines=profile.collapse_at_n_lines,
        line_length=profile.line_length)
def query_tsv(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    Download TSV of the results of the specified query.

    Args:
        request: the current request
        query_id: ID of the query, which must belong to the requesting user

    Returns:
        a file download response, or the "bad query" page if the database
        raises an error; a 404 is raised if the user has no query with that
        ID
    """
    # Restrict the lookup to the requesting user's own queries (as the
    # count/results views do), so one user cannot download the results of
    # another user's query.
    query = get_object_or_404(
        Query, id=query_id, user=request.user)  # type: Query
    try:
        return file_response(
            query.make_tsv(),
            content_type=ContentType.TSV,
            filename="crate_results_{num}_{datetime}.tsv".format(
                num=query.id,
                datetime=datetime_iso_for_filename(),
            )
        )
    except DatabaseError as exception:
        return render_bad_query(request, query, exception)
def query_excel(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    Download an Excel (XLSX) file of the results of the specified query.

    Args:
        request: the current request
        query_id: ID of the query, which must belong to the requesting user

    Returns:
        a file download response, or the "bad query" page if the database
        raises an error; a 404 is raised if the user has no query with that
        ID
    """
    # Restrict the lookup to the requesting user's own queries (as the
    # count/results views do), so one user cannot download the results of
    # another user's query.
    query = get_object_or_404(
        Query, id=query_id, user=request.user)  # type: Query
    try:
        return file_response(
            query.make_excel(),
            content_type=ContentType.XLSX,
            filename="crate_query_{}_{}.xlsx".format(
                query_id, datetime_iso_for_filename())
        )
    except DatabaseError as exception:
        return render_bad_query(request, query, exception)
# @user_passes_test(is_superuser)
# def audit(request):
# """
# View audit log
# """
# all_audits = QueryAudit.objects.all()\
# .select_related('query', 'query__user')\
# .order_by('-id')
# audits = paginate(request, all_audits)
# context = {'audits': audits}
# return render(request, 'audit.html', context)
# =============================================================================
# Internal functions for views on queries
# =============================================================================
# def make_demo_query_unless_exists(request):
# DEMOQUERY = Query(
# pk=1,
# sql="SELECT * FROM notes\nWHERE note LIKE '%Adam%'\nLIMIT 20",
# raw=True,
# user=request.user,
# )
# DEMOQUERY.save()
# H1 = Highlight(pk=1, text="Aaron", colour=0, user=request.user)
# H1.save()
# H2 = Highlight(pk=2, text="Adam", colour=0, user=request.user)
# H2.save()
# H3 = Highlight(pk=3, text="October", colour=1, user=request.user)
# H3.save()
# EXCEPTIONS FOR HOMEBREW SQL.
# You can see:
# - django.db.ProgrammingError
# - django.db.OperationalError
# - InternalError (?django.db.utils.InternalError)
# ... but I think all are subclasses of django.db.utils.DatabaseError
def render_resultcount(request: HttpRequest, query: Query) -> HttpResponse:
    """
    Displays the number of rows that a given query will fetch.

    The query is executed and the cursor's ``rowcount`` is reported; the
    attempt is audited whether it succeeds or fails.

    Args:
        request: the current request
        query: the query to run, or ``None``

    Returns:
        the row-count page; the "missing query" page if ``query`` is
        ``None``; or the "bad query" page if the database raises an error
    """
    if query is None:
        return render_missing_query(request)
    try:
        with query.get_executed_cursor() as cursor:
            rowcount = cursor.rowcount
        query.audit(count_only=True, n_records=rowcount)
        context = {
            'rowcount': rowcount,
            'sql': query.get_original_sql(),
            'nav_on_count': True,
        }
        context.update(query_context(request))
        return render(request, 'query_count.html', context)
    # Homebrew SQL can raise django.db.ProgrammingError,
    # django.db.OperationalError, InternalError...; all are thought to be
    # subclasses of django.db.utils.DatabaseError.
    except DatabaseError as exception:
        query.audit(count_only=True, failed=True,
                    fail_msg=str(exception))
        return render_bad_query(request, query, exception)
def resultset_html_table(fieldnames: List[str],
                         rows: List[List[Any]],
                         element_counter: HtmlElementCounter,
                         start_index: int = 0,
                         highlight_dict: Dict[int, List[Highlight]] = None,
                         collapse_at_len: int = None,
                         collapse_at_n_lines: int = None,
                         line_length: int = None,
                         ditto: bool = True,
                         ditto_html: str = '″',
                         no_ditto_cols: List[int] = None,
                         null: str = '<i>NULL</i>') -> str:
    """
    Builds an HTML table showing a set of result rows, one table row per
    result row, with a header row of (HTML-escaped) field names and a
    leading row-number column.

    Args:
        fieldnames: column headings
        rows: the rows to display
        element_counter: passed through to ``make_result_element``
        start_index: zero-based index, within the whole result set, of the
            first row in ``rows``; displayed row numbers are one-based
        highlight_dict: passed through to ``make_result_element``
        collapse_at_len: passed through to ``make_result_element``
        collapse_at_n_lines: passed through to ``make_result_element``
        line_length: passed through to ``make_result_element``
        ditto: replace a cell with a ditto mark when its value equals that of
            the cell directly above it in this table?
        ditto_html: HTML used for the ditto mark
        no_ditto_cols: zero-based column indexes never shown as ditto marks
        null: passed through to ``make_result_element``

    Returns:
        the table, as an HTML string
    """
    # Considered but not implemented: hiding table columns
    # ... see esp "tr > *:nth-child(n)" at
    # http://stackoverflow.com/questions/5440657/how-to-hide-columns-in-html-table  # noqa
    no_ditto_cols = no_ditto_cols or []
    ditto_cell = ' <td class="queryresult ditto">{}</td>\n'.format(
        ditto_html)
    # Collect fragments and join once at the end, rather than repeatedly
    # extending one ever-growing string.
    parts = [
        '<table>\n',
        ' <tr>\n',
        ' <th><i>#</i></th>\n',
    ]  # type: List[str]
    for field in fieldnames:
        parts.append(' <th>{}</th>\n'.format(escape(field)))
    parts.append(' </tr>\n')
    for row_index, row in enumerate(rows):
        # row_index is zero-based within this table
        parts.append(' <tr class="{}">\n'.format(
            "stripy_even" if row_index % 2 == 0 else "stripy_odd"
        ))
        # Row number
        parts.append(' <td><b><i>{}</i></b></td>\n'.format(
            row_index + start_index + 1))
        # Values
        for col_index, value in enumerate(row):
            if (row_index > 0 and ditto and col_index not in no_ditto_cols and
                    value == rows[row_index - 1][col_index]):
                parts.append(ditto_cell)
            else:
                parts.append(' <td class="queryresult">{}</td>\n'.format(
                    make_result_element(
                        value,
                        element_counter=element_counter,
                        highlight_dict=highlight_dict,
                        collapse_at_len=collapse_at_len,
                        collapse_at_n_lines=collapse_at_n_lines,
                        line_length=line_length,
                        null=null
                    )
                ))
        parts.append(' </tr>\n')
    parts.append('</table>\n')
    return ''.join(parts)
def single_record_html_table(fieldnames: List[str],
                             record: List[Any],
                             element_counter: HtmlElementCounter,
                             highlight_dict: Dict[int, List[Highlight]] = None,
                             collapse_at_len: int = None,
                             collapse_at_n_lines: int = None,
                             line_length: int = None) -> str:
    """
    Builds an HTML table showing a single record, with one table row per
    field: the (HTML-escaped) field name, then its value.

    Args:
        fieldnames: field names, in the same order as the values in
            ``record``
        record: the values to display
        element_counter: passed through to ``make_result_element``
        highlight_dict: passed through to ``make_result_element``
        collapse_at_len: passed through to ``make_result_element``
        collapse_at_n_lines: passed through to ``make_result_element``
        line_length: passed through to ``make_result_element``

    Returns:
        the table, as an HTML string
    """
    # Collect fragments and join once at the end, rather than repeatedly
    # extending one ever-growing string.
    parts = ['<table>\n']  # type: List[str]
    for col_index, value in enumerate(record):
        fieldname = fieldnames[col_index]
        parts.append(' <tr class="{}">\n'.format(
            "stripy_even" if col_index % 2 == 0 else "stripy_odd"
        ))
        parts.append(' <th>{}</th>'.format(escape(fieldname)))
        parts.append(
            ' <td class="queryresult">{}</td>\n'.format(
                make_result_element(
                    value,
                    element_counter=element_counter,
                    highlight_dict=highlight_dict,
                    collapse_at_len=collapse_at_len,
                    collapse_at_n_lines=collapse_at_n_lines,
                    line_length=line_length,
                    collapsed=False,
                )
            )
        )
        parts.append(' </tr>\n')
    parts.append('</table>\n')
    return ''.join(parts)
def render_resultset(request: HttpRequest,
                     query: Query,
                     highlights: Union[QuerySet, List[Highlight]],
                     collapse_at_len: int = None,
                     collapse_at_n_lines: int = None,
                     line_length: int = None,
                     ditto: bool = True,
                     ditto_html: str = '″') -> HttpResponse:
    """
    Runs a query and renders one page of its results as an HTML table.

    The query is executed and all rows fetched; the attempt is audited
    whether it succeeds or fails. Only the rows on the requested page are
    converted to HTML.

    Args:
        request: the current request
        query: the query to run, or ``None``
        highlights: highlights to apply to the results
        collapse_at_len: passed through to :func:`resultset_html_table`
        collapse_at_n_lines: passed through to :func:`resultset_html_table`
        line_length: passed through to :func:`resultset_html_table`
        ditto: passed through to :func:`resultset_html_table`
        ditto_html: passed through to :func:`resultset_html_table`

    Returns:
        the results page; the "missing query" page if ``query`` is ``None``;
        or the "bad query" page if the database raises an error
    """
    if query is None:
        return render_missing_query(request)

    # Run the query
    try:
        with query.get_executed_cursor() as cursor:
            fieldnames = get_fieldnames_from_cursor(cursor)
            rows = cursor.fetchall()
            rowcount = cursor.rowcount
        query.audit(n_records=rowcount)
    except DatabaseError as exception:
        query.audit(failed=True, fail_msg=str(exception))
        return render_bad_query(request, query, exception)

    # Paginate over row indexes: we don't need to process all rows before
    # we paginate.
    page = paginate(request, list(range(len(rows))))
    first_index = page.start_index() - 1  # zero-based, inclusive
    last_index = page.end_index() - 1  # zero-based, inclusive
    rows_on_page = rows[first_index:last_index + 1]

    # Build the table for this page, with highlights
    table_html = resultset_html_table(
        fieldnames=fieldnames,
        rows=rows_on_page,
        element_counter=HtmlElementCounter(),
        start_index=first_index,
        highlight_dict=Highlight.as_ordered_dict(highlights),
        collapse_at_len=collapse_at_len,
        collapse_at_n_lines=collapse_at_n_lines,
        line_length=line_length,
        ditto=ditto,
        ditto_html=ditto_html,
    )

    # Render
    context = dict(
        table_html=table_html,
        page=page,
        rowcount=rowcount,
        sql=prettify_sql_html(query.get_original_sql()),
        nav_on_results=True,
        sql_highlight_css=prettify_sql_css(),
    )
    context.update(query_context(request))
    return render(request, 'query_result.html', context)
def render_resultset_recordwise(request: HttpRequest,
                                query: Query,
                                highlights: Union[QuerySet, List[Highlight]],
                                collapse_at_len: int = None,
                                collapse_at_n_lines: int = None,
                                line_length: int = None) -> HttpResponse:
    """
    Runs a query and renders its results one record per page.

    The query is executed and all rows fetched; the attempt is audited
    whether it succeeds or fails. Only the record on the requested page is
    converted to HTML.

    Args:
        request: the current request
        query: the query to run, or ``None``
        highlights: highlights to apply to the results
        collapse_at_len: passed through to :func:`single_record_html_table`
        collapse_at_n_lines: passed through to
            :func:`single_record_html_table`
        line_length: passed through to :func:`single_record_html_table`

    Returns:
        the results page; the "missing query" page if ``query`` is ``None``;
        or the "bad query" page if the database raises an error
    """
    if query is None:
        return render_missing_query(request)

    # Run the query
    try:
        with query.get_executed_cursor() as cursor:
            fieldnames = get_fieldnames_from_cursor(cursor)
            rows = cursor.fetchall()
            rowcount = cursor.rowcount
        query.audit(n_records=rowcount)
    except DatabaseError as exception:
        query.audit(failed=True, fail_msg=str(exception))
        return render_bad_query(request, query, exception)

    # Paginate over row indexes, one record per page: we don't need to
    # process all rows before we paginate.
    page = paginate(request, list(range(len(rows))), per_page=1)
    highlight_dict = Highlight.as_ordered_dict(highlights)

    if not rows:
        table_html = "<b>No rows returned.</b>"
    else:
        record_number = page.start_index()  # one-based
        heading_html = '<p><i>Record {}</i></p>\n'.format(record_number)
        table_html = heading_html + single_record_html_table(
            fieldnames=fieldnames,
            record=rows[record_number - 1],
            element_counter=HtmlElementCounter(),
            highlight_dict=highlight_dict,
            collapse_at_len=collapse_at_len,
            collapse_at_n_lines=collapse_at_n_lines,
            line_length=line_length,
        )

    # Render
    context = dict(
        table_html=table_html,
        page=page,
        rowcount=rowcount,
        sql=prettify_sql_html(query.get_original_sql()),
        nav_on_results_recordwise=True,
        sql_highlight_css=prettify_sql_css(),
    )
    context.update(query_context(request))
    return render(request, 'query_result.html', context)
def render_missing_query(request: HttpRequest) -> HttpResponse:
    """
    Renders the page shown when a query object is missing.
    """
    context = query_context(request)
    return render(request, 'query_missing.html', context)
def render_bad_query(request: HttpRequest,
                     query: Query,
                     exception: Exception) -> HttpResponse:
    """
    Renders the page reporting that a query failed.

    Args:
        request: the current request
        query: the query that failed
        exception: the exception raised; any ``sql``/``args`` information
            recovered from it is shown as the "final" SQL

    Returns:
        the rendered ``query_bad.html`` response
    """
    recovered = recover_info_from_exception(exception)
    final_sql_html = prettify_sql_and_args(recovered.get('sql', ''),
                                           recovered.get('args', []))
    context = dict(
        original_sql=prettify_sql_html(query.get_original_sql()),
        final_sql=final_sql_html,
        exception=repr(exception),
        sql_highlight_css=prettify_sql_css(),
    )
    context.update(query_context(request))
    return render(request, 'query_bad.html', context)
def render_bad_query_id(request: HttpRequest, query_id: str) -> HttpResponse:
    """
    Renders the page reporting that a query ID is not valid.

    Args:
        request: the current request
        query_id: the offending ID, passed to the template as ``query_id``
    """
    context = dict(query_id=query_id)
    context.update(query_context(request))
    return render(request, 'query_bad_id.html', context)
# =============================================================================
# Highlights
# =============================================================================
def highlight_edit_select(request: HttpRequest) -> HttpResponse:
    """
    Edit or select highlighting for current query.

    A valid POST activates the user's existing highlight with the same
    colour and text, or creates a new active one, and then redirects back to
    this view. Any other request (including a POST whose form is invalid)
    renders the page with a fresh form and a paginated list of the user's
    highlights.

    Args:
        request: the current request

    Returns:
        a redirect (after a valid POST) or the rendered
        ``highlight_edit_select.html`` page
    """
    all_highlights = Highlight.objects.filter(user=request.user)\
        .order_by('text', 'colour')
    if request.method == 'POST':
        form = AddHighlightForm(request.POST)
        if form.is_valid():
            colour = form.cleaned_data['colour']
            text = form.cleaned_data['text']
            identicals = all_highlights.filter(colour=colour, text=text)
            if identicals:
                # Re-use the existing highlight rather than duplicating it.
                identicals[0].activate()
            else:
                highlight = Highlight(colour=colour, text=text,
                                      user=request.user, active=True)
                highlight.save()
            return redirect('highlight')
    values = {'colour': 0}
    form = AddHighlightForm(values)
    active_highlights = all_highlights.filter(active=True)
    highlight_dict = Highlight.as_ordered_dict(active_highlights)
    highlight_descriptions = get_highlight_descriptions(highlight_dict)
    highlights = paginate(request, all_highlights)
    context = {
        'form': form,
        'highlights': highlights,
        'nav_on_highlight': True,
        'N_CSS_HIGHLIGHT_CLASSES': N_CSS_HIGHLIGHT_CLASSES,
        'highlight_descriptions': highlight_descriptions,
        'colourlist': list(range(N_CSS_HIGHLIGHT_CLASSES)),
    }
    context.update(query_context(request))
    return render(request, 'highlight_edit_select.html', context)
def highlight_activate(request: HttpRequest,
                       highlight_id: str) -> HttpResponse:
    """
    Activates one of the requesting user's highlights, then redirects to the
    highlight editing view.

    Args:
        request: the current request
        highlight_id: ID of the highlight to activate

    Returns:
        a redirect to the ``highlight`` view; a 404 is raised if the user
        has no highlight with that ID
    """
    validate_blank_form(request)
    # Restrict the lookup to the requesting user's own highlights, so one
    # user cannot alter another user's highlights.
    highlight = get_object_or_404(
        Highlight, id=highlight_id, user=request.user)  # type: Highlight
    highlight.activate()
    return redirect('highlight')
def highlight_deactivate(request: HttpRequest,
                         highlight_id: str) -> HttpResponse:
    """
    Deactivates one of the requesting user's highlights, then redirects to
    the highlight editing view.

    Args:
        request: the current request
        highlight_id: ID of the highlight to deactivate

    Returns:
        a redirect to the ``highlight`` view; a 404 is raised if the user
        has no highlight with that ID
    """
    validate_blank_form(request)
    # Restrict the lookup to the requesting user's own highlights, so one
    # user cannot alter another user's highlights.
    highlight = get_object_or_404(
        Highlight, id=highlight_id, user=request.user)  # type: Highlight
    highlight.deactivate()
    return redirect('highlight')
def highlight_delete(request: HttpRequest,
                     highlight_id: str) -> HttpResponse:
    """
    Deletes one of the requesting user's highlights, then redirects to the
    'highlight' URL.

    Args:
        request: the HTTP request; checked first with validate_blank_form()
        highlight_id: ID (from the URL) of the highlight to delete

    Responds with HTTP 404 if the requesting user has no highlight with that
    ID.
    """
    validate_blank_form(request)
    # highlight_id comes straight from the URL, so restrict the lookup to
    # highlights owned by the requesting user; otherwise any user could
    # delete another user's highlights by guessing IDs.
    highlight = get_object_or_404(
        Highlight, id=highlight_id, user=request.user)  # type: Highlight
    highlight.delete()
    return redirect('highlight')
# def render_bad_highlight_id(request, highlight_id):
# context = {'highlight_id': highlight_id}
# context.update(query_context(request))
# return render(request, 'highlight_bad_id.html', context)
def get_highlight_descriptions(
        highlight_dict: Dict[int, List[Highlight]]) -> List[str]:
    """
    Builds HTML snippets illustrating the highlights, one per colour in use.

    For each colour number from 0 to N_CSS_HIGHLIGHT_CLASSES - 1 that is a
    key of ``highlight_dict``, the texts of that colour's highlights are
    passed through highlight_text() and joined with ", ". Colours that are
    not present are skipped, so the result has at most
    N_CSS_HIGHLIGHT_CLASSES entries.
    """
    return [
        ", ".join(highlight_text(h.text, colour_number)
                  for h in highlight_dict[colour_number])
        for colour_number in range(N_CSS_HIGHLIGHT_CLASSES)
        if colour_number in highlight_dict
    ]
# =============================================================================
# PID lookup
# =============================================================================
# In general with these database-choosing functions, don't redirect between
# the "generic" and "database-specific" views using POST, because we can't then
# add default values to a new form (since the request.POST object is
# populated and immutable). Use a dbname query parameter as well.
# (That doesn't make it HTTP GET; it makes it HTTP POST with query parameters.)
def pid_rid_lookup(request: HttpRequest,
                   with_db_url_name: str,
                   html_filename: str) -> HttpResponse:
    """
    Database-choosing step shared by the pidlookup and ridlookup views.

    Considers the research databases that have a secret lookup map:

    - none: shows a generic error page;
    - exactly one: redirects straight to the URL named ``with_db_url_name``
      for that database;
    - several: shows a database picker (template ``html_filename``) and
      redirects to the database-specific URL once a valid choice is made.
    """
    dbinfolist = research_database_info.dbs_with_secret_map
    n_databases = len(dbinfolist)

    if n_databases == 0:
        return generic_error(request, "No databases with lookup map!")

    if n_databases == 1:
        # No choice to make.
        only_dbname = dbinfolist[0].name
        return HttpResponseRedirect(
            reverse(with_db_url_name, args=[only_dbname]))

    form = DatabasePickerForm(request.POST or None, dbinfolist=dbinfolist)
    if not form.is_valid():
        return render(request, html_filename, {'form': form})
    chosen_dbname = form.cleaned_data['database']
    return HttpResponseRedirect(
        reverse(with_db_url_name, args=[chosen_dbname]))
def pid_rid_lookup_with_db(
        request: HttpRequest,
        dbname: str,
        form_html_filename: str,
        formclass: Any,
        result_html_filename: str) -> HttpResponse:
    """
    Database-specific step shared by pidlookup_with_db and ridlookup_with_db.

    Shows the lookup form (``formclass``, template ``form_html_filename``)
    for the research database called ``dbname``. Once the form validates,
    whichever of the ID lists 'pids', 'mpids', 'trids', 'rids' and 'mrids'
    it supplies are handed to render_lookup(), which renders
    ``result_html_filename``. An unknown database name gives a generic error
    page.
    """
    # formclass is annotated as Any because of a bug in the Python 3.5 typing
    # module; we can't use Union[Type[PidLookupForm], Type[RidLookupForm]]
    # yet; we get
    # TypeError: descriptor '__subclasses__' of 'type' object needs an argument
    # ... see https://github.com/python/typing/issues/266
    try:
        dbinfo = research_database_info.get_dbinfo_by_name(dbname)
    except ValueError:
        return generic_error(request,
                             "No research database named {!r}".format(dbname))

    form = formclass(request.POST or None, dbinfo=dbinfo)  # type: Union[PidLookupForm, RidLookupForm]  # noqa
    if not form.is_valid():
        return render(request, form_html_filename, {
            'db_name': dbinfo.name,
            'db_description': dbinfo.description,
            'form': form,
        })

    # A given form class offers only some of these fields; anything absent
    # (or empty) becomes an empty list.
    cleaned = form.cleaned_data
    id_lists = {}
    for id_kind in ('pids', 'mpids', 'trids', 'rids', 'mrids'):
        id_lists[id_kind] = cleaned.get(id_kind) or []
    return render_lookup(request=request,
                         dbinfo=dbinfo,
                         result_html_filename=result_html_filename,
                         **id_lists)
@user_passes_test(is_superuser)
def pidlookup(request: HttpRequest) -> HttpResponse:
    """
    Entry point for looking up PID information from RID information.

    Delegates the choice of database to pid_rid_lookup(), which moves on to
    the 'pidlookup_with_db' URL. Access is gated by the is_superuser test.
    """
    return pid_rid_lookup(request,
                          "pidlookup_with_db",
                          "pid_lookup_choose_db.html")
@user_passes_test(is_superuser)
def pidlookup_with_db(request: HttpRequest,
                      dbname: str) -> HttpResponse:
    """
    Looks up PID information from RID information in the research database
    named ``dbname``.

    Thin wrapper around pid_rid_lookup_with_db() using PidLookupForm and the
    PID lookup templates. Access is gated by the is_superuser test.
    """
    return pid_rid_lookup_with_db(request,
                                  dbname,
                                  'pid_lookup_form.html',
                                  PidLookupForm,
                                  'pid_lookup_result.html')
@user_passes_test(is_clinician)
def ridlookup(request: HttpRequest) -> HttpResponse:
    """
    Entry point for looking up RID information from PID information.

    Delegates the choice of database to pid_rid_lookup(), which moves on to
    the 'ridlookup_with_db' URL. Access is gated by the is_clinician test.
    """
    return pid_rid_lookup(request,
                          "ridlookup_with_db",
                          "rid_lookup_choose_db.html")
@user_passes_test(is_clinician)
def ridlookup_with_db(request: HttpRequest,
                      dbname: str) -> HttpResponse:
    """
    Looks up RID information from PID information in the research database
    named ``dbname``.

    Thin wrapper around pid_rid_lookup_with_db() using RidLookupForm and the
    RID lookup templates. Access is gated by the is_clinician test.
    """
    return pid_rid_lookup_with_db(request,
                                  dbname,
                                  'rid_lookup_form.html',
                                  RidLookupForm,
                                  'rid_lookup_result.html')
def render_lookup(request: HttpRequest,
                  dbinfo: SingleResearchDatabase,
                  result_html_filename: str,
                  trids: List[int] = None,
                  rids: List[str] = None,
                  mrids: List[str] = None,
                  pids: List[int] = None,
                  mpids: List[int] = None) -> HttpResponse:
    """
    Renders the results of a PID/RID lookup.

    Fetches, from the secret lookup database of ``dbinfo``, every PidLookup
    record matching ANY of the supplied TRIDs, RIDs, MRIDs, PIDs or MPIDs
    (ordered by PID), and renders them with ``result_html_filename``. ID
    lists left as None are treated as empty.
    """
    # if not request.user.superuser:
    #    return HttpResponse('Forbidden', status=403)
    #    # http://stackoverflow.com/questions/3297048/403-forbidden-vs-401-unauthorized-http-responses  # noqa
    if trids is None:
        trids = []
    if rids is None:
        rids = []
    if mrids is None:
        mrids = []
    if pids is None:
        pids = []
    if mpids is None:
        mpids = []

    assert dbinfo.secret_lookup_db
    any_id_matches = (
        Q(trid__in=trids) |
        Q(rid__in=rids) |
        Q(mrid__in=mrids) |
        Q(pid__in=pids) |
        Q(mpid__in=mpids)
    )
    secret_db_lookups = PidLookup.objects.using(dbinfo.secret_lookup_db)
    lookups = secret_db_lookups.filter(any_id_matches).order_by('pid')

    return render(request, result_html_filename, {
        'lookups': lookups,
        'trid_field': dbinfo.trid_field,
        'trid_description': dbinfo.trid_description,
        'rid_field': dbinfo.rid_field,
        'rid_description': dbinfo.rid_description,
        'mrid_field': dbinfo.mrid_field,
        'mrid_description': dbinfo.mrid_description,
        'pid_description': dbinfo.pid_description,
        'mpid_description': dbinfo.mpid_description,
    })
# =============================================================================
# Research database structure
# =============================================================================
def structure_table_long(request: HttpRequest) -> HttpResponse:
    """
    Shows the research database structure as one long, unpaginated table.

    Renders 'database_structure.html' with every column description from
    research_database_info, plus the total row count.
    """
    all_columns = research_database_info.get_colinfolist()
    return render(request, 'database_structure.html', {
        'paginated': False,
        'colinfolist': all_columns,
        'rowcount': len(all_columns),
        'default_database': research_database_info.get_default_database_name(),
        'default_schema': research_database_info.get_default_schema_name(),
        'with_database': research_database_info.uses_database_level(),
    })
def structure_table_paginated(request: HttpRequest) -> HttpResponse:
    """
    Shows the research database structure as a paginated table.

    Renders 'database_structure.html' with the current page of column
    descriptions; 'rowcount' is the total number of columns across all pages.
    """
    all_columns = research_database_info.get_colinfolist()
    # Count before paginating, so that the total is reported.
    total_rows = len(all_columns)
    current_page = paginate(request, all_columns)
    return render(request, 'database_structure.html', {
        'paginated': True,
        'colinfolist': current_page,
        'rowcount': total_rows,
        'default_database': research_database_info.get_default_database_name(),
        'default_schema': research_database_info.get_default_schema_name(),
        'with_database': research_database_info.uses_database_level(),
    })
@django_cache_function(timeout=None)
# @lru_cache(maxsize=None)
def get_structure_tree_html() -> str:
    """
    Builds the HTML for the collapsible "tree" view of the database
    structure.

    For each table there is a title line (schema and table name, plus a
    show/hide button) followed by a collapsible div containing that table's
    columns, rendered via 'database_structure_table.html'. The result is
    cached by django_cache_function.
    """
    grammar = research_database_info.grammar
    element_counter = HtmlElementCounter()
    table_to_colinfolist = research_database_info.get_colinfolist_by_tables()

    section_template = (
        '<div class="titlecolour">{db_schema}.<b>{table}</b>{button}</div>'
        '{cd}'
    )
    sections = []  # type: List[str]
    for table_id, colinfolist in table_to_colinfolist.items():
        table_context = {
            'colinfolist': colinfolist,
            'default_database': research_database_info.get_default_database_name(),  # noqa
            'default_schema': research_database_info.get_default_schema_name(),  # noqa
            'with_database': research_database_info.uses_database_level()
        }
        html_table = render_to_string('database_structure_table.html',
                                      table_context)
        # The button is requested before its content div, as a pair from the
        # same element counter.
        show_hide_button = element_counter.visibility_div_spanbutton()
        collapsible_div = element_counter.visibility_div_contentdiv(
            contents=html_table)
        sections.append(section_template.format(
            db_schema=table_id.database_schema_part(grammar),
            table=table_id.table_part(grammar),
            button=show_hide_button,
            cd=collapsible_div,
        ))
    return "".join(sections)
def structure_tree(request: HttpRequest) -> HttpResponse:
    """
    Shows the research database structure as a collapsible tree, using the
    HTML from get_structure_tree_html().
    """
    tree_html = get_structure_tree_html()
    context = dict(
        content=tree_html,
        default_database=research_database_info.get_default_database_name(),
        default_schema=research_database_info.get_default_schema_name(),
    )
    return render(request, 'database_structure_tree.html', context)
# noinspection PyUnusedLocal
def structure_tsv(request: HttpRequest) -> HttpResponse:
    """
    Serves the research database structure as a TSV download named
    'structure.tsv'. The request itself is not used.
    """
    tsv_data = research_database_info.get_tsv()
    return file_response(tsv_data,
                         content_type=ContentType.TSV,
                         filename="structure.tsv")
# noinspection PyUnusedLocal
def structure_excel(request: HttpRequest) -> HttpResponse:
    """
    Serves the research database structure as an Excel download named
    'structure.xlsx'. The request itself is not used.
    """
    return file_response(
        research_database_info.get_excel(),
        # The payload is an XLSX workbook, so declare it as such (as
        # pe_excel() does); it was previously mislabelled as TSV.
        content_type=ContentType.XLSX,
        filename="structure.xlsx"
    )
# =============================================================================
# Local help on structure
# =============================================================================
def local_structure_help(request: HttpRequest) -> HttpResponse:
    """
    Serves site-specific help on the database structure.

    If settings.DATABASE_HELP_HTML_FILENAME is set, the contents of that file
    are returned verbatim as the response body (UTF-8 encoded). Otherwise,
    'local_structure_help.html' is rendered with a "no help available"
    message.
    """
    help_filename = settings.DATABASE_HELP_HTML_FILENAME
    if not help_filename:
        return render(request, 'local_structure_help.html', {
            'content': "<p>No local help available.</p>",
        })
    with open(help_filename, 'r') as infile:
        content = infile.read()
    return HttpResponse(content.encode('utf8'))
# =============================================================================
# SQL helpers
# =============================================================================
def textmatch(column_name: str,
              fragment: str,
              as_fulltext: bool,
              dialect: str = 'mysql') -> str:
    """
    Returns an SQL condition testing whether a column contains some text.

    Args:
        column_name: the column to search; inserted into the SQL as-is
        fragment: the text to find; inserted as-is between single quotes, so
            it must already be safe to embed in an SQL string literal
        as_fulltext: use the dialect's full-text search syntax, if it has one
            that we know about?
        dialect: 'mysql' gives ``MATCH(...) AGAINST (...)`` and 'mssql' gives
            ``CONTAINS(...)`` for full-text searches

    Without ``as_fulltext``, or for any other dialect, the condition is a
    ``LIKE '%fragment%'`` comparison.
    """
    template = "{column} LIKE '%{fragment}%'"
    if as_fulltext:
        if dialect == 'mysql':
            template = "MATCH({column}) AGAINST ('{fragment}')"
        elif dialect == 'mssql':
            template = "CONTAINS({column}, '{fragment}')"
    return template.format(column=column_name, fragment=fragment)
def textfinder_sql(patient_id_fieldname: str,
                   fragment: str,
                   min_length: int,
                   use_fulltext_index: bool,
                   include_content: bool,
                   include_datetime: bool,
                   patient_id_value: Union[int, str] = None,
                   extra_fieldname: str = None,
                   extra_value: Union[int, str] = None) -> str:
    """
    Returns SQL to find the text in fragment across all tables that contain the
    field indicated by patient_id_fieldname, where the length of the text field
    is at least min_length.

    The result is a UNION of one SELECT per text column (if the content is
    wanted) or one SELECT per table (if not), with a final ORDER BY.

    Args:
        patient_id_fieldname: name of the patient ID field that tables must
            contain; it is also selected in the output
        fragment: text to search for; it is inserted verbatim into SQL string
            literals by textmatch(), so it must already be escaped (the
            visible caller uses escape_sql_string_literal())
        min_length: minimum length of text columns to be searched
        use_fulltext_index: use database full-text indexing, for columns that
            have a full-text index
        include_content: include the text fields (with table and column
            names) in the output
        include_datetime: include each table's default date column (or NULL
            if it has none) in the output
        patient_id_value: restrict to a single patient (None or '' for no
            restriction)
        extra_fieldname: if this and extra_value are given, extra_value is
            added to every row as a literal column with this name
        extra_value: see extra_fieldname

    Returns:
        the SQL, or an empty string if no table had a suitable text column

    Will raise ValueError if no tables match the request.
    """
    grammar = research_database_info.grammar
    tables = research_database_info.tables_containing_field(
        patient_id_fieldname)
    if not tables:
        raise ValueError(
            "No tables containing fieldname: {}".format(patient_id_fieldname))
    have_pid_value = patient_id_value is not None and patient_id_value != ''
    if have_pid_value:
        pidclause = "{patient_id_fieldname} = {value}".format(
            patient_id_fieldname=patient_id_fieldname,
            value=escape_sql_string_or_int_literal(patient_id_value)
        )
    else:
        pidclause = ""
    using_extra = extra_fieldname and extra_value is not None
    table_heading = "_table_name"
    contents_colname_heading = "_column_name"
    datetime_heading = "_datetime"

    queries = []  # type: List[str]

    def text_condition(column_identifier: str,
                       indexed_fulltext: bool) -> str:
        # Condition matching the fragment in one column. The research
        # database's dialect is passed explicitly: textmatch() defaults to
        # MySQL syntax, which would give invalid full-text SQL (MATCH ...
        # AGAINST) on other databases such as SQL Server.
        return textmatch(
            column_name=column_identifier,
            fragment=fragment,
            as_fulltext=(indexed_fulltext and use_fulltext_index),
            dialect=settings.RESEARCH_DB_DIALECT
        )

    def add_query(table_ident: str,
                  extra_cols: List[str],
                  date_value_select: str,
                  extra_conditions: List[str]) -> None:
        # Appends one SELECT statement (for one table) to "queries".
        selectcols = []  # type: List[str]
        # Patient ID(s); date
        if using_extra:
            selectcols.append('{lit} AS {ef}'.format(
                lit=escape_sql_string_or_int_literal(extra_value),
                ef=extra_fieldname
            ))
        selectcols.append(patient_id_fieldname)
        if include_datetime:
            selectcols.append("{} AS {}".format(date_value_select,
                                                datetime_heading))
        # +/- table/column/content
        selectcols += extra_cols
        # Build query
        query = (
            "SELECT {cols}"
            "\nFROM {table}".format(cols=", ".join(selectcols),
                                    table=table_ident)
        )
        conditions = []  # type: List[str]
        if have_pid_value:
            conditions.append(pidclause)
        conditions.extend(extra_conditions)
        query += "\nWHERE " + " AND ".join(conditions)
        queries.append(query)

    for table_id in tables:
        columns = research_database_info.text_columns(
            table_id=table_id, min_length=min_length)
        if not columns:
            continue
        table_identifier = table_id.identifier(grammar)
        date_col = research_database_info.get_default_date_column(
            table=table_id)
        if date_col:
            date_identifier = date_col.identifier(grammar)
        else:
            date_identifier = "NULL"

        if include_content:
            # Content required; therefore, one query per text column.
            table_select = "'{}' AS {}".format(
                escape_sql_string_literal(table_identifier),
                table_heading
            )
            for columninfo in columns:
                column_identifier = columninfo.column_id.identifier(grammar)
                contentcol_name_select = "'{}' AS {}".format(
                    column_identifier, contents_colname_heading)
                content_select = "{} AS _content".format(column_identifier)
                add_query(table_ident=table_identifier,
                          extra_cols=[table_select,
                                      contentcol_name_select,
                                      content_select],
                          date_value_select=date_identifier,
                          extra_conditions=[
                              text_condition(column_identifier,
                                             columninfo.indexed_fulltext)
                          ])
        else:
            # Content not required; therefore, one query per table.
            elements = [
                text_condition(columninfo.column_id.identifier(grammar),
                               columninfo.indexed_fulltext)
                for columninfo in columns
            ]  # type: List[str]
            add_query(table_ident=table_identifier,
                      extra_cols=[],
                      date_value_select=date_identifier,
                      extra_conditions=[
                          "(\n    {}\n)".format("\n    OR ".join(elements))
                      ])

    sql = "\nUNION\n".join(queries)
    if sql:
        order_by_cols = []  # type: List[str]
        if using_extra:
            order_by_cols.append(extra_fieldname)
        order_by_cols.append(patient_id_fieldname)
        if include_datetime:
            order_by_cols.append(datetime_heading + " DESC")
        if include_content:
            order_by_cols.extend([table_heading, contents_colname_heading])
        sql += "\nORDER BY " + ", ".join(order_by_cols)
    return sql
def common_find_text(request: HttpRequest,
                     dbinfo: SingleResearchDatabase,
                     form_class: Type[SQLHelperTextAnywhereForm],
                     default_values: Dict[str, Any],
                     permit_pid_search: bool,
                     html_filename: str) -> HttpResponse:
    """
    Creates SQL to find text anywhere in the database(s) via a UNION query.

    Shows a form (``form_class``, template ``html_filename``) asking what
    text to find and, optionally, for which patient. When the form validates,
    the SQL is generated with textfinder_sql() and then saved, run, or just
    displayed, depending on which submit button was used.

    Args:
        request: the HTTP request
        dbinfo: the research database to search
        form_class: the form class to use
        default_values: initial form values, used when nothing was POSTed;
            NB modified in place ('fkname' is set) if permit_pid_search
        permit_pid_search: offer lookup by PID and MPID? This requires the
            database to have a secret lookup database.
        html_filename: template used to show the form

    Patients chosen by PID, MPID or MRID are converted to their RID via the
    secret lookup database, and the SQL then searches by RID. A patient who
    can't be found, or a failure to generate any SQL, gives a generic error
    page.
    """
    # When you forget about Django forms, go back to:
    # http://www.slideshare.net/pydanny/advanced-django-forms-usage

    # -------------------------------------------------------------------------
    # What may the user use to look up patients?
    # -------------------------------------------------------------------------
    fk_options = []  # type: List[FieldPickerInfo]
    if permit_pid_search:
        fk_options.append(FieldPickerInfo(
            value=dbinfo.pid_pseudo_field,
            description="{}: {}".format(dbinfo.pid_pseudo_field,
                                        dbinfo.pid_description),
            type_=PatientFieldPythonTypes.PID,
            permits_empty_id=False
        ))
        fk_options.append(FieldPickerInfo(
            value=dbinfo.mpid_pseudo_field,
            description="{}: {}".format(
                dbinfo.mpid_pseudo_field, dbinfo.mpid_description),
            type_=PatientFieldPythonTypes.MPID,
            permits_empty_id=False
        ))
        assert dbinfo.secret_lookup_db
        default_values['fkname'] = dbinfo.pid_pseudo_field
    fk_options.append(
        FieldPickerInfo(value=dbinfo.rid_field,
                        description="{}: {}".format(dbinfo.rid_field,
                                                    dbinfo.rid_description),
                        type_=PatientFieldPythonTypes.RID,
                        permits_empty_id=True),
    )
    if dbinfo.secret_lookup_db:
        fk_options.append(
            FieldPickerInfo(value=dbinfo.mrid_field,
                            description="{}: {}".format(
                                dbinfo.mrid_field, dbinfo.mrid_description),
                            type_=PatientFieldPythonTypes.MRID,
                            permits_empty_id=False)
        )
    # We don't want to make too much of the TRID. Let's not offer it as
    # a lookup option. If performance becomes a major problem with these
    # queries, we could always say "if dbinfo.secret_lookup_db, then
    # look up the TRID from the RID (or whatever we're using)".
    #
    # FieldPickerInfo(value=dbinfo.trid_field,
    #                 description="{}: {}".format(dbinfo.trid_field,
    #                                             dbinfo.trid_description),
    #                 type_=PatientFieldPythonTypes.TRID),

    form = form_class(request.POST or default_values, fk_options=fk_options)
    if form.is_valid():
        patient_id_fieldname = form.cleaned_data['fkname']
        pidvalue = form.cleaned_data['patient_id']
        min_length = form.cleaned_data['min_length']

        # ---------------------------------------------------------------------
        # What are we going to use internally for the lookup?
        # ---------------------------------------------------------------------
        # For patient lookups, a TRID is quick but not so helpful for
        # clinicians. Use the RID.
        #
        # The MRID is not stored in each table. Rather than have an
        # absolutely enormous query (SELECT stuff FROM texttable INNER JOIN
        # mridtable ON patient_id_stuff WHERE textttable.contents LIKE
        # something AND mridtable.mrid = ? UNION SELECT morestuff...) let's
        # look up the RID from the MRID. Consequently, we only offer MRID
        # lookup if we have a secret lookup table.
        if patient_id_fieldname == dbinfo.pid_pseudo_field:
            lookup_by, id_label = 'pid', 'PID'
        elif patient_id_fieldname == dbinfo.mpid_pseudo_field:
            lookup_by, id_label = 'mpid', 'MPID'
        elif patient_id_fieldname == dbinfo.mrid_field:
            lookup_by, id_label = 'mrid', 'MRID'
        else:
            # Using RID directly (or, if we wanted to support it, TRID).
            lookup_by, id_label = None, None

        extra_fieldname = None
        extra_value = None
        if lookup_by is not None:
            lookup = (
                PidLookup.objects.using(dbinfo.secret_lookup_db)
                .filter(**{lookup_by: pidvalue}).first()
            )  # type: PidLookup
            if lookup is None:
                # Name the kind of ID that the user actually searched by
                # (an MRID failure used to be reported as "RID").
                return generic_error(
                    request,
                    "No patient with {} {!r}".format(id_label, pidvalue))
            # Replace: report the ID that the user gave as an extra column,
            # but search by RID.
            extra_fieldname = patient_id_fieldname
            extra_value = pidvalue
            patient_id_fieldname = dbinfo.rid_field
            pidvalue = lookup.rid  # string

        # ---------------------------------------------------------------------
        # Generate the query
        # ---------------------------------------------------------------------
        try:
            sql = textfinder_sql(
                patient_id_fieldname=patient_id_fieldname,
                fragment=escape_sql_string_literal(
                    form.cleaned_data['fragment']),
                min_length=min_length,
                use_fulltext_index=form.cleaned_data['use_fulltext_index'],
                include_content=form.cleaned_data['include_content'],
                include_datetime=form.cleaned_data['include_datetime'],
                patient_id_value=pidvalue,
                extra_fieldname=extra_fieldname,
                extra_value=extra_value,
            )
            # This SQL will link across all available research databases
            # where the fieldname conditions are met.
            if not sql:
                raise ValueError(
                    "No fields matched your criteria (text columns of minimum "
                    "length {} in tables containing field {!r})".format(
                        min_length, patient_id_fieldname))
        except ValueError as e:
            return generic_error(request, str(e))

        # ---------------------------------------------------------------------
        # Run, save, or display the query
        # ---------------------------------------------------------------------
        if 'submit_save' in request.POST:
            return query_submit(request, sql, run=False)
        elif 'submit_run' in request.POST:
            return query_submit(request, sql, run=True)
        else:
            return render(request, 'sql_fragment.html', {'sql': sql})

    # -------------------------------------------------------------------------
    # Offer the starting choices
    # -------------------------------------------------------------------------
    return render(request, html_filename, {
        'db_name': dbinfo.name,
        'db_description': dbinfo.description,
        'form': form,
    })
def sqlhelper_text_anywhere(request: HttpRequest) -> HttpResponse:
    """
    Picks a database, then redirects to sqlhelper_text_anywhere_with_db.

    With a single research database there is nothing to choose, so the
    redirect is immediate; otherwise a database picker form is shown until a
    valid choice is made.
    """
    next_url_name = 'sqlhelper_text_anywhere_with_db'

    if research_database_info.single_research_db:
        only_dbname = research_database_info.first_dbinfo.name
        return HttpResponseRedirect(reverse(next_url_name,
                                            args=[only_dbname]))

    form = DatabasePickerForm(request.POST or None,
                              dbinfolist=research_database_info.dbinfolist)
    if not form.is_valid():
        return render(request, 'sqlhelper_form_text_anywhere_choose_db.html',
                      {'form': form})
    chosen_dbname = form.cleaned_data['database']
    return HttpResponseRedirect(reverse(next_url_name, args=[chosen_dbname]))
def sqlhelper_text_anywhere_with_db(request: HttpRequest,
                                    dbname: str) -> HttpResponse:
    """
    SQL helper: creates SQL to find text anywhere in the research database
    named ``dbname``, via a UNION query.

    Calls common_find_text() without PID search; by default, patients are
    identified by RID and neither content nor date/time is included. An
    unknown database name gives a generic error page.
    """
    try:
        dbinfo = research_database_info.get_dbinfo_by_name(dbname)
    except ValueError:
        return generic_error(request,
                             "No research database named {!r}".format(dbname))

    form_defaults = dict(
        fkname=dbinfo.rid_field,
        min_length=DEFAULT_MIN_TEXT_FIELD_LENGTH,
        use_fulltext_index=True,
        include_content=False,
        include_datetime=False,
    )
    return common_find_text(request,
                            dbinfo,
                            SQLHelperTextAnywhereForm,
                            form_defaults,
                            False,
                            'sqlhelper_form_text_anywhere.html')
@user_passes_test(is_clinician)
def all_text_from_pid(request: HttpRequest) -> HttpResponse:
    """
    Picks a database, then redirects to all_text_from_pid_with_db.

    Only databases with a secret lookup map are offered: none gives a generic
    error page, exactly one is chosen automatically, and several give a
    database picker form. Access is gated by the is_clinician test.
    """
    # This is exactly the database-choosing step that pid_rid_lookup()
    # implements for the PID/RID lookup views, so share it rather than keep a
    # second copy in step.
    return pid_rid_lookup(
        request=request,
        with_db_url_name='all_text_from_pid_with_db',
        html_filename='clinician_form_all_text_from_pid_choose_db.html')
@user_passes_test(is_clinician)
def all_text_from_pid_with_db(request: HttpRequest,
                              dbname: str) -> HttpResponse:
    """
    Clinician view to look up a patient's RID from their PID and display
    text from any field, for the research database named ``dbname``.

    Calls common_find_text() with PID search permitted; by default, both
    content and date/time are included. An unknown database name gives a
    generic error page. Access is gated by the is_clinician test.
    """
    try:
        dbinfo = research_database_info.get_dbinfo_by_name(dbname)
    except ValueError:
        return generic_error(request,
                             "No research database named {!r}".format(dbname))

    form_defaults = dict(
        min_length=DEFAULT_MIN_TEXT_FIELD_LENGTH,
        use_fulltext_index=True,
        include_content=True,
        include_datetime=True,
    )
    return common_find_text(request,
                            dbinfo,
                            ClinicianAllTextFromPidForm,
                            form_defaults,
                            True,
                            'clinician_form_all_text_from_pid.html')
# =============================================================================
# Per-patient views: Patient Explorer
# =============================================================================
def pe_build(request: HttpRequest) -> HttpResponse:
    """
    View to build a Patient Explorer query interactively.

    Operates on the PatientMultiQuery "scratchpad" held in the user's profile
    (starting a new one if the profile has none). A POST carries one command,
    identified by the name of the submit button used:

    - 'global_clear_select', 'global_clear_where', 'global_clear_everything':
      clear output columns, patient conditions, or both plus any manual
      query;
    - 'global_save', 'global_run': if the scratchpad is OK to run, hand over
      to pe_submit();
    - 'global_manual_set', 'global_manual_clear': set or clear a manual
      (override) patient ID query;
    - otherwise, a query-builder form submission ('submit_select',
      'submit_select_star' or 'submit_where') adding an output column, all of
      a table's columns, or a patient condition.

    Unless it hands over to pe_submit(), the view then renders
    'pe_build.html' showing the current state of the scratchpad.
    """
    profile = request.user.profile
    default_database = research_database_info.get_default_database_name()
    default_schema = research_database_info.get_default_schema_name()
    with_database = research_database_info.uses_database_level()
    manual_form = None
    form = None

    if not profile.patient_multiquery_scratchpad:
        profile.patient_multiquery_scratchpad = PatientMultiQuery()
    pmq = profile.patient_multiquery_scratchpad

    # -------------------------------------------------------------------------
    # Act on any command
    # -------------------------------------------------------------------------
    if request.method == 'POST':
        posted = request.POST
        if 'global_clear_select' in posted:
            pmq.clear_output_columns()
            profile.save()

        elif 'global_clear_where' in posted:
            pmq.clear_patient_conditions()
            profile.save()

        elif 'global_clear_everything' in posted:
            pmq.clear_output_columns()
            pmq.clear_patient_conditions()
            pmq.set_override_query('')
            profile.save()

        elif 'global_save' in posted:
            if pmq.ok_to_run:
                return pe_submit(request, pmq, run=False)

        elif 'global_run' in posted:
            if pmq.ok_to_run:
                return pe_submit(request, pmq, run=True)

        elif 'global_manual_set' in posted:
            manual_form = ManualPeQueryForm(posted)
            if manual_form.is_valid():
                pmq.set_override_query(manual_form.cleaned_data['sql'])
                profile.save()

        elif 'global_manual_clear' in posted:
            pmq.set_override_query('')
            profile.save()

        else:
            # Query-builder command, relating to a specific column.
            form = QueryBuilderForm(posted, request.FILES)
            if form.is_valid():
                cleaned = form.cleaned_data
                if with_database:
                    database = cleaned['database']
                else:
                    database = ''
                column_id = ColumnId(db=database,
                                     schema=cleaned['schema'],
                                     table=cleaned['table'],
                                     column=cleaned['column'])

                if 'submit_select' in posted:
                    pmq.add_output_column(column_id)  # noqa

                elif 'submit_select_star' in posted:
                    # Every column of the table to which this column belongs
                    table_columns = [
                        colinfo.column_id for colinfo in
                        research_database_info.all_columns(column_id.table_id)
                    ]
                    for table_column_id in table_columns:
                        pmq.add_output_column(table_column_id)

                elif 'submit_where' in posted:
                    datatype = cleaned['datatype']
                    op = cleaned['where_op']
                    # Value
                    if op in SQL_OPS_MULTIPLE_VALUES:
                        value = form.file_values_list
                    elif op in SQL_OPS_VALUE_UNNECESSARY:
                        value = None
                    else:
                        value = form.get_cleaned_where_value()
                    # WHERE fragment
                    pmq.add_patient_condition(
                        WhereCondition(column_id=column_id,
                                       op=op,
                                       datatype=datatype,
                                       value_or_values=value))

                else:
                    raise ValueError("Bad form command!")
                profile.save()
            # else: form is invalid; its errors are shown via 'form_errors'
            # below.

    # -------------------------------------------------------------------------
    # Forms and starting values for the page
    # -------------------------------------------------------------------------
    manual_query = pmq.manual_patient_id_query
    if form is None:
        form = QueryBuilderForm()
    if manual_form is None:
        manual_form = ManualPeQueryForm({'sql': manual_query})

    submitted = form.data
    starting_values_dict = {
        'database': submitted.get('database', '') if with_database else '',
        'schema': submitted.get('schema', ''),
        'table': submitted.get('table', ''),
        'column': submitted.get('column', ''),
        'op': submitted.get('where_op', ''),
        'date_value': submitted.get('date_value', ''),
        # Impossible to set file_value programmatically. (See querybuilder.js.)
        'float_value': submitted.get('float_value', ''),
        'int_value': submitted.get('int_value', ''),
        'string_value': submitted.get('string_value', ''),
        'offer_where': True,
        'form_errors': "<br>".join("{}: {}".format(fieldname, errors)
                                   for fieldname, errors
                                   in form.errors.items()),
        'default_database': default_database,
        'default_schema': default_schema,
        'with_database': with_database,
    }

    # -------------------------------------------------------------------------
    # Describe the scratchpad
    # -------------------------------------------------------------------------
    if manual_query:
        pmq_patient_conditions = "<div><i>Overridden by manual query.</i></div>"  # noqa
        pmq_manual_patient_query = prettify_sql_html(
            pmq.manual_patient_id_query)
    else:
        pmq_patient_conditions = pmq.pt_conditions_html
        pmq_manual_patient_query = "<div><i>None</i></div>"
    pmq_final_patient_query = prettify_sql_html(
        pmq.patient_id_query(with_order_by=True))

    warning_divs = []  # type: List[str]
    if not pmq.has_patient_id_query:
        warning_divs.append(
            '<div class="warning">No patient criteria yet</div>')
    if not pmq.has_output_columns:
        warning_divs.append(
            '<div class="warning">No output columns yet</div>')
    warnings = ''.join(warning_divs)

    context = {
        'nav_on_pe_build': True,
        'pmq_output_columns': pmq.output_cols_html,
        'pmq_patient_conditions': pmq_patient_conditions,
        'pmq_manual_patient_query': pmq_manual_patient_query,
        'pmq_final_patient_query': pmq_final_patient_query,
        'warnings': warnings,
        'database_structure': get_db_structure_json(),
        'starting_values': json.dumps(starting_values_dict),
        'sql_dialect': settings.RESEARCH_DB_DIALECT,
        'dialect_mysql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MYSQL,
        'dialect_mssql': settings.RESEARCH_DB_DIALECT == SqlaDialectName.MSSQL,
        'sql_highlight_css': prettify_sql_css(),
        'manual_form': manual_form,
    }
    context.update(query_context(request))
    return render(request, 'pe_build.html', context)
def pe_choose(request: HttpRequest) -> HttpResponse:
    """
    Shows a paginated list of the user's Patient Explorers (as returned by
    get_all_pes()), rendered with 'pe_choose.html'.
    """
    current_page = paginate(request, get_all_pes(request))
    context = dict(
        nav_on_pe_choose=True,
        patient_explorers=current_page,
        sql_highlight_css=prettify_sql_css(),
    )
    context.update(query_context(request))
    return render(request, 'pe_choose.html', context)
def pe_activate(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Activates one of the requesting user's Patient Explorers, then redirects
    to the 'pe_choose' URL.

    Args:
        request: the HTTP request; checked first with validate_blank_form()
        pe_id: ID (from the URL) of the PatientExplorer to activate

    Responds with HTTP 404 if the requesting user has no Patient Explorer
    with that ID.
    """
    validate_blank_form(request)
    # pe_id comes straight from the URL, so restrict the lookup to Patient
    # Explorers owned by the requesting user (as get_all_pes() does when
    # listing them); otherwise any user could alter another user's by
    # guessing IDs.
    pe = get_object_or_404(
        PatientExplorer, id=pe_id, user=request.user)  # type: PatientExplorer
    pe.activate()
    return redirect('pe_choose')
def pe_delete(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Deletes (if permitted) one of the requesting user's Patient Explorers,
    then redirects to the 'pe_choose' URL.

    Args:
        request: the HTTP request; checked first with validate_blank_form()
        pe_id: ID (from the URL) of the PatientExplorer to delete

    Responds with HTTP 404 if the requesting user has no Patient Explorer
    with that ID.
    """
    validate_blank_form(request)
    # pe_id comes straight from the URL, so restrict the lookup to Patient
    # Explorers owned by the requesting user (as get_all_pes() does when
    # listing them); otherwise any user could delete another user's by
    # guessing IDs.
    pe = get_object_or_404(
        PatientExplorer, id=pe_id, user=request.user)  # type: PatientExplorer
    pe.delete_if_permitted()
    return redirect('pe_choose')
def pe_edit(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Loads a saved Patient Explorer for editing: copies its multi-query into
    the scratchpad in the user's profile, then redirects to the 'pe_build'
    URL. Responds with HTTP 404 if there is no Patient Explorer with that ID.
    """
    validate_blank_form(request)
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    user_profile = request.user.profile
    # This replaces whatever the user was previously building.
    user_profile.patient_multiquery_scratchpad = pe.patient_multiquery
    user_profile.save()
    return redirect('pe_build')
def pe_results(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Shows the results of a Patient Explorer, one page of patients at a time.

    The explorer's patient MRIDs are paginated; for the patients on the
    current page, each of the explorer's per-table queries is run and its
    results are converted to an HTML table (with the user's active highlights
    and display settings), accompanied by a collapsible view of the SQL.

    Responds with HTTP 404 if there is no Patient Explorer with that ID. A
    database error is reported via render_bad_pe().
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    grammar = research_database_info.grammar
    profile = request.user.profile
    highlights = Highlight.get_active_highlights(request)
    highlight_dict = Highlight.as_ordered_dict(highlights)
    element_counter = HtmlElementCounter()
    patient_id_query_html = prettify_sql_html(pe.get_patient_id_query())
    patients_per_page = get_patients_per_page(request)

    try:
        mrids = pe.get_patient_mrids()
        page = paginate(request, mrids, per_page=patients_per_page)
        mrids_this_page = list(page)

        results = []
        if mrids_this_page:
            per_table_queries = pe.all_queries(mrids=mrids_this_page)
            for table_id, sql, args in per_table_queries:
                with pe.get_executed_cursor(sql, args) as cursor:
                    fieldnames = get_fieldnames_from_cursor(cursor)
                    rows = cursor.fetchall()
                    table_html = resultset_html_table(
                        fieldnames=fieldnames,
                        rows=rows,
                        element_counter=element_counter,
                        highlight_dict=highlight_dict,
                        collapse_at_len=profile.collapse_at_len,
                        collapse_at_n_lines=profile.collapse_at_n_lines,
                        line_length=profile.line_length,
                    )
                    query_html = element_counter.visibility_div_with_divbutton(
                        contents=prettify_sql_and_args(sql, args),
                        title_html="SQL")
                    results.append(dict(
                        tablename=table_id.identifier(grammar),
                        table_html=table_html,
                        query_html=query_html,
                    ))

        context = dict(
            nav_on_pe_results=True,
            results=results,
            page=page,
            rowcount=len(mrids),  # all patients, not just this page
            patient_id_query_html=patient_id_query_html,
            patients_per_page=patients_per_page,
            sql_highlight_css=prettify_sql_css(),
        )
        context.update(query_context(request))
        return render(request, 'pe_result.html', context)

    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def render_missing_pe(request: HttpRequest) -> HttpResponse:
    """
    Renders the 'pe_missing.html' page, with the standard query context.
    """
    context = query_context(request)
    return render(request, 'pe_missing.html', context)
# noinspection PyUnusedLocal
def render_bad_pe(request: HttpRequest,
                  pe: PatientExplorer,
                  exception: Exception) -> HttpResponse:
    """
    Renders the 'pe_bad.html' error page for a Patient Explorer that failed.

    Shows the exception, together with the SQL and arguments recovered from
    it by recover_info_from_exception() (blank/empty if not available). The
    ``pe`` parameter is not used.
    """
    info = recover_info_from_exception(exception)
    failed_sql = info.get('sql', '')
    failed_args = info.get('args', [])
    context = dict(
        exception=repr(exception),
        query=prettify_sql_and_args(failed_sql, failed_args),
        sql_highlight_css=prettify_sql_css(),
    )
    context.update(query_context(request))
    return render(request, 'pe_bad.html', context)
# def render_bad_pe_id(request: HttpRequest, pe_id: int) -> HttpResponse:
# context = {'pe_id': pe_id}
# context.update(query_context(request))
# return render(request, 'pe_bad_id.html', context)
def get_all_pes(request: HttpRequest) -> QuerySet:
    """
    Returns the requesting user's non-deleted Patient Explorers, ordered by
    '-active' and then '-created' (descending on both fields).
    """
    own_undeleted = PatientExplorer.objects.filter(user=request.user,
                                                   deleted=False)
    return own_undeleted.order_by('-active', '-created')
def get_identical_pes(request: HttpRequest,
                      pmq: PatientMultiQuery) -> List[PatientExplorer]:
    """
    Returns those of the requesting user's Patient Explorers whose
    multi-query is identical to ``pmq``.

    NB the result is a list of PatientExplorer objects (not of
    PatientMultiQuery objects); pe_submit() activates the first of them.
    """
    all_pes = get_all_pes(request)

    # identical_pes = all_pes.filter(patient_multiquery=pmq)
    #
    # ... this works, but does so by converting the parameter (pmq) to its
    # JSON representation, presumably via JsonClassField.get_prep_value().
    # Accordingly, we can predict problems under SQL Server with very long
    # strings; see the problem in query_submit().
    # So, we should similarly hash:
    identical_pes = all_pes.filter(pmq_hash=pmq.hash64)
    # Beware: Python's hash() function will downconvert to 32 bits on 32-bit
    # machines; use pmq.hash64 directly, not hash(pmq).

    # Double-check in Python in case of hash collision:
    return [pe for pe in identical_pes if pe.patient_multiquery == pmq]
def pe_submit(request: HttpRequest,
              pmq: PatientMultiQuery,
              run: bool = False) -> HttpResponse:
    """
    Make ``pmq`` the basis of an active Patient Explorer, then redirect.

    If the user already has a Patient Explorer with an identical multiquery,
    that one is activated; otherwise a new active one is created and saved.

    Args:
        request: the current HTTP request.
        pmq: the patient multiquery being submitted.
        run: if true, redirect to the ``pe_results`` view for the Patient
            Explorer; otherwise redirect to ``pe_choose``.

    Returns:
        An HTTP redirect.
    """
    matches = get_identical_pes(request, pmq)
    if not matches:
        # Nothing identical exists yet: create a fresh, active one.
        created = PatientExplorer(patient_multiquery=pmq,
                                  user=request.user,
                                  active=True)
        created.save()
        pe_id = created.id
    else:
        # Re-use the first identical one.
        existing = matches[0]
        existing.activate()
        pe_id = existing.id
    # log.critical(pprint.pformat(connection.queries))  # show all queries

    # Redirect to a new URL:
    if not run:
        return redirect('pe_choose')
    return redirect('pe_results', pe_id)
def pe_tsv_zip(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Serve a Patient Explorer's results as a ZIP download.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        A file response containing ``pe.get_zipped_tsv_binary()``, or the
        "bad Patient Explorer" page if a ``DatabaseError`` occurs.
    """
    # http://stackoverflow.com/questions/12881294/django-create-a-zip-of-multiple-files-and-make-it-downloadable  # noqa
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    try:
        zip_data = pe.get_zipped_tsv_binary()
        download_name = "crate_pe_{num}_{datetime}.zip".format(
            num=pe.id,
            datetime=datetime_iso_for_filename(),
        )
        return file_response(zip_data,
                             content_type=ContentType.ZIP,
                             filename=download_name)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_excel(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Serve a Patient Explorer's results as an XLSX download.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        A file response containing ``pe.get_xlsx_binary()``, or the
        "bad Patient Explorer" page if a ``DatabaseError`` occurs.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    try:
        xlsx_data = pe.get_xlsx_binary()
        download_name = "crate_pe_{num}_{datetime}.xlsx".format(
            num=pe.id,
            datetime=datetime_iso_for_filename(),
        )
        return file_response(xlsx_data,
                             content_type=ContentType.XLSX,
                             filename=download_name)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_data_finder_results(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Show the "data finder" results for one page of patients.

    One data-finder query is run per item yielded by
    ``gen_data_finder_queries()``; the rows from all of them are combined
    into a single HTML table, and the SQL for each is shown in its own
    collapsible section.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        The rendered ``pe_df_result.html`` page, or the "bad Patient
        Explorer" page if a ``DatabaseError`` occurs.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    profile = request.user.profile
    patients_per_page = get_patients_per_page(request)
    element_counter = HtmlElementCounter()
    patient_id_query_html = prettify_sql_html(pe.get_patient_id_query())
    # If this query is done as a UNION, it's massive, e.g. ~410 characters
    # * number of tables (e.g. 1448 in one RiO database), for 0.5 Mb of query.
    # So do it more sensibly:
    try:
        mrids = pe.get_patient_mrids()
        page = paginate(request, mrids, per_page=patients_per_page)
        active_mrids = list(page)
        results_table_html = ''
        query_html = ''
        if active_mrids:
            fieldnames = []
            rows = []
            for table_identifier, sql, args in \
                    pe.patient_multiquery.gen_data_finder_queries(
                        mrids=active_mrids):
                with pe.get_executed_cursor(sql, args) as cursor:
                    # Column names are taken from the first query only.
                    if not fieldnames:
                        fieldnames = get_fieldnames_from_cursor(cursor)
                    # Accumulate (not overwrite), so that rows from every
                    # query contribute to the final table -- this stands in
                    # for the UNION that we are deliberately not running.
                    rows.extend(cursor.fetchall())
                    query_html += element_counter.visibility_div_with_divbutton(  # noqa
                        contents=prettify_sql_and_args(sql, args),
                        title_html="SQL for " + table_identifier)
            # Build the table once, from the combined rows.
            results_table_html = resultset_html_table(
                fieldnames=fieldnames,
                rows=rows,
                element_counter=element_counter,
                collapse_at_len=profile.collapse_at_len,
                collapse_at_n_lines=profile.collapse_at_n_lines,
                line_length=profile.line_length,
                no_ditto_cols=[2, 3, 4],
                null=''
            )
        context = {
            'nav_on_pe_df_results': True,
            'some_patients': len(active_mrids) > 0,
            'results_table_html': results_table_html,
            'query_html': query_html,
            'page': page,
            'rowcount': len(mrids),
            'patient_id_query_html': patient_id_query_html,
            'patients_per_page': patients_per_page,
            'sql_highlight_css': prettify_sql_css(),
        }
        context.update(query_context(request))
        return render(request, 'pe_df_result.html', context)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_data_finder_excel(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Serve a Patient Explorer's "data finder" output as an XLSX download.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        A file response containing ``pe.data_finder_excel``, or the
        "bad Patient Explorer" page if a ``DatabaseError`` occurs.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    try:
        xlsx_data = pe.data_finder_excel
        download_name = "crate_pe_df_{num}_{datetime}.xlsx".format(
            num=pe.id,
            datetime=datetime_iso_for_filename(),
        )
        return file_response(xlsx_data,
                             content_type=ContentType.XLSX,
                             filename=download_name)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_monster_results(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Show the "monster" results for one page of patients: one result table
    (plus its SQL) for each query yielded by ``gen_monster_queries()``.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        The rendered ``pe_monster_result.html`` page, or the "bad Patient
        Explorer" page if a ``DatabaseError`` occurs.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    grammar = research_database_info.grammar
    profile = request.user.profile
    highlight_dict = Highlight.as_ordered_dict(
        Highlight.get_active_highlights(request))
    element_counter = HtmlElementCounter()
    patient_id_query_html = prettify_sql_html(pe.get_patient_id_query())
    patients_per_page = get_patients_per_page(request)
    try:
        all_mrids = pe.get_patient_mrids()
        page = paginate(request, all_mrids, per_page=patients_per_page)
        page_mrids = list(page)
        results = []
        pmq = pe.patient_multiquery
        if page_mrids:
            monster_queries = pmq.gen_monster_queries(mrids=page_mrids)
            for table_id, sql, args in monster_queries:
                with pe.get_executed_cursor(sql, args) as cursor:
                    fieldnames = get_fieldnames_from_cursor(cursor)
                    rows = cursor.fetchall()
                    if not rows:
                        table_html = "<div><i>No data</i></div>"
                    else:
                        table_html = resultset_html_table(
                            fieldnames=fieldnames,
                            rows=rows,
                            element_counter=element_counter,
                            highlight_dict=highlight_dict,
                            collapse_at_len=profile.collapse_at_len,
                            collapse_at_n_lines=profile.collapse_at_n_lines,
                            line_length=profile.line_length,
                        )
                    # Collapsible section showing the SQL for this table.
                    query_html = element_counter.visibility_div_with_divbutton(
                        contents=prettify_sql_and_args(sql, args),
                        title_html="SQL")
                    results.append(dict(
                        tablename=table_id.identifier(grammar),
                        table_html=table_html,
                        query_html=query_html,
                    ))
        context = dict(
            nav_on_pe_monster_results=True,
            results=results,
            page=page,
            rowcount=len(all_mrids),
            patient_id_query_html=patient_id_query_html,
            patients_per_page=patients_per_page,
            sql_highlight_css=prettify_sql_css(),
        )
        context.update(query_context(request))
        return render(request, 'pe_monster_result.html', context)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_table_browser(request: HttpRequest, pe_id: str) -> HttpResponse:
    """
    Show the table browser page for a Patient Explorer.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).

    Returns:
        The rendered ``pe_table_browser.html`` page, or the "bad Patient
        Explorer" page if a ``DatabaseError`` occurs while rendering.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    tables = research_database_info.get_tables()
    with_database = research_database_info.uses_database_level()
    try:
        context = dict(
            nav_on_pe_table_browser=True,
            pe_id=pe_id,
            tables=tables,
            with_database=with_database,
        )
        context.update(query_context(request))
        return render(request, 'pe_table_browser.html', context)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)
def pe_one_table(request: HttpRequest, pe_id: str,
                 schema: str, table: str, db: str = '') -> HttpResponse:
    """
    Show all rows of one table for the patients on the current page of a
    Patient Explorer.

    Args:
        request: the current HTTP request.
        pe_id: ID of the ``PatientExplorer`` (404 if not found).
        schema: schema name of the table to show.
        table: name of the table to show.
        db: database name of the table to show (default: empty string).

    Returns:
        The rendered ``query_result.html`` page, or the "bad Patient
        Explorer" page if a ``DatabaseError`` occurs.
    """
    pe = get_object_or_404(PatientExplorer, id=pe_id)  # type: PatientExplorer
    table_id = TableId(db=db, schema=schema, table=table)
    grammar = research_database_info.grammar
    highlights = Highlight.get_active_highlights(request)
    highlight_dict = Highlight.as_ordered_dict(highlights)
    element_counter = HtmlElementCounter()
    profile = request.user.profile
    patients_per_page = get_patients_per_page(request)
    try:
        mrids = pe.get_patient_mrids()
        page = paginate(request, mrids, per_page=patients_per_page)
        active_mrids = list(page)
        # Defaults, used when there are no patients on this page:
        table_html = "<div><i>No data</i></div>"
        sql = ""
        args = []
        rowcount = 0
        if active_mrids:
            mrid_column = research_database_info.get_mrid_column_from_table(
                table_id)
            # One "?" placeholder per patient ID on this page.
            where_clause = "{mrid} IN ({in_clause})".format(
                mrid=mrid_column.identifier(grammar),
                in_clause=",".join(["?"] * len(active_mrids)),
            )  # ... see notes for translate_sql_qmark_to_percent()
            args = active_mrids
            sql = add_to_select(
                sql='',
                select_elements=[SelectElement(
                    raw_select='*',
                    from_table_for_raw_select=table_id
                )],
                grammar=grammar,
                where_conditions=[WhereCondition(
                    raw_sql=where_clause,
                    from_table_for_raw_sql=mrid_column.table_id
                )],
                magic_join=True,
                formatted=True)
            with pe.get_executed_cursor(sql, args) as cursor:
                fieldnames = get_fieldnames_from_cursor(cursor)
                rows = cursor.fetchall()
            # Count the fetched rows ourselves: after a SELECT, DB-API
            # drivers are allowed to (and several do) report
            # cursor.rowcount as -1.
            rowcount = len(rows)
            if rows:
                table_html = resultset_html_table(
                    fieldnames=fieldnames,
                    rows=rows,
                    element_counter=element_counter,
                    highlight_dict=highlight_dict,
                    collapse_at_len=profile.collapse_at_len,
                    collapse_at_n_lines=profile.collapse_at_n_lines,
                    line_length=profile.line_length,
                )
        # Render
        context = {
            'table_html': table_html,
            'page': page,
            'rowcount': rowcount,
            'sql': prettify_sql_and_args(sql=sql, args=args),
            'sql_highlight_css': prettify_sql_css(),
        }
        context.update(query_context(request))
        return render(request, 'query_result.html', context)
    except DatabaseError as exception:
        return render_bad_pe(request, pe, exception)