Coverage for /home/mattis/projects/websites/dighl/edictor/src/edictor/util.py: 92%
333 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-08-07 06:52 +0200
1"""
2Utility functions for the server.
3"""
4import sqlite3
5import urllib
6import os
7import json
8import codecs
9import getpass
10import signal
12from urllib.request import urlopen
14from pathlib import Path
15from datetime import datetime
# Mapping from file extension to the Content-Type value sent when serving
# static assets; an empty string means an empty Content-type header is sent
# for binary font/image files.  The "config" entry is not a MIME type: it
# names the configuration file looked up via DATA["config"].
DATA = {
    "js": "text/javascript",
    "css": "text/css",
    "html": "text/html",
    "tsv": "text/plain; charset=utf-8",
    "csv": "text/plain; charset=utf-8",
    "png": "",
    "jpg": "",
    "ttf": "",
    "woff": "",
    "json": "text/plain; charset=utf-8",
    # name of the configuration file, not a content type
    "config": "config.json",
}
def opendb(path, conf):
    """Open the SQLite database for *path* and return (connection, cursor).

    The file ``<path>.sqlite3`` is looked up first in the configured
    ``sqlite`` folder, then inside the bundled app directory.

    :raises ValueError: if neither location contains the database file.
    """
    filename = path + ".sqlite3"
    local = Path(conf["sqlite"], filename)
    bundled = edictor_path(conf["sqlite"], filename)
    if local.exists():
        db = sqlite3.connect(local)
    elif bundled.exists():
        db = sqlite3.connect(bundled)
    else:
        raise ValueError("SQLITE DB could not be found.")
    return db, db.cursor()
def edictor_path(*comps):
    """Return a path inside the bundled "app" directory next to this module."""
    base = Path(__file__).parent / "app"
    return base.joinpath(*comps)
def parse_args(path):
    """Parse the query string of a GET request path into a dict.

    Only the part after the first "?" (and before any "#") is considered.

    :param path: request path such as "/page?key=value&other=1#frag"
    :return: dict mapping parameter names to their (still url-encoded) values

    Each pair is split on the FIRST "=" only, so values that themselves
    contain "=" are kept intact (the previous tuple-unpacking of
    ``x.split("=")`` raised ValueError on such input).
    """
    args = {}
    # avoid splitting error for paths without a query string
    if "?" in path and "=" in path:
        query = path.split("?")[1].split("#")[0]
        for pair in query.split("&"):
            # partition = split at first "=" only; a pair without "="
            # yields an empty value instead of crashing
            key, _, value = pair.partition("=")
            args[key] = value
    return args
def parse_post(path):
    """Parse an application/x-www-form-urlencoded POST body into a dict.

    :param path: the body, as str or utf-8 bytes; anything after "#" is ignored
    :return: dict mapping field names to their (still url-encoded) values

    Each pair is split on the FIRST "=" only, so values containing "="
    are preserved (the previous ``x.split("=")`` unpacking raised
    ValueError on such input).
    """
    args = {}
    if isinstance(path, bytes):
        path = path.decode("utf-8")
    if "=" in path:
        for pair in path.split("#")[0].split("&"):
            # partition = split at first "=" only
            key, _, value = pair.partition("=")
            args[key] = value
    return args
def download(s, post):
    """
    Download command, that writes the file to the current folder.

    Writes the posted ``data`` field to the path in ``file`` (which must
    end in ".tsv").  If the target already exists, it is first renamed to
    a timestamped backup.

    :param s: request handler used to send the response
    :param post: raw POST body (parsed with parse_post)
    """
    args = parse_post(post)
    if not args["file"].endswith(".tsv"):
        return
    date, time = str(datetime.today()).split(" ")
    if Path(args["file"]).exists():
        # keep a backup: file.tsv -> file-YYYY-MM-DD-HH-MM.tsv
        # (fixed: previously the "-" between date and time was missing,
        # producing names like file-2024-08-0706-52.tsv)
        os.rename(
            args["file"],
            args["file"][:-4] + "-" + date + "-" +
            "-".join(time.split(":")[:2]) + ".tsv"
        )
    # NOTE(review): args["file"] comes straight from the request and is
    # not sanitized; a crafted path could escape the working folder —
    # acceptable only for trusted local use; confirm.
    with codecs.open(args["file"], "w", "utf-8") as f:
        f.write(urllib.parse.unquote_plus(args["data"]))

    send_response(s, "success")
def send_response(s, content, content_type="text/html",
                  content_disposition=None, encode=True):
    """Send an HTTP 200 response carrying *content* over handler *s*.

    :param s: BaseHTTPRequestHandler-like object (send_response,
        send_header, end_headers, wfile)
    :param content: response body; str when *encode* is true, bytes otherwise
    :param content_type: value for the Content-type header
    :param content_disposition: optional Content-disposition header value
    :param encode: utf-8-encode *content* before writing
    """
    body = bytes(content, "utf-8") if encode else content
    s.send_response(200)
    s.send_header("Content-type", content_type)
    if content_disposition:
        s.send_header("Content-disposition", content_disposition)
    s.end_headers()
    s.wfile.write(body)
def handle_args(args, query, qtype):
    """Update *args* in place with parameters parsed from *query*.

    :param args: dict of defaults, updated in place
    :param query: GET path or POST body, depending on *qtype*
    :param qtype: "GET" or "POST"; any other value leaves *args* untouched
    """
    parsers = {"POST": parse_post, "GET": parse_args}
    parser = parsers.get(qtype)
    if parser is not None:
        args.update(parser(query))
# noinspection PyPackageRequirements
def check(s):
    """Report whether the optional cognate-detection libraries are installed.

    Responds with "lingpy" when both lingpy and lingrex import cleanly,
    with "python" otherwise.
    """
    try:
        import lingpy  # noqa: F401
        import lingrex  # noqa: F401
    except ImportError:  # pragma: no cover
        message = "python"
    else:
        message = "lingpy"
    send_response(s, message)
def configuration():
    """
    Load the Configuration Data File.

    Search order: config.json in the current working directory first,
    then the copy bundled with the application; falls back to built-in
    defaults if neither exists.  For remote setups, missing credentials
    are prompted for interactively and the per-file request "data"
    dicts are pre-serialized into query strings.

    :return: configuration dict with at least "user" and "sqlite" keys
    """
    if Path(DATA["config"]).exists():
        with codecs.open(DATA["config"], "r", "utf-8") as f:
            conf = json.load(f)
    elif edictor_path(DATA["config"]).exists():
        with codecs.open(edictor_path(DATA["config"]), "r", "utf-8") as f:
            conf = json.load(f)
    else:  # pragma: no cover
        conf = {
            "user": "unknown",
            "links": None,
            "sqlite": "sqlite",
        }

    if conf.get("remote"):  # pragma: no cover
        # remote access needs credentials; prompt for whatever is missing
        if not conf.get("user"):
            conf["user"] = input("User name: ")
        if not conf.get("pw"):
            conf["pw"] = getpass.getpass("Remote password: ")
        # prepare the links now: serialize each remote entry's "data"
        # dict into a "k=v&k=v" query string for later requests
        for key, values in conf["remote"].items():
            for file in values:
                values[file]["data"] = "&".join(
                    ["{0}={1}".format(k, v) for k, v in
                     values[file]["data"].items()])

    # represent urls as lists: append each link's "data" dict to its URL
    # as a query string
    if conf.get("links"):
        for link in conf["links"]:
            link["url"] = link["url"] + "?" + "&".join(
                ["{0}={1}".format(k, v) for k, v in link["data"].items()])

    # defaults for optional keys
    if not conf.get("sqlite"):
        conf["sqlite"] = "sqlite"

    if not conf.get("user"):
        conf["user"] = "unknown"

    return conf
def get_distinct(what, cursor, name):
    """Return the distinct VAL entries where COL equals *what* in table *name*.

    :param what: column label to match against COL (bound as a parameter
        — previously it was concatenated into the SQL string)
    :param cursor: sqlite3 cursor
    :param name: table name; cannot be parameterized in SQLite, so it is
        interpolated — callers must pass a trusted name
    :return: list of distinct values
    """
    return [row[0] for row in cursor.execute(
        'select distinct val from ' + name + ' where col = ?;',
        (what,)
    )]
def get_columns(cursor, name):
    """Return the distinct COL entries (the column labels) of table *name*."""
    rows = cursor.execute('select distinct col from ' + name + ';')
    return [row[0] for row in rows]
def file_type(path):
    """Return the extension of *path*, ignoring any query string."""
    base, _, _ = path.partition("?")
    return base.rsplit(".", 1)[-1]
def file_name(path):
    """Return *path* with any query string stripped."""
    return path.partition("?")[0]
def file_handler(s, ft, fn):
    """
    Handle different file types.

    :param s: request handler used to send the response
    :param ft: file extension, also the key into DATA for the content type
    :param fn: requested path, starting with "/" (hence the fn[1:] slices)
    """
    if ft in ["js", "html", "css", "csv"]:
        # text assets are always served from the bundled app folder
        try:
            with codecs.open(edictor_path(fn[1:]), "r", "utf-8") as f:
                message = bytes(f.read(), "utf-8")
        except FileNotFoundError:
            message = b"404 FNF"
    elif ft == "tsv":
        # if a file is in the same folder where the app was started, it is
        # marked by preceding it with "/data/" by the JS application, so
        # these files must be checked for first, as they are local files.
        if Path(fn[6:]).exists() and fn.startswith("/data/"):
            with codecs.open(fn[6:], "r", "utf-8") as f:
                message = bytes(f.read(), "utf-8")
        else:
            # otherwise fall back to the bundled copy, if any
            if edictor_path(fn[1:]).exists():
                with codecs.open(edictor_path(fn[1:]), "r", "utf-8") as f:
                    message = bytes(f.read(), "utf-8")
            else:
                message = b"404 FNF"
    elif ft in ["png", "ttf", "jpg", "woff"]:
        # binary assets: open raw, no text decoding
        try:
            with codecs.open(edictor_path(fn[1:]), 'rb', None) as f:
                message = f.read()
        except FileNotFoundError:
            message = b"404 FNF"
    # NOTE(review): for any other ft, `message` is unbound here and
    # DATA[ft] raises KeyError — callers appear to pass only known
    # extensions; confirm.
    send_response(s, message, DATA[ft], encode=False)
def serve_base(s, conf):
    """Serve the start page (index.html) with links and datasets filled in.

    :param s: request handler used to send the response
    :param conf: configuration dict; conf["links"] supplies the user links
    """
    with codecs.open(edictor_path("index.html"), "r", "utf-8") as f:
        text = f.read()
    # template for one clickable dataset entry on the start page
    link_template = """<div class="dataset inside" onclick="window.open('{url}');"><span>{name}</span></div>"""

    links = []
    for link in conf["links"]:
        links += [link_template.format(**link)]
    text = text.replace("{USERDATA}", "".join(links))

    # add paths that are in the current folder
    paths = []
    for path in Path().glob("*.tsv"):
        paths += [link_template.format(url="edictor.html?file=" + path.name,
                                       name="Open File «" + path.name + "»")]
    text = text.replace("{DATASETS}", "".join(paths))
    # unhide the page sections that only make sense when running locally
    text = text.replace(' id="files" style="display:none"', '')
    text = text.replace(' id="user" style="display:none"', '')
    text = text.replace(' class="user" style="display:none"', '')

    send_response(s, text)
# noinspection SqlDialectInspection,SqlResolve
def new_id(s, query, qtype, conf):
    """
    Obtain new identifier from currently largest one.

    With new_id=true the next free row ID is computed as max(ID)+1 over
    both the data table and its backup.  Otherwise new_id names a column
    (e.g. a cognate-set column) and the next free integer value of that
    column is returned.  With a configured remote host, the request is
    forwarded there instead.

    :param s: request handler used to send the response
    :param query: GET path or POST body with remote_dbase/file/new_id
    :param qtype: "GET" or "POST"
    :param conf: configuration dict (see configuration())
    """
    args = dict(
        remote_dbase='',
        file='',
        new_id='',
    )
    handle_args(args, query, qtype)
    if conf.get("remote") and args["remote_dbase"] in conf["remote"]:  # pragma: no cover
        # forward the request to the configured remote endpoint
        print("requesting remote ID")
        info = conf["remote"][args["remote_dbase"]]["new_id.py"]
        req = urllib.request.Request(
            info["url"],
            data=bytes(info["data"] + "&new_id=true", "utf-8"))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.get_method = lambda: 'POST'
        data = urllib.request.urlopen(req).read()
        send_response(
            s,
            data,
            encode=False,
            content_type="text/plain; charset=utf-8",
            content_disposition='attachment; filename="triples.tsv"'
        )
        return

    db, cursor = opendb(args["remote_dbase"], conf)

    # NOTE(review): table and file names are concatenated into SQL;
    # safe only for trusted local input.
    if args['new_id'] == "true":
        # next row ID: one above the maximum over data table and backup
        cursor.execute('select DISTINCT ID from ' + args['file'] + ';')
        linesA = [x[0] for x in cursor.fetchall()]
        # noinspection SqlNoDataSourceInspection
        cursor.execute(
            'select DISTINCT ID from backup where FILE = "' + args['file'] + '";'
        )
        linesB = [x[0] for x in cursor.fetchall()]
        # max() raises ValueError on an empty table; treat as 0
        try:
            maxA = max(linesA)
        except ValueError:
            maxA = 0
        try:
            maxB = max(linesB)
        except ValueError:
            maxB = 0

        if maxA >= maxB:
            message = str(maxA + 1)
        else:
            message = str(maxB + 1)
    else:
        # next free value of the column named by new_id
        lines = [x[0] for x in cursor.execute('select DISTINCT VAL from ' + args['file'] +
                                              ' where COL="' + args['new_id'] + '";')]
        # dammit but, it doesn't really seem to work without explicit
        # type-checking
        cogids = []
        for line in lines:
            try:
                cogids += [int(line)]
            except ValueError:
                # values may be space-separated lists of integers
                try:
                    cogids += [int(x) for x in line.split(' ')]
                except ValueError:
                    pass
        # NOTE(review): max() raises ValueError if no integer value
        # exists yet — confirm whether callers guarantee at least one.
        message = str(max(cogids) + 1)
    send_response(s, message)
def cognates(s, query, qtype):
    """Compute cognate sets for a posted wordlist with LingPy.

    The wordlist arrives url-encoded in args["wordlist"] as tab-separated
    rows (ID, doculect, concept, tokens).  In "partial" mode, partial
    cognates are clustered via Partial.partial_cluster; otherwise full
    cognates via LexStat.cluster.  Responds with an "ID<TAB>cogid" TSV
    attachment.

    NOTE(review): args["method"] is parsed but never used below — the
    clustering always runs with method="sca".
    """
    args = {
        "wordlist": "",
        "mode": "full",
        "method": "lexstat"
    }
    handle_args(args, query, qtype)
    args["wordlist"] = urllib.parse.unquote_plus(args["wordlist"])

    # assemble the wordlist header
    from lingpy.compare.partial import Partial
    from lingpy.compare.lexstat import LexStat
    from lingpy import basictypes
    tmp = {0: ["doculect", "concept", "form", "tokens"]}
    # last element after split("\n") is the trailing empty line; skip it
    for row in args["wordlist"].split("\n")[:-1]:
        idx, doculect, concept, tokens = row.split('\t')
        tmp[int(idx)] = [
            doculect,
            concept,
            tokens,
            tokens.split(" ")
        ]
    out = ""
    if args["mode"] == "partial":
        part = Partial(tmp)
        part.partial_cluster(
            method="sca", threshold=0.45, ref="cogid",
            cluster_method="upgma")
        for idx in part:
            out += str(idx) + "\t" + str(basictypes.ints(part[idx, "cogid"])) + "\n"
    else:
        lex = LexStat(tmp)
        lex.cluster(
            method="sca", threshold=0.45, ref="cogid",
            cluster_method="upgma")
        for idx in lex:
            out += str(idx) + "\t" + str(lex[idx, "cogid"]) + "\n"

    send_response(
        s,
        out,
        content_type="text/plain; charset=utf-8",
        content_disposition='attachment; filename="triples.tsv"'
    )
def patterns(s, query, qtype):
    """
    Compute correspondence patterns with CoPaR (LingRex)

    The wordlist arrives url-encoded as TSV rows
    (ID, doculect, concept, tokens, cogid(s), alignment).  A CoPaR
    wordlist is built from them and the inferred pattern assignments are
    returned as an "ID<TAB>patterns" TSV attachment.
    """
    args = {
        "wordlist": "",
        "mode": "full",
        "method": "copar",
        "minrefs": 2
    }
    handle_args(args, query, qtype)
    args["wordlist"] = urllib.parse.unquote_plus(args["wordlist"])

    # assemble the wordlist header
    import lingpy
    from lingrex.copar import CoPaR
    # partial cognates use the plural "cogids" column with fuzzy=True below
    if args["mode"] == "partial":
        ref = "cogids"
    else:
        ref = "cogid"
    tmp = {0: ["doculect", "concept", "form", "tokens", ref, "alignment", "structure"]}
    for row in args["wordlist"].split("\n")[:-1]:
        idx, doculect, concept, tokens, cogid, alignment = row.split('\t')
        tmp[int(idx)] = [
            doculect,
            concept,
            tokens,
            tokens.split(" "),
            lingpy.basictypes.ints(cogid) if args["mode"] == "partial" else int(cogid),
            alignment.split(" "),
            # NOTE(review): tokens.split() splits on any whitespace while
            # the tokens column above uses split(" ") — confirm intended
            lingpy.tokens2class(tokens.split(), "cv")
        ]
    # NOTE(review): minrefs is an int only for the default; a value
    # arriving via the query string is a str — confirm CoPaR accepts it
    cop = CoPaR(
        tmp,
        ref=ref,
        transcription="form",
        fuzzy=True if args["mode"] == "partial" else False,
        minrefs=args["minrefs"]
    )
    print("Loaded the CoPaR object.")
    cop.get_sites()
    print("Loaded the Sites.")
    cop.cluster_sites()
    print("Clustered Sites.")
    cop.sites_to_pattern()
    print("Converted Sites to Patterns.")
    cop.add_patterns()
    out = ""
    for idx in cop:
        out += str(idx) + "\t" + " ".join(cop[idx, "patterns"]) + "\n"
    send_response(
        s,
        out,
        content_type="text/plain; charset=utf-8",
        content_disposition='attachment; filename="triples.tsv"'
    )
    print("Successfully computed correspondence patterns.")
def alignments(s, query, qtype):
    """Align the cognate sets of a posted wordlist with LingPy.

    The wordlist arrives url-encoded as TSV rows
    (ID, doculect, concept, tokens, cogid(s)).  Responds with an
    "ID<TAB>alignment" TSV attachment.
    """
    args = {
        "wordlist": "",
        "mode": "full",
        "method": "library"
    }
    handle_args(args, query, qtype)
    args["wordlist"] = urllib.parse.unquote_plus(args["wordlist"])

    print("Carrying out alignments with LingPy")
    # assemble the wordlist header
    import lingpy
    ref = "cogid" if args["mode"] == "full" else "cogids"
    tmp = {0: ["doculect", "concept", "form", "tokens", ref]}
    for row in args["wordlist"].split("\n")[:-1]:
        idx, doculect, concept, tokens, cogid = row.split('\t')
        tmp[int(idx)] = [
            doculect,
            concept,
            tokens,
            tokens.split(" "),
            # NOTE(review): in full mode cogid stays a str here while
            # patterns() casts it to int — lingpy appears to tolerate
            # both; confirm
            lingpy.basictypes.ints(cogid) if args["mode"] == "partial" else cogid
        ]
    alms = lingpy.Alignments(tmp, ref=ref, transcription="form",
                             fuzzy=True if args["mode"] == "partial" else False)
    alms.align(method=args["method"])
    out = ""
    for idx in alms:
        out += str(idx) + "\t" + " ".join(alms[idx, "alignment"]) + "\n"

    send_response(
        s,
        out,
        content_type="text/plain; charset=utf-8",
        content_disposition='attachment; filename="triples.tsv"'
    )
def triples(s, query, qtype, conf):
    """
    Basic access to the triple storage storing data in SQLITE.

    Builds a TSV table (ID plus one column per COL label) from the
    (ID, COL, VAL) triples of the requested file, optionally restricted
    to given columns, concepts and doculects.  With a configured remote
    host the request is forwarded there instead.

    :param s: request handler used to send the response
    :param query: GET path or POST body
    :param qtype: "GET" or "POST"
    :param conf: configuration dict (see configuration())
    """
    args = dict(
        remote_dbase='',
        file='',
        columns='',
        concepts='',
        doculects='',
    )
    handle_args(args, query, qtype)

    if conf.get("remote") and args["remote_dbase"] in conf["remote"]:  # pragma: no cover
        # forward the request to the configured remote endpoint
        print("EDICTOR loading remote TSV file.")
        info = conf["remote"][args["remote_dbase"]]["triples.py"]
        req = urllib.request.Request(
            info["url"],
            data=bytes(info["data"], "utf-8"))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.get_method = lambda: 'POST'
        data = urllib.request.urlopen(req).read()
        send_response(
            s,
            data,
            encode=False,
            content_type="text/plain; charset=utf-8",
            content_disposition='attachment; filename="triples.tsv"'
        )
        return

    db, cursor = opendb(args["remote_dbase"], conf)

    # get unique columns ("%7C" is the url-encoded "|" separator used by
    # the client)
    if not args['columns']:
        cols = get_columns(cursor, args['file'])
    else:
        cols = args['columns'].split('%7C')

    text = 'ID\t' + '\t'.join(cols) + '\n'

    # if neither concepts or doculects are passed from the args, all ids are
    # selected from the database
    if not args['concepts'] and not args['doculects']:
        idxs = [line[0] for line in cursor.execute(
            'select distinct ID from ' + args['file'] + ';')]
    else:
        # we evaluate the concept string
        if args['concepts']:
            cstring = 'COL = "CONCEPT" and VAL in ("' + \
                      '","'.join(args['concepts'].split('%7C')) + '")'
        else:
            cstring = ''
        if args['doculects']:
            dstring = 'COL = "DOCULECT" and VAL in ("' + \
                      '","'.join(args['doculects'].split('%7C')) + '")'
        else:
            dstring = ''

        # NOTE(review): filter strings are concatenated into SQL; safe
        # only for trusted local input.
        if cstring:
            cidxs = [line[0] for line in cursor.execute(
                'select distinct ID from ' + args['file'] + ' where ' + cstring)]
        else:
            cidxs = []
        if dstring:
            didxs = [line[0] for line in cursor.execute(
                'select distinct ID from ' + args['file'] + ' where ' + dstring)]
        else:
            didxs = []

        # both filters given -> intersection; else whichever is non-empty
        if cidxs and didxs:
            idxs = [idx for idx in cidxs if idx in didxs]
        else:
            idxs = cidxs or didxs

    # make the dictionary: D[ID][COL] = VAL, skipping empty markers
    D = {}
    for a, b, c in cursor.execute('select * from ' + args['file'] + ';'):
        if c not in ['-', '']:
            try:
                D[a][b] = c
            except KeyError:
                D[a] = {b: c}

    # make object: one TSV row per selected ID, empty cell when missing
    for idx in idxs:
        txt = str(idx)
        for col in cols:
            try:
                txt += '\t' + D[idx][col]
            except IndexError:
                txt += '\t'
            except ValueError:
                txt += "\t"
            except KeyError:
                txt += "\t"
        text += txt + "\n"
    send_response(s, text, content_type="text/plain; charset=utf-8",
                  content_disposition='attachment; filename="triples.tsv"')
# noinspection SqlDialectInspection,SqlNoDataSourceInspection,SqlResolve
def modifications(s, post, qtype, conf):
    """
    Check for remote modifications in the data, done in another application.

    Note
    ----
    This operation is not only useful when working with many people, but also
    when working on a local host but with multiple windows open. The call
    checks for recently modified data in the database and inserts them into the
    wordlist, if modifications are detected. It is triggered in certain
    intervals, but mostly dependent on the use of the Wordlist Panel of the
    EDICTOR.

    :param s: request handler used to send the response
    :param post: GET path or POST body with remote_dbase/file/date
    :param qtype: "GET" or "POST"
    :param conf: configuration dict (see configuration())
    """
    args = {}
    handle_args(args, post, qtype)

    if "remote_dbase" not in args:
        return

    if conf.get("remote") and args["remote_dbase"] in conf["remote"]:  # pragma: no cover
        print("EDICTOR checking for modifications in remote data.")
        info = conf["remote"][args["remote_dbase"]]["modifications.py"]
        # BUG FIX: the assembled body (including the date filter) was
        # previously built and then discarded — the request only sent
        # info["data"], so the remote side never received the date.
        data = info["data"] + "&date=" + args["date"]
        req = urllib.request.Request(
            info["url"],
            data=bytes(data, "utf-8"))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.get_method = lambda: 'POST'
        data = urllib.request.urlopen(req).read()
        send_response(
            s,
            data,
            encode=False,
            content_type="text/plain; charset=utf-8",
            content_disposition='attachment; filename="triples.tsv"'
        )
        return

    db, cursor = opendb(args["remote_dbase"], conf)
    # recently changed (ID, COL) pairs from the backup log
    # NOTE(review): file and date are concatenated into SQL; safe only
    # for trusted local input.
    cursor.execute(
        'select ID, COL from backup where FILE="' + args['file'] + '"' +
        ' and DATE > ' + args['date'] +
        ' group by ID,COL limit 100;')
    lines = cursor.fetchall()
    # current value for every (ID, COL) pair in the live table
    data = dict([((a, b), c) for a, b, c in cursor.execute(
        'select * from ' + args['file'] + ';'
    )])
    message = ""
    for line in lines:
        try:
            val = data[line[0], line[1]].encode('utf-8')
            message += '{0}\t{1}\t{2}\n'.format(line[0], line[1], val)
        except KeyError:
            # the entry was deleted in the meantime; skip it
            pass
    send_response(s, message)
# noinspection SqlResolve
def update(s, post, qtype, conf):
    """
    Update data on local or remote SQLite file.

    Note
    ----
    The update routine is carried out with a post-request that is sent to the
    local host, or by sending a get request to the remote host (which must be
    specified in the configuration file).

    With "update" in args, parallel "|||"-separated lists of ids, cols
    and vals are applied to the triple table (updating existing cells or
    inserting new ones), with the previous value written to the backup
    table.  With "delete" in args, all rows of the given ID are backed
    up and removed.

    :param s: request handler used to send the response
    :param post: GET path or POST body
    :param qtype: "GET" or "POST"
    :param conf: configuration dict (see configuration())
    """
    # timestamp used in the response messages only
    now = str(datetime.now()).split('.')[0]
    args = {}
    handle_args(args, post, qtype)

    if conf.get("remote") and args["remote_dbase"] in conf["remote"]:  # pragma: no cover
        print("send remote data")
        info = conf["remote"][args["remote_dbase"]]["update.py"]
        url = info["url"]
        data = info["data"]
        if "update" in args:
            # %7C%7C%7C is the url-encoded "|||" list separator
            data += "&ID=" + args["ids"].replace("%7C%7C%7C", "|||")
            data += "&COL=" + args["cols"].replace("%7C%7C%7C", "|||")
            data += "&VAL=" + args["vals"].replace("%7C%7C%7C", "|||")
            data += "&update=true"
        elif "delete" in args:
            data += "&ID=" + args["ID"] + "&delete=true"

        # authenticate against the remote host with HTTP basic auth
        passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, url, conf["user"], conf["pw"])

        authhandler = urllib.request.HTTPBasicAuthHandler(passman)
        opener = urllib.request.build_opener(authhandler)
        urllib.request.install_opener(opener)

        req = urllib.request.Request(
            info["url"],
            data=bytes(data, "utf-8"))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.get_method = lambda: 'POST'
        res = urllib.request.urlopen(req)
        message = res.read()
        send_response(s, message, encode=False)
        return

    db, cursor = opendb(args["remote_dbase"], conf)

    if "update" in args:
        # ids/cols/vals are parallel "|||"-separated lists
        idxs = urllib.parse.unquote(args['ids']).split("|||")
        cols = urllib.parse.unquote(args['cols']).split("|||")
        vals = urllib.parse.unquote(args['vals']).split("|||")

        # iterate over the entries
        if len(idxs) == len(cols) == len(vals):
            pass
        else:
            print('ERROR: wrong values submitted')
            return
        for idx, col, val in zip(idxs, cols, vals):

            # unquote the value
            val = urllib.parse.unquote(val)

            # check for quote characters (escape for SQL string literal)
            if '"' in val:
                val = val.replace('"', '""')

            # get original data value
            # NOTE(review): queries are built by string concatenation;
            # safe only for trusted local input.
            try:
                orig_val = [x for x in cursor.execute(
                    'select VAL from ' + args['file'] + ' where ID=' + \
                    idx + ' and COL like "' + col + '";')][0][0]

                qstring = 'update ' + args[
                    'file'] + ' set VAL="' + val + '" where ID=' + idx + ' and COL="' + col + '";'
                cursor.execute(
                    qstring
                )

                message = 'UPDATE: Modification successful replace "{0}" with "{1}" on {2}.'.format(
                    orig_val.encode('utf-8'),
                    val,
                    now)

            except IndexError:
                # no existing cell: [0][0] above raised, so insert instead
                orig_val = '!newvalue!'

                # create new datum if value has not been retrieved
                cursor.execute(
                    'insert into ' + args['file'] + ' values(' +
                    idx + ',"' + col + '","' +
                    val + '");')
                message = 'INSERTION: Successfully inserted {0} on {1}'.format(
                    val, now)

            # modify original value with double quotes for safety
            if '"' in orig_val:
                orig_val = orig_val.replace('"', '""')

            # insert the backup line (timestamped with unix epoch seconds)
            try:
                # noinspection SqlDialectInspection,SqlResolve
                cursor.execute(
                    'insert into backup values(?,?,?,?,strftime("%s","now"),?);',
                    (
                        args['file'],
                        idx,
                        col,
                        orig_val,
                        conf["user"]
                    ))
            except Exception as e:
                print(e)
                message = 'ERROR'

        db.commit()

    elif "delete" in args:
        # back up all rows of this ID before removing them
        lines = [line for line in cursor.execute(
            'select * from ' + args['file'] + ' where ID=' + args['ID'] + ';'
        )]
        for idx, col, val in lines:
            cursor.execute(
                'insert into backup values(?,?,?,?,strftime("%s","now"),?);',
                (args['file'], idx, col, val, conf["user"]))
        cursor.execute(
            'delete from ' + args['file'] + ' where ID=' + args['ID'] + ';')
        db.commit()
        message = 'DELETION: Successfully deleted all entries for ID {0} on {1}.'.format(
            args['ID'],
            now)
    # NOTE(review): if neither "update" nor "delete" is in args, `message`
    # is unbound here and this raises NameError — confirm callers always
    # set one of the two flags.
    send_response(s, message)
def quit(s):
    """
    Exit the application.

    Sends a final confirmation response, then terminates the current
    process by signalling itself.

    :param s: server
    :return: None (the process exits)
    """
    send_response(s, "Terminated the application.")
    # SIGTERM to our own PID ends the blocking server loop
    os.kill(os.getpid(), signal.SIGTERM)