Coverage for /home/mattis/projects/websites/dighl/edictor/src/edictor/wordlist.py: 63%

46 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-08-07 06:52 +0200

1""" 

2Handle EDICTOR wordlist data. 

3""" 

4import urllib 

5import tempfile 

6try: 

7 import lingpy 

8except ImportError: 

9 lingpy = False 

10try: 

11 from lexibase import LexiBase 

12except ImportError: 

13 LexiBase = False 

14 

15 

# noinspection HttpUrlsUsage
def fetch_wordlist(
    dataset,
    remote_dbase=None,
    concepts=None,
    languages=None,
    columns=None,
    to_lingpy=None,
    transform=None,
    base_url="http://lingulist.de/edictor",
):
    """
    Download wordlist from an EDICTOR server application.

    The request URL is ``base_url + "/triples/get_data.py"`` with the query
    values assembled from the arguments below.

    :param dataset: Dataset name, sent as the ``file`` query value.
    :param remote_dbase: Database name sent as ``remote_dbase``; when empty,
        ``dataset + ".sqlite3"`` is used.
    :param concepts: Optional iterable of concept names; each one is
        percent-quoted and they are joined with ``|``.
    :param languages: Optional iterable of language names, sent the same way
        under the ``doculects`` key.
    :param columns: Optional iterable of column names, sent the same way
        under the ``columns`` key.
    :param to_lingpy: If true, write the download to a temporary file and
        load it instead of returning text.
    :param transform: Optional callable taking the temporary file name; used
        in place of ``lingpy.Wordlist`` when ``to_lingpy`` is true. Ignored
        otherwise.
    :param base_url: Address of the EDICTOR application.
    :returns: The response decoded as UTF-8, or (with ``to_lingpy``) whatever
        ``transform`` / ``lingpy.Wordlist`` returns.
    :raises ValueError: If ``to_lingpy`` is requested but lingpy is missing.
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # load ``urllib.parse`` / ``urllib.request``, so attribute access on the
    # package only works if some other module happened to import them.
    from urllib.parse import quote
    from urllib.request import urlopen

    # Fail before touching the network when the result could not be loaded.
    if to_lingpy and not lingpy:
        raise ValueError(
            "Package lingpy has to be installed to use this method.")

    url = base_url + "/triples/get_data.py?file=" + dataset
    url += "&remote_dbase=" + (remote_dbase or dataset + ".sqlite3")
    # All list-valued filters are encoded the same way, so characters such
    # as "#" or "&" in a value cannot truncate or split the query string.
    for key, values in (
        ("concepts", concepts),
        ("doculects", languages),
        ("columns", columns),
    ):
        if values:
            url += "&" + key + "=" + "|".join(quote(value) for value in values)

    # The context manager closes the response even if reading fails.
    with urlopen(url) as response:
        data = response.read()

    if not to_lingpy:
        return data.decode("utf-8")

    # The temporary file is removed when the ``with`` block exits, so the
    # loader has to consume it before this function returns.
    with tempfile.NamedTemporaryFile() as tf:
        tf.write(data)
        tf.flush()
        return transform(tf.name) if transform else lingpy.Wordlist(tf.name)

52 

53 

def get_wordlist(
    path,
    name,
    columns=None,
    preprocessing=None,
    namespace=None,
    lexibase=False,
    custom_args=None
):
    """
    Function retrieves a wordlist from a CLDF dataset.

    The dataset at ``path`` is loaded with ``lingpy.Wordlist.from_cldf``,
    optionally passed through ``preprocessing``, and then either written
    out via ``lingpy.Wordlist.output`` or stored through ``LexiBase``.

    :param path: Passed unchanged to ``lingpy.Wordlist.from_cldf``.
    :param name: Output name: used as ``filename`` for the TSV output, or as
        ``name + ".sqlite3"`` for the LexiBase ``dbase`` argument and as the
        argument to ``LexiBase.create``.
    :param columns: CLDF columns to load; a default set of six columns is
        used when empty.
    :param preprocessing: Optional callable receiving the loaded wordlist
        and returning the data passed on to ``lingpy.Wordlist`` /
        ``LexiBase`` (presumably a dict with the header at key ``0`` --
        confirm against callers).
    :param namespace: Mapping from CLDF column names to wordlist column
        names; a default mapping is used when empty.
    :param lexibase: If true, store the result with ``LexiBase`` instead of
        writing TSV.
    :param custom_args: If truthy, forwarded to ``preprocessing`` as the
        keyword argument ``args``.
    :raises ValueError: If lingpy is missing, or if ``lexibase`` is
        requested but the lexibase package is missing.
    """
    if not lingpy:
        raise ValueError(
            "Package lingpy has to be installed to use this method.")
    # Checked up front so a missing optional dependency is reported before
    # the dataset is loaded and preprocessed.
    if lexibase and not LexiBase:
        raise ValueError(
            "Package lexibase has to be installed to use this method.")

    wordlist = lingpy.Wordlist.from_cldf(
        path,
        columns=columns or (
            "language_id", "concept_name", "value", "form", "segments", "comment"),
        namespace=namespace or {
            "language_id": "doculect",
            "concept_name": "concept",
            "value": "value",
            "form": "form",
            "segments": "tokens",
            "comment": "note",
        },
    )

    if preprocessing and custom_args:
        dct = preprocessing(wordlist, args=custom_args)
    elif preprocessing:
        dct = preprocessing(wordlist)
    else:
        dct = {idx: wordlist[idx] for idx in wordlist}
        # NOTE(review): the header assignment is placed in this branch only,
        # on the assumption that a preprocessing callable supplies its own
        # header row -- confirm against the original module layout.
        dct[0] = wordlist.columns

    if not lexibase:
        lingpy.Wordlist(dct).output("tsv", filename=name, ignore="all", prettify=False)
    else:
        lex = LexiBase(dct, dbase=name + ".sqlite3")
        lex.create(name)