Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

# *-* coding: utf-8 *-* 

""" 

Utility functions for borrowing detection with the PhyBo class. 

""" 

from __future__ import unicode_literals, division, print_function 

 

from lingpy import log 

from lingpy.util import write_text_file, as_string 

 

try: 

import scipy.stats as sps 

except ImportError: 

log.missing_module('scipy') 

 

 

def tstats( 

wordlist, 

glm='', 

network=False, 

acs=False, 

tree=False, 

singletons=True, 

return_dists=False 

): 

""" 

Calculate transmission statistics for a given MLN. 

""" 

 

# check for attributes 

# return if no glm and no network is given 

if not glm and not network: 

raise ValueError("You must specify at least one network or a gain-loss model.") 

 

# check for acs and network 

if glm: 

# network = wordlist.graph[glm] 

acs = wordlist.acs[glm] 

 

# check for tree 

if not tree: 

tree = wordlist.tree 

 

# add the distributions of the leaves to the acs 

for t in tree.taxa: 

paps = wordlist.get_list(taxa=t, entry='pap', flat=True) 

cons = wordlist.get_list(taxa=t, entry='concept', flat=True) 

 

acs[t] = [(p, c) for p, c in zip(paps, cons)] 

 

# now we apply a simple way to resolve directions by taking the first 

# occurence of links in the tree to be the innovation, and all dependent 

# links to be the source of borrowings 

 

# create a queue 

queue = ['root'] 

 

# make dictionary of innovated chars: these are currently all present in the 

# root, we order list as [inheritance,innovation,transfer] 

tracer = dict([(c[0], [0, 1, 0]) for c in acs['root']]) 

states = {} 

 

# start to iterate 

while queue: 

 

# get current node 

node = queue.pop(0) 

 

# get the children 

children = tree.getNodeMatchingName(node).Children 

 

# get the chars of the node 

node_chars = list(set([c[0] for c in acs[node]])) 

 

# if there are children 

for child in children: 

 

# get the node name 

name = child.Name 

 

# append name to the queue 

queue += [name] 

 

# get the chars of the child 

chars = list(set([c[0] for c in acs[name]])) 

 

inn = 0 

ret = 0 

bor = 0 

# iterate over chars and decide where they come from 

for char in chars: 

 

if char not in wordlist.singletons or not singletons: 

# if char is inherited, increase the score 

if char in node_chars: 

tracer[char][0] += 1 

ret += 1 

 

# if occurs the first time, it is an innovation 

elif char not in tracer: 

tracer[char] = [0, 1, 0] 

inn += 1 

 

# if it is in the tracer 

elif char not in node_chars and char in tracer: 

tracer[char][2] += 1 

bor += 1 

 

states[name] = [ret, inn, bor] 

 

# calculate the scores 

ret = sum([c[0] for c in tracer.values()]) 

inn = sum([c[1] for c in tracer.values()]) 

tra = sum([c[2] for c in tracer.values()]) 

 

ipn = inn / len(acs) 

tpn = tra / len(acs) 

 

total2 = ipn + tpn 

 

log.info("Innovations: {0}, {1:.2f}, {2:.2f}".format(inn, ipn, ipn / total2)) 

log.info("Transferred: {0}, {1:.2f}, {2:.2f}".format(tra, tpn, tpn / total2)) 

 

if return_dists: 

leaves = [] 

nodes = [] 

for node in [n for n in tree.getNodeNames() if n != 'root']: 

innovations = states[node][1] + states[node][2] 

if node in tree.taxa: 

leaves += [innovations] 

else: 

nodes += [innovations] 

 

# evaluate using mwu 

p, r = sps.mstats.kruskalwallis(leaves, nodes) 

 

return p, r 

 

return inn, tra, tracer 

 

 

def get_acs(wordlist, glm, homoplasy=0, **keywords): 

""" 

Calculate ancestral cognate distributions. 

""" 

gls = wordlist.gls[glm] 

 

queue = ['root'] 

acs = dict( 

root=[] 

) 

for char in gls: 

positives = [x[0] for x in gls[char][0] if x[1] == 1] 

if 'root' in positives: 

acs['root'] += [char] 

 

while queue: 

 

parent = queue.pop(0) 

 

children = wordlist.tree.getNodeMatchingName(parent).Children 

 

for child in children: 

child_name = child.Name 

queue += [child_name] 

acs[child_name] = [] 

for char in gls: 

positives = [x[0] for x in gls[char][0] if x[1] == 1] 

negatives = [x[0] for x in gls[char][0] if x[1] == 0] 

if child_name in positives: 

acs[child_name] += [char] 

elif child_name in negatives: 

pass 

elif char in acs[parent]: 

acs[child_name] += [char] 

 

dist = [] 

for t in acs: 

if t not in wordlist.taxa: 

acs[t] = sorted(set(acs[t])) 

forms = len(acs[t]) 

dist += [int(forms - forms * homoplasy + 0.5)] 

 

return acs, dist 

 

 

def check_stats(models, wordlist, filename='results.txt', pprint=False): 

results = [] 

for m in models: 

p, z = tstats(wordlist, m, return_dists=True) 

results += [[m, p, z]] 

 

 

txt = '' 

for a, b, c in results: 

txt += '{0}\t{1:.2f}\t{2:.2f}\n'.format(a, b, c) 

as_string(txt, pprint) 

if filename: write_text_file(filename, txt)