Package pywurfl :: Module algorithms
[hide private]
[frames | no frames]

Source Code for Module pywurfl.algorithms

  1  # pywurfl Algorithms - Wireless Universal Resource File UA search algorithms 
  2  # Copyright (C) 2006-2009 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = \ 
 21  """ 
 22  pywurfl search algorithms 
 23  """ 
 24   
 25  import re 
 26   
 27  from pywurfl.exceptions import DeviceNotFound 
 28   
 29   
 30  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 31  __copyright__ = "Copyright 2006-2009, Armand Lynch" 
 32  __license__ = "LGPL" 
 33  __url__ = "http://celljam.net/" 
 34   
 35   
class Algorithm(object):
    """
    Common interface shared by every pywurfl search algorithm.

    An algorithm is a callable object; concrete subclasses override
    C{__call__} to map a user agent string onto a device.
    """

    def __call__(self, ua, devices):
        """
        Search C{devices} for the device matching C{ua}.

        Subclasses must override this method. The base implementation only
        signals that no search strategy has been supplied.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
try:
    import Levenshtein
except ImportError:
    # python-Levenshtein is an optional dependency: when it is missing the
    # two algorithms below are simply not defined.
    pass
else:
    class JaroWinkler(Algorithm):
        """
        Jaro-Winkler Search Algorithm
        """

        def __init__(self, accuracy=1.0, weight=0.05):
            """
            @param accuracy: The tolerance that the Jaro-Winkler algorithm will
                             use to determine if a user agent matches
                             0.0 <= accuracy <= 1.0
            @type accuracy: float
            @param weight: The prefix weight is inverse value of common prefix
                           length sufficient to consider the strings
                           'identical' (excerpt from the Levenshtein module
                           documentation).
            @type weight: float
            """
            self.accuracy = accuracy
            self.weight = weight

        def __call__(self, ua, devices):
            """
            Return the device whose user agent has the highest Jaro-Winkler
            similarity to C{ua}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound: If there are no user agents to
                                            search, or the best similarity is
                                            below C{self.accuracy}.
            """
            # NOTE(review): assumes devices.devuas is a dict-like mapping of
            # user agent -> device, as the lookups below imply.
            if not devices.devuas:
                # max() raises ValueError on an empty sequence; having no
                # candidates simply means that no device can be found.
                raise DeviceNotFound(ua)

            score, best_ua = max(
                (Levenshtein.jaro_winkler(x, ua, self.weight), x)
                for x in devices.devuas)
            if score >= self.accuracy:
                return devices.devuas[best_ua]
            raise DeviceNotFound(ua)

    class LevenshteinDistance(Algorithm):
        """
        Levenshtein distance Search Algorithm
        """

        def __call__(self, ua, devices):
            """
            Return the device whose user agent has the smallest Levenshtein
            (edit) distance to C{ua}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound: If there are no user agents to
                                            search.
            """
            if not devices.devuas:
                # min() raises ValueError on an empty sequence.
                raise DeviceNotFound(ua)

            # A smaller edit distance means a closer match, so the best
            # candidate is the minimum, not the maximum.
            best_ua = min((Levenshtein.distance(ua, x), x)
                          for x in devices.devuas)[1]
            return devices.devuas[best_ua]
class Tokenizer(Algorithm):
    """
    Tokenizer Search Algorithm

    The user agent is split into tokens and the known user agents are
    narrowed down one token at a time, using a progressively longer,
    case-insensitive prefix regular expression.
    """
    # Characters treated as token separators inside a user agent.
    tokenize_chars = ('/', '.', ',', ';', '-', '_', ' ', '(', ')')
    # Character class matching any (possibly empty) run of separators; it is
    # inserted between consecutive tokens when the search regex is built.
    base_regex = '[\\'+'\\'.join(tokenize_chars)+']*'

    def __init__(self, devwindow=30):
        """
        @param devwindow: If more than devwindow user agents match,
                          return the generic device.
        @type devwindow: integer
        """
        self.devwindow = devwindow

    def _tokenize(self, s):
        """
        Split a user agent on the separator characters and on whitespace.

        @param s: The user agent to tokenize
        @type s: string
        @return: The tokens, each escaped for use in a regular expression
        @rtype: list of strings
        """
        for d in self.tokenize_chars:
            s = s.replace(d, ' ')
        return [re.escape(x) for x in s.split()]

    def _select(self, candidates, devices):
        """
        Pick a device from an ambiguous set of candidate user agents.

        @param candidates: The user agents still considered a match
        @type candidates: list of strings
        @param devices: The devices object to search
        @type devices: Devices
        @return: The generic device when there are no candidates or more than
                 devwindow of them, otherwise the first candidate's device.
        @rtype: Device
        """
        if not candidates or len(candidates) > self.devwindow:
            return devices.devids['generic']
        return devices.devuas[candidates[0]]

    def __call__(self, ua, devices):
        """
        Narrow the known user agents down token by token.

        A user agent without any tokens (empty, or made only of separators)
        is treated as having run out of tokens immediately, so every known
        user agent is still a candidate.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # list() yields an indexable sequence whether keys() returns a list
        # or a dictionary view.
        candidates = list(devices.devuas.keys())
        regex = ''
        for token in self._tokenize(ua):
            if regex:
                regex += self.base_regex + token
            else:
                regex = token

            # match() only anchors at the start, so this is a prefix test.
            uare = re.compile(regex + '.*', re.I)
            matches = [x for x in candidates if uare.match(x)]

            if not matches:
                # This token ruled every candidate out, so fall back on what
                # the previous tokens left us with.
                return self._select(candidates, devices)

            # We found one good looking match
            if len(matches) == 1:
                return devices.devuas[matches[0]]

            # We've got several matches so search some more
            candidates = matches

        # We ran out of tokens while several candidates were still matching.
        return self._select(candidates, devices)
189