Package pywurfl :: Module algorithms
[hide private]
[frames] | no frames]

Source Code for Module pywurfl.algorithms

  1  # pywurfl Algorithms - Wireless Universal Resource File UA search algorithms 
  2  # Copyright (C) 2006 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = \ 
 21  """ 
 22  pywurfl search algorithms 
 23  """ 
 24   
 25  import re 
 26   
 27  from pywurfl.exceptions import DeviceNotFound 
 28   
 29   
 30  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 31  __copyright__ = "Copyright 2006, Armand Lynch" 
 32  __license__ = "LGPL" 
 33  __url__ = "http://celljam.net/" 
 34   
 35   
class Algorithm(object):
    """
    Abstract parent of every pywurfl user agent search algorithm.
    """

    def __call__(self, ua, devices):
        """
        Search hook that each concrete pywurfl algorithm has to override.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raises NotImplementedError: Always; the base class performs no
                                     search of its own.
        """
        raise NotImplementedError()
try:
    import Levenshtein
except ImportError:
    # The Levenshtein extension is optional; without it the two algorithms
    # below are simply not defined by this module.
    pass
else:

    class JaroWinkler(Algorithm):
        """
        Jaro-Winkler Search Algorithm
        """

        def __init__(self, accuracy=1.0, weight=0.05):
            """
            @param accuracy: The tolerance that the Jaro-Winkler algorithm will
                             use to determine if a user agent matches
                             0.0 <= accuracy <= 1.0
            @type accuracy: float
            @param weight: The prefix weight is inverse value of common prefix
                           length sufficient to consider the strings
                           'identical' (excerpt from the Levenshtein module
                           documentation).
            @type weight: float
            """
            self.accuracy = accuracy
            self.weight = weight

        def __call__(self, ua, devices):
            """
            Return the device whose user agent has the highest Jaro-Winkler
            similarity to C{ua}, provided that similarity reaches
            C{self.accuracy}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound
            """
            # Jaro-Winkler is a similarity score, so the best candidate is
            # the one with the largest value.
            match = max((Levenshtein.jaro_winkler(x, ua, self.weight), x) for
                        x in devices.devuas)
            if match[0] >= self.accuracy:
                return devices.devuas[match[1]]
            else:
                raise DeviceNotFound(ua)

    class LevenshteinDistance(Algorithm):
        """
        Levenshtein distance Search Algorithm
        """

        def __call__(self, ua, devices):
            """
            Return the device whose user agent has the smallest Levenshtein
            (edit) distance to C{ua}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            """
            # Edit distance measures dissimilarity: the closest user agent is
            # the one with the *smallest* distance, hence min() and not max().
            match = min((Levenshtein.distance(ua, x), x) for x in
                        devices.devuas)
            return devices.devuas[match[1]]
class Tokenizer(Algorithm):
    """
    Tokenizer Search Algorithm

    The user agent is split into tokens and the set of candidate device user
    agents is narrowed one token at a time with a growing, case-insensitive
    prefix pattern.
    """
    tokenize_chars = ('/', '.', '-', '_', ' ')

    def __init__(self, devwindow=30):
        """
        @param devwindow: If more than devwindow user agents match,
                          return the generic device.
        @type devwindow: integer
        """
        self.devwindow = devwindow

    def _tokenize(self, s):
        """
        Split a user agent on every character in C{tokenize_chars} and
        regex-escape each resulting token.

        @param s: The user agent to tokenize
        @type s: string
        @rtype: list of strings
        """
        for d in self.tokenize_chars:
            s = s.replace(d, ' ')
        return [re.escape(x) for x in s.split()]

    def __call__(self, ua, devices):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # Materialise the keys so the candidates can be indexed even when
        # devuas.keys() is a view rather than a list.
        uas = list(devices.devuas)
        if not uas:
            # Nothing to compare against; fall back to the generic device
            # instead of failing on an empty candidate list.
            return devices.devids['generic']

        # When the user agent yields no tokens the loop below never runs, so
        # every known user agent is still a candidate.
        uas2 = uas
        regex = ''
        for t in self._tokenize(ua):
            if regex:
                # Tokens may be separated by any run of separator characters.
                regex += r'[\/\.\-_ ]*' + t
            else:
                regex += t

            uare = re.compile(regex + '.*', re.I)
            uas2 = [x for x in uas if uare.match(x)]

            # If the last regex didn't produce any matches and more than
            # devwindow devices were matched before, return a generic device.
            # Else, there is a device that "looks" like some others so return
            # the first one.
            if not uas2:
                if len(uas) > self.devwindow:
                    return devices.devids['generic']
                return devices.devuas[uas[0]]

            # We found one good looking match
            if len(uas2) == 1:
                return devices.devuas[uas2[0]]

            # We've got matches so search some more
            uas = uas2

        # We've got some matches but we ran out of tokens to search with.
        # If we matched more than devwindow, return a generic device.
        # Else we've got some devices within the devwindow so return the first
        # one.
        if len(uas2) > self.devwindow:
            return devices.devids['generic']
        return devices.devuas[uas2[0]]
188