Package pywurfl :: Module algorithms
[hide private]
[frames | no frames]

Source Code for Module pywurfl.algorithms

  1  # pywurfl Algorithms - Wireless Universal Resource File UA search algorithms 
  2  # Copyright (C) 2006 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = \ 
 21  """ 
 22  pywurfl search algorithms 
 23  """ 
 24   
 25  import re 
 26  from pywurfl import DeviceNotFound 
 27   
 28  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 29  __copyright__ = "Copyright 2006, Armand Lynch" 
 30  __license__ = "LGPL" 
 31  __url__ = "http://wurfl.sourceforge.net/python/" 
 32  __version__ = "1.0.0a" 
 33   
class Algorithm(object):
    """
    Abstract base for the pywurfl user agent search algorithms.

    An algorithm is a callable object: subclasses override __call__ to look
    up a user agent in a devices object.
    """

    def __call__(self, ua, devices):
        """
        Search devices for ua. Every pywurfl algorithm class must override
        this method; the base implementation only signals that it is missing.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raise NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
# The similarity-based algorithms need the optional third-party Levenshtein
# module; when it is not installed they are simply not defined.
try:
    import Levenshtein

    class JaroWinkler(Algorithm):
        """
        Jaro-Winkler Search Algorithm

        Scores every known user agent against the requested one with
        Levenshtein.jaro_winkler and accepts the best-scoring one only if
        its score reaches the configured accuracy.
        """

        def __init__(self, accuracy=1.0, weight=0.05):
            """
            @param accuracy: The tolerance that the Jaro-Winkler algorithm will
                             use to determine if a user agent matches
                             0.0 <= accuracy <= 1.0
            @type accuracy: float
            @param weight: Third argument forwarded to
                           Levenshtein.jaro_winkler (its prefix weight)
            @type weight: float
            """
            self.accuracy = accuracy
            self.weight = weight

        def __call__(self, ua, devices):
            """
            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound: if no known user agent scores at
                                            least accuracy, or there are no
                                            user agents to search
            """
            if not devices.devuas:
                # max() over an empty sequence would raise a bare ValueError.
                raise DeviceNotFound(ua)

            # (score, user agent) pairs: max() picks the highest similarity.
            match = max((Levenshtein.jaro_winkler(x, ua, self.weight), x)
                        for x in devices.devuas)
            if match[0] >= self.accuracy:
                return devices.devuas[match[1]]
            raise DeviceNotFound(ua)

    class LevenshteinDistance(Algorithm):
        """
        Levenshtein distance Search Algorithm

        Returns the device whose user agent has the smallest edit distance
        to the requested user agent.
        """

        def __call__(self, ua, devices):
            """
            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound: if there are no user agents to
                                            search
            """
            if not devices.devuas:
                # min() over an empty sequence would raise a bare ValueError.
                raise DeviceNotFound(ua)

            # An edit distance is a cost: the closest user agent is the one
            # with the SMALLEST distance, so select with min(), not max().
            match = min((Levenshtein.distance(ua, x), x)
                        for x in devices.devuas)
            return devices.devuas[match[1]]

except ImportError:
    pass
class Tokenizer(Algorithm):
    """
    Tokenizer Search Algorithm

    Splits the requested user agent into tokens and narrows the set of known
    user agents one token at a time with a growing, case-insensitive prefix
    regex, until a single candidate remains or the tokens run out.
    """
    tokenize_chars = ('/', '.', '-', '_', ' ')

    def __init__(self, devwindow=30):
        """
        @param devwindow: If more than devwindow user agents match,
                          return the generic device.
        @type devwindow: integer
        """
        self.devwindow = devwindow

    def _tokenize(self, s):
        """
        @param s: The user agent to tokenize
        @type s: string
        @return: The regex-escaped pieces of s, split on tokenize_chars
        @rtype: list of strings
        """
        for d in self.tokenize_chars:
            s = s.replace(d, ' ')
        return [re.escape(x) for x in s.split()]

    def __call__(self, ua, devices):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # Materialise the keys so the candidates can be indexed; on Python 3
        # dict.keys() returns a view that does not support uas[0].
        uas = list(devices.devuas.keys())

        regex = ''
        for t in self._tokenize(ua):
            if regex:
                # Any run of separator characters may sit between tokens.
                regex += r'[\/\.\-_ ]*' + t
            else:
                regex = t

            uare = re.compile(regex + '.*', re.I)
            uas2 = [x for x in uas if uare.match(x)]

            if not uas2:
                # This token ruled everything out: decide below using the
                # candidates that matched up to the previous token.
                break

            # We found one good looking match
            if len(uas2) == 1:
                return devices.devuas[uas2[0]]

            # We've got matches so search some more
            uas = uas2

        # Reached when a token matched nothing, when the tokens ran out, or
        # when ua produced no tokens at all (previously an UnboundLocalError).
        # With no candidates, or more than devwindow of them, the match is too
        # vague, so return the generic device. Otherwise there is a device
        # that "looks" like the others, so return the first one; which one is
        # first follows the key order of devices.devuas.
        if not uas or len(uas) > self.devwindow:
            return devices.devids['generic']
        return devices.devuas[uas[0]]
181