1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 __doc__ = """
21 This module contains the supporting classes for the Two Step Analysis user agent
22 algorithm that is used as the primary way to match user agents with the Java API
23 for the WURFL.
24
25 A description of the way the following source is intended to work can be found
26 within the source for the original Java API implementation here:
27 http://sourceforge.net/projects/wurfl/files/WURFL Java API/
28
29 The original Java code is GPLd and Copyright (c) WURFL-Pro srl
30 """
31
32 __author__ = "Armand Lynch <lyncha@users.sourceforge.net>"
33 __copyright__ = "Copyright 2011, Armand Lynch"
34 __license__ = "LGPL"
35 __url__ = "http://celljam.net/"
36 __version__ = "1.2.1"
37
38 import re
39
40
41
42
43
44
# Pre-compiled, Unicode-aware patterns used by the user agent normalizers
# in this module.

# The "(via babelfish.yahoo.com)" marker plus any whitespace around it.
# NOTE(review): the dots in the host name are unescaped, so each one matches
# any character -- presumably harmless, confirm.
babel_fish_re = re.compile(ur"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)
# Optional whitespace, "UP.Link", and at least one further character through
# the end of the string.
uplink_re = re.compile(ur"\s*UP\.Link.+$", re.UNICODE)
# The literal "Mozilla/4.0 (YesWAP mobile phone proxy)" with optional leading
# whitespace.
yeswap_re = re.compile(ur"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)",
                       re.UNICODE)
# Three groups: (1) "Mozilla/5.0" and what follows it, (2) the section that
# starts at "; U;", (3) "Safari/" followed by at most three digits.
safari_re = re.compile(ur"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})",
                       re.UNICODE)
# "; xx" optionally followed by "-" and up to two letters, e.g. "; en-us".
locale_re = re.compile(ur"(; [a-z]{2}(-[a-zA-Z]{0,2})?)", re.UNICODE)
# Either "[TF...]", "[NT...]" or "[ST...]", or "/SN...", where "..." is a run
# of digits and "X" characters.
# NOTE(review): "|" inside the character class is a literal pipe, not an
# alternation -- [\dX] may have been intended; confirm before changing.
serial_number_re = re.compile(ur"(\[(TF|NT|ST)[\d|X]+\])|(\/SN[\d|X]+)",
                              re.UNICODE)
# Group 1 is "Android", a whitespace or "/" separator and a two-digit version;
# group 2 is the shortest following text ending in ";".
# NOTE(review): the "." between the version digits is unescaped and matches
# any character -- "\." may have been intended; confirm before changing.
android_re = re.compile(ur"(Android[\s/]\d.\d)(.*?;)", re.UNICODE)
# "Konqueror/" followed by a single digit.
konqueror_re = re.compile(ur"(Konqueror\/\d)", re.UNICODE)
63 """Replace the "via babelfish.yahoo.com" with ''"""
64
65 return babel_fish_re.sub('', user_agent)
66
69 """ Replaces the heading "BlackBerry" string with ''"""
70
71 try:
72 index = user_agent.index(u"BlackBerry")
73 if u"AppleWebKit" not in user_agent:
74 return user_agent[index:]
75 except ValueError:
76 pass
77 return user_agent
78
81 """Replace the trailing UP.Link ... with ''"""
82
83 return uplink_re.sub('', user_agent)
84
87 """Replace the "YesWAP mobile phone proxy" with ''"""
88
89 return yeswap_re.sub('', user_agent)
90
94
98
101 def normalizer(user_agent):
102
103 for f in funcs:
104 user_agent = f(user_agent)
105 return user_agent.replace(' ', ' ').strip()
106 return normalizer
107
108
109 generic = _combine_funcs(serial_no, blackberry, uplink, yeswap, babelfish,
110 locale_remover)
114 def combined_normalizer(user_agent):
115 user_agent = generic(user_agent)
116 return normalizer_func(user_agent)
117 combined_normalizer.__doc__ = normalizer_func.__doc__
118 return combined_normalizer
119
124 if search_string in user_agent:
125 start = user_agent.index(search_string)
126 user_agent = user_agent[start:start + vsn_size]
127 return user_agent
128
129
130 @prenormalized
131 -def chrome(user_agent):
134
135
136 @prenormalized
137 -def firefox(user_agent):
140
141
142 @prenormalized
143 -def konqueror(user_agent):
149
150
@prenormalized
def msie(user_agent):
    """
    Return the user agent cut off just after its MSIE version token.

    Everything from the start of the string through the nine characters
    beginning at the first "MSIE" (e.g. "MSIE 7.0;") is kept.  A user agent
    that does not contain "MSIE" is returned unchanged.
    """

    msie_start = user_agent.find(u"MSIE")
    if msie_start == -1:
        return user_agent
    # 9 == len("MSIE") plus the five characters that follow it.
    return user_agent[:msie_start + 9]
157
158
@prenormalized
def safari(user_agent):
    """
    Reduce a Safari user agent to its leading part and its Safari token.

    The section that safari_re captures between the leading "Mozilla/5.0..."
    part and the "Safari/xxx" token (it starts at "; U;") is discarded and
    the two remaining parts are joined with a single space.

    e.g Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
    becomes
    Mozilla/5.0 (Macintosh Safari/525

    A user agent that safari_re does not match is returned unchanged.
    """

    found = safari_re.search(user_agent)
    if not found or len(found.groups()) < 3:
        return user_agent

    leading_part = found.group(1).strip()
    safari_token = found.group(3).strip()
    return " ".join((leading_part, safari_token))
174
175
@prenormalized
def lg(user_agent):
    """
    Return the user agent starting at its first "LG".

    A user agent that does not contain "LG" is returned unchanged.
    """

    lg_start = user_agent.find(u"LG")
    if lg_start == -1:
        return user_agent
    return user_agent[lg_start:]
183
184
@prenormalized
def maemo(user_agent):
    """
    Return the user agent starting at its first "Maemo".

    A user agent that does not contain "Maemo" is returned unchanged.
    """

    position = user_agent.find(u"Maemo")
    # find() reports a missing substring as -1 instead of raising ValueError.
    return user_agent if position == -1 else user_agent[position:]
192
193
194 @prenormalized
195 -def android(user_agent):
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224