1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 r"""
19 =====================
20 Javascript Minifier
21 =====================
22
23 rJSmin is a javascript minifier written in python.
24
25 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\.
26
27 The module is a re-implementation aiming for speed, so it can be used at
28 runtime (rather than during a preprocessing step). Usually it produces the
29 same results as the original ``jsmin.c``. It differs in the following ways:
30
31 - there is no error detection: unterminated string, regex and comment
32 literals are treated as regular javascript code and minified as such.
33 - Control characters inside string and regex literals are left untouched; they
34 are not converted to spaces (nor to \n)
35 - Newline characters are not allowed inside string and regex literals, except
36 for line continuations in string literals (ECMA-5).
37 - "return /regex/" is recognized correctly.
38 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
39 - Newlines before ! operators are removed more sensibly
40 - Comments starting with an exclamation mark (``!``) can be kept optionally
41 - rJSmin does not handle streams, but only complete strings. (However, the
42 module provides a "streamy" interface).
43
44 Since most parts of the logic are handled by the regex engine it's way
45 faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
46 factor varies between about 6 and 55 depending on input and python version
47 (it gets faster the more compressed the input already is). Compared to the
48 speed-refactored python port by Dave St.Germain the performance gain is less
49 dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
50 details.
51
52 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
53
54 Both python 2 and python 3 are supported.
55
56 .. _jsmin.c by Douglas Crockford:
57 http://www.crockford.com/javascript/jsmin.c
58 """
59 __author__ = "Andr\xe9 Malo"
60 __author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
61 __docformat__ = "restructuredtext en"
62 __license__ = "Apache License, Version 2.0"
63 __version__ = '1.0.8'
64 __all__ = ['jsmin']
65
66 import re as _re
67
68
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    All regular expression fragments are assembled and compiled once, when
    this factory is called. The returned callable only runs one of the two
    precompiled substitutions.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            # No C extension available: fall through to the python variant.
            pass
        else:
            return _rjsmin.jsmin

    # Control characters and blank, except \n (\012) and \r (\015).
    space_chars = r'[\000-\011\013\014\016-\040]'

    # ``// ...`` up to (not including) the end of line.
    line_comment = r'(?://[^\r\n]*)'
    # ``/* ... */``; the "nobang" flavour refuses ``/*! ... */``, the "bang"
    # flavour matches only that form.
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # Single (\047) and double quoted string literals. A backslash may be
    # followed by any character or by a line break (line continuation); bare
    # line breaks are not part of a literal.
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    # Regex literal: ``/`` not followed by a line break, ``/`` or ``*``, then
    # plain characters, escapes and ``[...]`` character classes up to ``/``.
    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    # A line break, optionally preceded by a line comment.
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        # A dash is moved to the very end, where it cannot form a range.
        pos = result.find('-')
        if pos >= 0:
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, result = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    result.append((first, last))
                    first = last = char
            if last is not None:
                result.append((first, last))
            # Runs of exactly two characters are written without a dash.
            return ''.join(['%s%s%s' % (
                chr(first),
                last > first + 1 and '-' or '',
                last != first and chr(last) or ''
            ) for first, last in result])

        # Escape what is special inside a char class, then spell control
        # characters, blank and the single quote as octal escapes.
        return _re.sub(r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        # Only characters 0..126 are enumerated, so everything above ends up
        # inside the negated class. Plain ``range`` is used on purpose: a
        # local ``xrange`` fallback would shadow the builtin in this scope and
        # therefore never pick it up anyway.
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    # Lookbehind contexts in which a following ``/`` starts a regex literal.
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%sreturn' % not_id_literal

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    # Anything that is neither a quote, a slash nor a space/control char.
    dull = r'[^\047"/\000-\040]'

    # Explicit mapping of the fragments interpolated into the patterns below.
    parts = dict(
        dull=dull,
        strings=strings,
        bang_comment=bang_comment,
        preregex1=preregex1,
        preregex2=preregex2,
        regex=regex,
        space=space,
        newline=newline,
        id_literal=id_literal,
        id_literal_open=id_literal_open,
        id_literal_close=id_literal_close
    )

    space_sub_simple = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        # NOTE(review): unlike the branch above, the space after a newline
        # is not starred here - confirm this is intended.
        r'|(?<=%(preregex2)s)'
            r'%(space)s*(?:%(newline)s%(space)s)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % parts).sub

    def space_subber_simple(match):
        """ Substitution callback """
        groups = match.groups()
        # 0: dull run, 1: string, 2/3: regex literal -> kept verbatim
        if groups[0]: return groups[0]
        elif groups[1]: return groups[1]
        elif groups[2]: return groups[2]
        elif groups[3]: return groups[3]
        # 4: line break between a closing and an opening token
        elif groups[4]: return '\n'
        # 5: between identifiers, 6: between "+", 7: between "-"
        elif groups[5] or groups[6] or groups[7]: return ' '
        else: return ''

    # Same as above plus one alternative keeping ``/*! ... */``; "space"
    # must not swallow bang comments here.
    space_sub_banged = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(%(bang_comment)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        # NOTE(review): see the note in the simple pattern (space not starred).
        r'|(?<=%(preregex2)s)'
            r'%(space)s*(?:%(newline)s%(space)s)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % dict(parts, space=space_nobang)).sub

    def space_subber_banged(match):
        """ Substitution callback """
        groups = match.groups()
        # Group numbering is shifted by one compared to the simple variant:
        # group 2 is the bang comment.
        if groups[0]: return groups[0]
        elif groups[1]: return groups[1]
        elif groups[2]: return groups[2]
        elif groups[3]: return groups[3]
        elif groups[4]: return groups[4]
        elif groups[5]: return '\n'
        elif groups[6] or groups[7] or groups[8]: return ' '
        else: return ''

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
           http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped in line breaks so the lookbehind/lookahead
        # assertions also work at its very beginning and end.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin

jsmin = _make_jsmin()
280
281
# NOTE(review): the ``def`` line of this function was missing; its name is
# restored from upstream rJSmin - confirm against any callers.
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
       http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if not keep_bang_comments:
        # Eight capturing groups: 0 dull run, 1 string, 2/3 regex literal,
        # 4 significant line break, 5/6/7 significant space.
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )
        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        # Same pattern with one extra group (index 2) keeping ``/*!...*/``
        # comments; all later group indexes are shifted by one.
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )
        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    # Wrapping the script in line breaks lets the lookaround assertions work
    # at both ends; the surplus is stripped again afterwards.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
396
397
if __name__ == '__main__':
    def main():
        """
        Main

        Reads a script from stdin and writes the minified result to stdout.
        Recognized command line flags: ``-b`` keeps bang comments, ``-p``
        forces the python implementation; ``-bp`` / ``-pb`` enable both.
        """
        # Declared up front: the module level minifier may get rebound below.
        global jsmin

        import sys as _sys

        args = _sys.argv[1:]
        keep_bang_comments = (
            '-b' in args
            or '-bp' in args
            or '-pb' in args
        )
        if '-p' in args or '-bp' in args or '-pb' in args:
            jsmin = _make_jsmin(python_only=True)
        _sys.stdout.write(jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))
    main()
415