Module rjsmin

Source Code for Module rjsmin

  1  #!/usr/bin/env python 
  2  # -*- coding: ascii -*- 
  3  # 
  4  # Copyright 2011 - 2014 
  5  # Andr\xe9 Malo or his licensors, as applicable 
  6  # 
  7  # Licensed under the Apache License, Version 2.0 (the "License"); 
  8  # you may not use this file except in compliance with the License. 
  9  # You may obtain a copy of the License at 
 10  # 
 11  #     http://www.apache.org/licenses/LICENSE-2.0 
 12  # 
 13  # Unless required by applicable law or agreed to in writing, software 
 14  # distributed under the License is distributed on an "AS IS" BASIS, 
 15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 16  # See the License for the specific language governing permissions and 
 17  # limitations under the License. 
 18  r""" 
 19  ===================== 
 20   Javascript Minifier 
 21  ===================== 
 22   
 23  rJSmin is a javascript minifier written in python. 
 24   
 25  The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\. 
 26   
 27  The module is a re-implementation aiming for speed, so it can be used at 
 28  runtime (rather than during a preprocessing step). Usually it produces the 
 29  same results as the original ``jsmin.c``. It differs in the following ways: 
 30   
 31  - there is no error detection: unterminated string, regex and comment 
 32    literals are treated as regular javascript code and minified as such. 
 33  - Control characters inside string and regex literals are left untouched; they 
 34    are not converted to spaces (nor to \n) 
 35  - Newline characters are not allowed inside string and regex literals, except 
 36    for line continuations in string literals (ECMA-5). 
 37  - "return /regex/" is recognized correctly. 
 38  - "+ +" and "- -" sequences are not collapsed to '++' or '--' 
 39  - Newlines before ! operators are removed more sensibly 
 40  - Comments starting with an exclamation mark (``!``) can be kept optionally 
 41  - rJSmin does not handle streams, but only complete strings. (However, the 
 42    module provides a "streamy" interface). 
 43   
 44  Since most parts of the logic are handled by the regex engine it's way 
 45  faster than the original python port of ``jsmin.c`` by Baruch Even. The speed 
 46  factor varies between about 6 and 55 depending on input and python version 
 47  (it gets faster the more compressed the input already is). Compared to the 
 48  speed-refactored python port by Dave St.Germain the performance gain is less 
 49  dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for 
 50  details. 
 51   
 52  rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more. 
 53   
 54  Both python 2 and python 3 are supported. 
 55   
 56  .. _jsmin.c by Douglas Crockford: 
 57     http://www.crockford.com/javascript/jsmin.c 
 58  """ 
 59  __author__ = "Andr\xe9 Malo" 
 60  __author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1') 
 61  __docformat__ = "restructuredtext en" 
 62  __license__ = "Apache License, Version 2.0" 
 63  __version__ = '1.0.8' 
 64  __all__ = ['jsmin'] 
 65   
 66  import re as _re 
 67   
 68   
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    The C extension (``_rjsmin``) is preferred when importable. Otherwise a
    pure python minifier is assembled: two big substitution regexes (one
    dropping all comments, one keeping ``/*!...*/`` comments) are built from
    small pattern fragments and wrapped into a ``jsmin`` function.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            # Deliberate best effort: without the C extension we simply fall
            # through to the pure python implementation below.
            pass
        else:
            return _rjsmin.jsmin

    # Whitespace/control characters except \n (\012) and \r (\015)
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def sequentize(string):
        """
        Notate consecutive characters as sequence

        (1-4 instead of 1234)
        """
        first, last, result = None, None, []
        for char in map(ord, string):
            if last is None:
                first = last = char
            elif last + 1 == char:
                last = char
            else:
                result.append((first, last))
                first = last = char
        if last is not None:
            result.append((first, last))
        # Two adjacent chars are written out as-is ("ab"); only runs of
        # three or more are collapsed into a range ("a-c").
        return ''.join(['%s%s%s' % (
            chr(first),
            '-' if last > first + 1 else '',
            chr(last) if last != first else '',
        ) for first, last in result])

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            # A literal dash has to go last, otherwise it would be taken as
            # a range operator inside the character class.
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        escaped = (sequentize(result)
            .replace('\\', '\\\\')
            .replace('[', '\\[')
            .replace(']', '\\]')
        )
        return _re.sub(r'([\000-\040\047])',  # for better portability
            lambda m: '\\%03o' % ord(m.group(1)), escaped
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%ssreturn'.replace('%ss', '%s') % not_id_literal

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    dull = r'[^\047"/\000-\040]'

    # One template for both minifier variants, so they cannot drift apart.
    # ``%(bang)s`` is either empty or an extra alternative (with its own
    # capturing group) which passes ``/*!...*/`` comments through.
    template = (
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'%(bang)s'
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
        # The space after each newline must be repeatable here as well
        # (it was not before). Otherwise a regex literal on an indented
        # line following "return" was not recognized and its content got
        # minified like regular code.
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    )
    parts = dict(
        dull=dull,
        strings=strings,
        regex=regex,
        newline=newline,
        preregex1=preregex1,
        preregex2=preregex2,
        id_literal=id_literal,
        id_literal_open=id_literal_open,
        id_literal_close=id_literal_close,
    )

    space_sub_simple = _re.compile(
        template % dict(parts, bang='', space=space)
    ).sub

    def space_subber_simple(match):
        """ Substitution callback """
        groups = match.groups()
        return (
            # dull run, string, regex (after operator), regex (after return)
            groups[0] or groups[1] or groups[2] or groups[3]
            # significant newline between two statements
            or (groups[4] and '\n')
            # significant space (between identifiers, "+ +" or "- -")
            or ((groups[5] or groups[6] or groups[7]) and ' ')
            # everything else is droppable whitespace / comment
            or ''
        )

    space_sub_banged = _re.compile(template % dict(
        parts, bang=r'|(%s%s*)' % (bang_comment, dull), space=space_nobang
    )).sub

    def space_subber_banged(match):
        """ Substitution callback """
        groups = match.groups()
        return (
            # like the simple variant, plus the bang comment in groups[2]
            groups[0] or groups[1] or groups[2] or groups[3] or groups[4]
            or (groups[5] and '\n')
            or ((groups[6] or groups[7] or groups[8]) and ' ')
            or ''
        )

    def jsmin(script, keep_bang_comments=False):  # pylint: disable = W0621
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
            http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is framed with newlines so the lookbehind/lookahead
        # based alternatives also work at the very start and end; the frame
        # (and any other outer whitespace) is stripped again afterwards.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        return space_sub_simple(
            space_subber_simple, '\n%s\n' % script
        ).strip()

    return jsmin

jsmin = _make_jsmin()
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    # In both regexes the branch following the "...return)" lookbehind now
    # reads "(?:<newline><space>*)*" (it used to lack the inner "*"), the
    # same as the branch for regex literals following an operator. Without
    # it, a regex literal on an indented line after "return" was not
    # recognized and its content was minified like regular code.
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or            # dull (uninteresting) characters
                groups[1] or            # string literal
                groups[2] or            # regex literal after an operator
                groups[3] or            # regex literal after "return"
                (groups[4] and '\n') or  # significant newline
                (groups[5] and ' ') or   # space between identifiers
                (groups[6] and ' ') or   # space between "+" and "+"
                (groups[7] and ' ') or   # space between "-" and "-"
                ''                       # droppable whitespace / comment
            )
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or            # dull (uninteresting) characters
                groups[1] or            # string literal
                groups[2] or            # bang comment (kept verbatim)
                groups[3] or            # regex literal after an operator
                groups[4] or            # regex literal after "return"
                (groups[5] and '\n') or  # significant newline
                (groups[6] and ' ') or   # space between identifiers
                (groups[7] and ' ') or   # space between "+" and "+"
                (groups[8] and ' ') or   # space between "-" and "-"
                ''                       # droppable whitespace / comment
            )

    # The script is framed with newlines so the lookaround based
    # alternatives also work at the very start and end of the input.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()


if __name__ == '__main__':
    def main():
        """
        Minify stdin to stdout

        Command line flags: ``-b`` keeps bang comments, ``-p`` forces the
        pure python implementation; ``-bp`` / ``-pb`` do both.
        """
        import sys as _sys
        args = _sys.argv[1:]
        keep_bang_comments = (
            '-b' in args
            or '-bp' in args
            or '-pb' in args
        )
        if '-p' in args or '-bp' in args or '-pb' in args:
            global jsmin  # pylint: disable = W0603
            jsmin = _make_jsmin(python_only=True)
        _sys.stdout.write(jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))
    main()