Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3 pygments.lexers.perl 

4 ~~~~~~~~~~~~~~~~~~~~ 

5 

6 Lexers for Perl, Raku and related languages. 

7 

8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. 

9 :license: BSD, see LICENSE for details. 

10""" 

11 

12import re 

13 

14from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ 

15 using, this, default, words 

16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

17 Number, Punctuation 

18from pygments.util import shebang_matches 

19 

20__all__ = ['PerlLexer', 'Perl6Lexer'] 

21 

22 

class PerlLexer(RegexLexer):
    """
    For `Perl <https://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm', '*.t', '*.perl']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL lets '.' span newlines (needed by the POD and heredoc rules);
    # MULTILINE makes '^' and '$' match at every line boundary.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Second half of a substitution written with bracketing delimiters
        # (s{...}{...}), or the body of an ``m`` match with an arbitrary
        # delimiter.  Every rule consumes one delimited section plus any
        # trailing modifier letters and then returns to the previous state.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\A\#!.+?$', Comment.Hashbang),
            (r'\#.*?$', Comment.Single),
            # POD block: from a line starting with '=word' up to '=cut'
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (words((
                'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
                'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
                'unless', 'until', 'while', 'print', 'new', 'BEGIN',
                'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
             Keyword),
            (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),
            # balanced delimiters
            (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Text),
            (words((
                'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
                'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
                'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
                'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
                'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
                'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
                'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
                'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
                'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
                'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
                'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
                'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
                'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
                'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
                'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
                'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
                'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
                'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
                'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
                'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
                'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
                'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
                'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
                'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
                'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
                'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
             Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # heredoc: <<TAG (optionally quoted) up to a line consisting of TAG
            (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            # quote-like operators with nesting bracket delimiters get their
            # own states so nested brackets can be counted on the state stack
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
            (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (words((
                'no', 'package', 'require', 'use'), suffix=r'\b'),
             Keyword),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                                   # of punctuation in Perl!
            # zero-width: hand any remaining word over to the 'name' state
            (r'(?=\w)', Name, 'name'),
        ],
        # Body of a ``format NAME =`` block; ends at a line holding only '.'
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        # What follows a sigil run ($, @, %, #)
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'),    # hash syntax?
            (r'\)|,', Punctuation, '#pop'),  # argument specifier
            (r'\w+::', Name.Namespace),
            (r'[\w:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
            (r'[\w:]+', Name, '#pop'),
            # NOTE(review): this state is only entered through the
            # r'(?=\w)' rule in 'root', so the r'[\w:]+' rule above always
            # matches and the two rules below are never reached.  They are
            # kept as-is to leave the emitted token stream unchanged.
            (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
            (r'(?=\W)', Text, '#pop'),
        ],
        # After ``sub``: optional name, optional prototype, then ';' or '{'
        'funcname': [
            (r'[a-zA-Z_]\w*[!?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r';', Punctuation, '#pop'),
            (r'.*?\{', Punctuation, '#pop'),
        ],
        # The four *-string states share one shape: an escaped delimiter or
        # backslash, a lone backslash, a nested opener (pushes the same state
        # again), the closer (pops), and a catch-all run.  The catch-all must
        # exclude the backslash, otherwise it swallows the backslash of an
        # escaped closer and the string ends too early (e.g. ``q(a\)b)``).
        'cb-string': [
            (r'\\[{}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^{}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[()\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^()\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        # Everything after __END__ (flags include DOTALL, so '.+' spans lines)
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """Estimate how likely *text* is Perl source.

        Returns ``True`` when the shebang line mentions ``perl``.  Otherwise
        returns a numeric score: 0.9 if a ``my``/``our`` declaration followed
        by a sigil or ``(`` is found, else 0; the score is halved when the
        text contains ``:=``.
        """
        if shebang_matches(text, r'perl'):
            return True

        result = 0

        if re.search(r'(?:my|our)\s+[$@%(]', text):
            result += 0.9

        if ':=' in text:
            # := is not valid Perl, but it appears in unicon, so we should
            # become less confident if we think we found Perl with :=
            result /= 2

        return result

223 

224 

225class Perl6Lexer(ExtendedRegexLexer): 

226 """ 

227 For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code. 

228 

229 .. versionadded:: 2.0 

230 """ 

231 

232 name = 'Perl6' 

233 aliases = ['perl6', 'pl6', 'raku'] 

234 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', 

235 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod', 

236 '*.rakutest', '*.rakudoc'] 

237 mimetypes = ['text/x-perl6', 'application/x-perl6'] 

238 flags = re.MULTILINE | re.DOTALL | re.UNICODE 

239 

240 PERL6_IDENTIFIER_RANGE = r"['\w:-]" 

241 

242 PERL6_KEYWORDS = ( 

243 #Phasers 

244 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST', 

245 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO', 

246 #Keywords 

247 'anon','augment','but','class','constant','default','does','else', 

248 'elsif','enum','for','gather','given','grammar','has','if','import', 

249 'is','let','loop','made','make','method','module','multi','my','need', 

250 'orwith','our','proceed','proto','repeat','require','return', 

251 'return-rw','returns','role','rule','state','sub','submethod','subset', 

252 'succeed','supersede','token','try','unit','unless','until','use', 

253 'when','while','with','without', 

254 #Traits 

255 'export','native','repr','required','rw','symbol', 

256 ) 

257 

258 PERL6_BUILTINS = ( 

259 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos', 

260 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action', 

261 'actions','add','add_attribute','add_enum_value','add_fallback', 

262 'add_method','add_parent','add_private_method','add_role','add_trustee', 

263 'adverb','after','all','allocate','allof','allowed','alternative-names', 

264 'annotations','antipair','antipairs','any','anyof','app_lifetime', 

265 'append','arch','archname','args','arity','Array','asec','asech','asin', 

266 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2', 

267 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch', 

268 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc', 

269 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth', 

270 'await','backtrace','Bag','BagHash','bail-out','base','basename', 

271 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr', 

272 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool', 

273 'bool-only','bounds','break','Bridge','broken','BUILD','build-date', 

274 'bytes','cache','callframe','calling-package','CALL-ME','callsame', 

275 'callwith','can','cancel','candidates','cando','can-ok','canonpath', 

276 'caps','caption','Capture','cas','catdir','categorize','categorize-list', 

277 'catfile','catpath','cause','ceiling','cglobal','changed','Channel', 

278 'chars','chdir','child','child-name','child-typename','chmod','chomp', 

279 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup', 

280 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate', 

281 'column','comb','combinations','command','comment','compiler','Complex', 

282 'compose','compose_type','composer','condition','config', 

283 'configure_destroy','configure_type_checking','conj','connect', 

284 'constraints','construct','contains','contents','copy','cos','cosec', 

285 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores', 

286 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d', 

287 'Date','DateTime','day','daycount','day-of-month','day-of-week', 

288 'day-of-year','days-in-month','declaration','decode','decoder','deepmap', 

289 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS', 

290 'denominator','desc','DESTROY','destroyers','devnull','diag', 

291 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames', 

292 'do','does','does-ok','done','done-testing','duckmap','dynamic','e', 

293 'eager','earlier','elems','emit','enclosing','encode','encoder', 

294 'encoding','end','ends-with','enum_from_value','enum_value_list', 

295 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE', 

296 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY', 

297 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage', 

298 'expmod','extension','f','fail','fails-like','fc','feature','file', 

299 'filename','find_method','find_method_qualified','finish','first','flat', 

300 'flatmap','flip','floor','flunk','flush','fmt','format','formatter', 

301 'freeze','from','from-list','from-loop','from-posix','full', 

302 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs', 

303 'grep','handle','handled','handles','hardware','has_accessor','Hash', 

304 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id', 

305 'illegal','im','in','indent','index','indices','indir','infinite', 

306 'infix','infix:<+>','infix:<->','install_method_cache','Instant', 

307 'instead','Int','int-bounds','interval','in-timezone','invalid-str', 

308 'invert','invocant','IO','IO::Notification.watch-path','is_trusted', 

309 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply', 

310 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year', 

311 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting', 

312 'is-win','item','iterator','join','keep','kept','KERNELnames','key', 

313 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later', 

314 'lazy','lc','leading','level','like','line','lines','link','List', 

315 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb', 

316 'made','MAIN','make','Map','match','max','maxpairs','merge','message', 

317 'method','method_table','methods','migrate','min','minmax','minpairs', 

318 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month', 

319 'move','mro','msb','multi','multiness','my','name','named','named_names', 

320 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type', 

321 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle', 

322 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out', 

323 'nodemap','nok','none','norm','not','note','now','nude','Num', 

324 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes', 

325 'ok','old','on-close','one','on-switch','open','opened','operation', 

326 'optional','ord','ords','orig','os-error','osname','out-buffer','pack', 

327 'package','package-kind','package-name','packages','pair','pairs', 

328 'pairup','parameter','params','parent','parent-name','parents','parse', 

329 'parse-base','parsefile','parse-names','parts','pass','path','path-sep', 

330 'payload','peer-host','peer-port','periods','perl','permutations','phaser', 

331 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll', 

332 'polymod','pop','pos','positional','posix','postfix','postmatch', 

333 'precomp-ext','precomp-target','pred','prefix','prematch','prepend', 

334 'print','printf','print-nl','print-to','private','private_method_table', 

335 'proc','produce','Promise','prompt','protect','pull-one','push', 

336 'push-all','push-at-least','push-exactly','push-until-lazy','put', 

337 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw', 

338 're','read','readchars','readonly','ready','Real','reallocate','reals', 

339 'reason','rebless','receive','recv','redispatcher','redo','reduce', 

340 'rel2abs','relative','release','rename','repeated','replacement', 

341 'report','reserved','resolve','restore','result','resume','rethrow', 

342 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish', 

343 'roll','rootdir','roots','rotate','rotor','round','roundrobin', 

344 'routine-type','run','rwx','s','samecase','samemark','samewith','say', 

345 'schedule-on','scheduler','scope','sec','sech','second','seek','self', 

346 'send','Set','set_hidden','set_name','set_package','set_rw','set_value', 

347 'SetHash','set-instruments','setup_finalization','shape','share','shell', 

348 'shift','sibling','sigil','sign','signal','signals','signature','sin', 

349 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one', 

350 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp', 

351 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port', 

352 'sort','source','source-package','spawn','SPEC','splice','split', 

353 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable', 

354 'start','started','starts-with','status','stderr','stdout','Str', 

355 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst', 

356 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum', 

357 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap', 

358 'target','target-name','tc','tclc','tell','then','throttle','throw', 

359 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix', 

360 'total','trailing','trans','tree','trim','trim-leading','trim-trailing', 

361 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type', 

362 'type_captures','typename','uc','udp','uncaught_handler','unimatch', 

363 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival', 

364 'univals','unlike','unlink','unlock','unpack','unpolar','unshift', 

365 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR', 

366 'variable','verbose-config','version','VMnames','volume','vow','w','wait', 

367 'warn','watch','watch-path','week','weekday-of-month','week-number', 

368 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO', 

369 'whole-second','WHY','wordcase','words','workaround','wrap','write', 

370 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest', 

371 

372 ) 

373 

374 PERL6_BUILTIN_CLASSES = ( 

375 #Booleans 

376 'False','True', 

377 #Classes 

378 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace', 

379 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf', 

380 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code', 

381 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler', 

382 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding', 

383 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant', 

384 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles', 

385 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path', 

386 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32', 

387 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec', 

388 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32', 

389 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List', 

390 'Lock','Lock::Async','long','longlong','Macro','Map','Match', 

391 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW', 

392 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer', 

393 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance', 

394 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer', 

395 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash', 

396 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64', 

397 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block', 

398 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator', 

399 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading', 

400 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc', 

401 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat', 

402 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler', 

403 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip', 

404 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier', 

405 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry', 

406 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage', 

407 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler', 

408 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable', 

409 'Version','VM','Whatever','WhateverCode','WrapHandle' 

410 ) 

411 

412 PERL6_OPERATORS = ( 

413 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div', 

414 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm', 

415 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx', 

416 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^', 

417 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&', 

418 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^', 

419 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^', 

420 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv', 

421 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so', 

422 'not', '<==', '==>', '<<==', '==>>','unicmp', 

423 ) 

424 

425 # Perl 6 has a *lot* of possible bracketing characters 

426 # this list was lifted from STD.pm6 (https://github.com/perl6/std) 

427 PERL6_BRACKETS = { 

428 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d', 

429 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b', 

430 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019', 

431 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d', 

432 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a', 

433 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e', 

434 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d', 

435 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd', 

436 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265', 

437 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b', 

438 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273', 

439 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279', 

440 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f', 

441 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285', 

442 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b', 

443 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8', 

444 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4', 

445 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1', 

446 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7', 

447 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1', 

448 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db', 

449 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1', 

450 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7', 

451 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed', 

452 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb', 

453 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe', 

454 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a', 

455 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b', 

456 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771', 

457 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4', 

458 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de', 

459 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7', 

460 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984', 

461 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a', 

462 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990', 

463 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996', 

464 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5', 

465 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5', 

466 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9', 

467 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e', 

468 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65', 

469 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80', 

470 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c', 

471 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96', 

472 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c', 

473 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9', 

474 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0', 

475 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe', 

476 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4', 

477 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0', 

478 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6', 

479 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa', 

480 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a', 

481 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21', 

482 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d', 

483 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015', 

484 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b', 

485 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18', 

486 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a', 

487 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40', 

488 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48', 

489 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e', 

490 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d', 

491 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63', 

492 } 

493 

494 def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''): 

495 if boundary_regex_fragment is None: 

496 return r'\b(' + prefix + r'|'.join(re.escape(x) for x in words) + \ 

497 suffix + r')\b' 

498 else: 

499 return r'(?<!' + boundary_regex_fragment + r')' + prefix + r'(' + \ 

500 r'|'.join(re.escape(x) for x in words) + r')' + suffix + r'(?!' + \ 

501 boundary_regex_fragment + r')' 

502 

def brackets_callback(token_class):
    """Create a lexer callback that emits one delimited construct as a single token.

    The returned callback expects *match* to carry a named group
    ``delimiter`` (a run of one repeated opening character) and, optionally,
    a named group ``adverbs``.  It locates the end of the construct in
    ``context.text``, yields the whole span as *token_class* and moves
    ``context.pos`` past it.

    :param token_class: token type given to the emitted span.
    :returns: a ``callback(lexer, match, context)`` generator function.
    """
    def callback(lexer, match, context):
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')

        source = context.text
        token_start = match.start()
        delimiter_start = match.start('delimiter')

        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is None:
            # Not a bracketing character: the construct ends at the next
            # occurrence of the same delimiter run, or at the end of the
            # text when there is none.
            end_pos = source.find(opener, delimiter_start + width)
            if end_pos < 0:
                end_pos = len(source)
        else:
            # Bracketing character: walk forward, counting nested openers,
            # until the closer that balances the first opener is found.
            closer = mirror * width
            depth = 1
            cursor = delimiter_start

            while True:
                resume_at = cursor + width
                open_at = source.find(opener, resume_at)
                end_pos = source.find(closer, resume_at)

                if end_pos == -1:
                    # unterminated: take everything up to the end of the text
                    end_pos = len(source)
                    break

                if open_at != -1 and open_at < end_pos:
                    depth += 1
                    cursor = open_at
                    continue

                depth -= 1
                cursor = end_pos
                if depth == 0:
                    break

        if adverbs is not None and re.search(r':to\b', adverbs):
            # Heredoc: the delimited text is the terminator; the construct
            # runs on until a later line consisting of that terminator
            # (surrounding whitespace allowed), or to the end of the text.
            terminator = source[delimiter_start + width:end_pos]
            terminator_line = re.search(
                r'^\s*' + re.escape(terminator) + r'\s*$',
                source[end_pos:], re.MULTILINE)

            if terminator_line is None:
                end_pos = len(source)
            else:
                end_pos += terminator_line.end()

        # NOTE(review): ``width`` is added unconditionally, so for an
        # unterminated construct (and after a heredoc) this index lies past
        # the position computed above and may exceed len(source); the slice
        # below tolerates that.
        token_end = end_pos + width
        yield token_start, token_class, source[token_start:token_end]
        context.pos = token_end

    return callback

558 

def opening_brace_callback(lexer, match, context):
    """Emit the matched opening brace as ``Text`` and track brace depth.

    After yielding the matched span and advancing ``context.pos`` to the end
    of the match, ``context.perl6_token_nesting_level`` is incremented when
    the state stack holds more than two entries and the entry just below the
    top is ``'token'``.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # An opening brace seen one level below a 'token' state deepens the
    # brace nesting, so that the matching closing brace can later tell when
    # to return to the token rules.
    directly_below_token = len(state_stack) > 2 and state_stack[-2] == 'token'
    if directly_below_token:
        context.perl6_token_nesting_level += 1

571 

def closing_brace_callback(lexer, match, context):
    """Emit the matched closing brace as ``Text`` and unwind brace depth.

    After yielding the matched span and advancing ``context.pos`` to the end
    of the match, and only when the state stack holds more than two entries
    with ``'token'`` just below the top, ``context.perl6_token_nesting_level``
    is decremented; when it reaches zero the top state is popped.
    """
    state_stack = context.stack
    begin, finish = match.span()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Only a free closing brace one level below a 'token' state affects the
    # nesting bookkeeping.
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        # the outermost brace was closed: drop back to the token state
        state_stack.pop()

585 

def embedded_perl6_callback(lexer, match, context):
    """Emit the matched text as ``Text`` and enter the ``'root'`` state.

    Sets ``context.perl6_token_nesting_level`` to 1 before yielding, then
    advances ``context.pos`` to the end of the match and pushes ``'root'``
    onto ``context.stack``.
    """
    # start counting braces from the one that opens the embedded code
    context.perl6_token_nesting_level = 1

    begin, finish = match.span()
    yield begin, Text, context.text[begin:finish]

    context.pos = finish
    context.stack.append('root')

591 

# If you're modifying these rules, be careful if you need to process '{' or '}'
# characters. We have special logic for processing these characters (due to the fact
# that you can nest Perl 6 code in regex blocks), so if you need to process one of
# them, make sure you also process the corresponding one!
#
# State overview:
#   'common'             - rules shared by 'root' and 'pre-token' (via include()).
#   'root'               - ordinary code; braces go through the brace callbacks.
#   'pre-token'          - text between a regex/token/rule keyword and its '{'.
#   'token-sym-brackets' - optional bracketed part right after ':sym'.
#   'token'              - body of a regex/token/rule block.
#
# NOTE(review): rule tables like this are normally order-sensitive (first
# matching rule wins) -- confirm before reordering any entry.
tokens = {
    'common': [
        # --- comments and POD ---
        # '#' followed by one of ` | = and a run of one repeated bracket
        # character; the rest is handled by brackets_callback.
        (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
         brackets_callback(Comment.Multiline)),
        (r'#[^\n]*$', Comment.Single),
        # '=begin NAME ... =end NAME'; the backreferences require the same
        # leading whitespace and the same NAME on the closing directive.
        (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
        # '=for' and other '=' directives run up to the next blank line.
        (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
        (r'^=.*?\n\s*?\n', Comment.Multiline),
        # --- regex/token/rule declarations ---
        # '<keyword> name:sym' form: continue in 'token-sym-brackets'.
        (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
         bygroups(Keyword, Name), 'token-sym-brackets'),
        # Keyword with an optional name; the negative lookahead rejects
        # identifiers that merely start with regex/token/rule.
        (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
         bygroups(Keyword, Name), 'pre-token'),
        # deal with a special case in the Perl 6 grammar (role q { ... })
        (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
        # --- keywords and builtins (patterns built by _build_word_match) ---
        (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
        (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
         Name.Builtin),
        (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
        # --- variables ---
        # copied from PerlLexer
        # Sigil, optional twigil character, identifier, then any number of
        # <<...>>, <...> or «...» subscripts.
        (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
         Name.Variable),
        # '$!' and '$/' with optional subscripts.
        (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
        (r'::\?\w+', Name.Variable.Global),
        # Variables whose sigil is followed by '*'.
        (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
         Name.Variable.Global),
        # '$<name>' form, one or more angle-bracket groups.
        (r'\$(?:<.*?>)+', Name.Variable),
        # --- quoting constructs ---
        # q / qq / Q, optional letter and adverbs, then a delimiter made of
        # one repeated non-alphanumeric character; brackets_callback
        # handles everything after the opening delimiter.
        (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
         r'(?P=first_char)*)', brackets_callback(String)),
        # --- numeric literals ---
        # copied from PerlLexer
        (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
        (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
        (r'0b[01]+(_[01]+)*', Number.Bin),
        (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
         Number.Float),
        (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
        (r'\d+(_\d+)*', Number.Integer),
        # --- regex literals ---
        # A '/.../' only counts as a regex when preceded by '~~' or by one
        # of '=', '(' or ','.
        (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
        (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
        # An identifier starting with 'm' directly followed by '(' is a
        # plain name, not the start of an m(...) match.
        (r'm\w+(?=\()', Name),
        # m / ms / rx with optional adverbs and an arbitrary delimiter.
        (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
         r'(?P=first_char)*)', brackets_callback(String.Regex)),
        # s / ss / tr with '/' delimiters: two consecutive '/'-terminated parts.
        (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
         String.Regex),
        # '<...>' whose first character is neither whitespace nor '=' and
        # whose last character is not whitespace.
        (r'<[^\s=].*?\S>', String),
        # --- operators, names, plain quoted strings ---
        (_build_word_match(PERL6_OPERATORS), Operator),
        (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
        (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
        (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
    ],
    'root': [
        include('common'),
        # Braces are routed through callbacks so nesting can be tracked.
        (r'\{', opening_brace_callback),
        (r'\}', closing_brace_callback),
        # Fallback: anything not matched above is emitted as Text.
        (r'.+?', Text),
    ],
    'pre-token': [
        include('common'),
        # The opening brace of the body replaces this state with 'token'.
        (r'\{', Text, ('#pop', 'token')),
        (r'.+?', Text),
    ],
    'token-sym-brackets': [
        # Optional bracketed part; with or without it, the next state is
        # 'pre-token'.
        (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
         brackets_callback(Name), ('#pop', 'pre-token')),
        default(('#pop', 'pre-token')),
    ],
    'token': [
        # A closing brace ends the token body.
        (r'\}', Text, '#pop'),
        # A declaration preceded by ':' and ending at ';' is lexed by this
        # same lexer via using(this).
        (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
        # make sure that quotes in character classes aren't treated as strings
        (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
        # make sure that '#' characters in quotes aren't treated as comments
        (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
        (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
        (r'#.*?$', Comment.Single),
        # An opening brace starts embedded code; the callback pushes 'root'.
        (r'\{', embedded_perl6_callback),
        # Fallback: everything else in a token body is regex text.
        ('.+?', String.Regex),
    ],
}

674 

def analyse_text(text):
    """Rate how likely *text* is Perl 6 source.

    POD sections are removed first.  The result is ``True`` when a
    perl6/rakudo/niecza/pugs shebang, a ``v6;`` / ``use v6;`` statement,
    or a sufficiently convincing class-like declaration is found.
    Otherwise a rating is returned: ``0.8`` when a ``my``/``our``/``has``
    declaration is present, ``0.05`` when the leading code lines are
    unscoped class-like declarations, ``False`` when nothing matched.
    The rating is halved if the text contains ``:=``.
    """

    def strip_pod(lines):
        """Return *lines* without POD blocks and without their directives."""
        kept = []
        inside_pod = False

        for entry in lines:
            if re.match(r'^=(?:end|cut)', entry):
                # Closing directive: POD ends, the directive itself is dropped.
                inside_pod = False
                continue
            if re.match(r'^=\w+', entry):
                # Any other directive opens (or continues) a POD section.
                inside_pod = True
                continue
            if not inside_pod:
                kept.append(entry)

        return kept

    # XXX handle block comments
    code_lines = strip_pod(text.splitlines())
    text = '\n'.join(code_lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    # check for my/our/has declarations
    declaration = re.search(
        r"(?:my|our|has)\s+(?:" + Perl6Lexer.PERL6_IDENTIFIER_RANGE +
        r"+\s+)?[$@%&(]", text)
    saw_perl_decl = declaration is not None
    rating = 0.8 if saw_perl_decl else False

    # Inspect only the leading run of code lines: scanning stops at the
    # first line that is neither blank/comment-only nor a declaration.
    for raw_line in code_lines:
        code = re.sub('#.*', '', raw_line)
        if re.match(r'^\s*$', code):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', code):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', code)
        if class_decl is None:
            break

        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        rating = 0.05

    # Same logic as above for PerlLexer: ':=' lowers the rating.
    return rating / 2 if ':=' in text else rating

729 

def __init__(self, **options):
    """Initialise the lexer, defaulting the input encoding to UTF-8.

    All *options* are forwarded unchanged to the parent initialiser.
    Afterwards ``self.encoding`` is set from, in order of precedence:
    the ``inencoding`` option, the ``encoding`` option, or ``'utf-8'``.

    Previously ``self.encoding`` was assigned from ``encoding`` alone,
    which discarded a caller-supplied ``inencoding``.
    NOTE(review): Pygments documents ``inencoding`` as overriding
    ``encoding`` for lexers; the base initialiser is not visible from
    here -- confirm it applies the same precedence.
    """
    super().__init__(**options)
    # An empty/missing 'inencoding' falls through to 'encoding', then to
    # the UTF-8 default used by this lexer.
    self.encoding = options.get('inencoding') or options.get('encoding', 'utf-8')