Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pygments/lexers/perl.py : 29%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3 pygments.lexers.perl
4 ~~~~~~~~~~~~~~~~~~~~
6 Lexers for Perl, Raku and related languages.
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10"""
12import re
14from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 using, this, default, words
16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation
18from pygments.util import shebang_matches
20__all__ = ['PerlLexer', 'Perl6Lexer']
class PerlLexer(RegexLexer):
    """
    For `Perl <https://www.perl.org>`_ source code.

    The lexer is a plain state machine: ``root`` recognises the bulk of the
    language, while the ``*-string`` states track nested bracket pairs inside
    ``q``/``qq``/``qw``/``qr``/``qx`` literals and ``balanced-regex`` consumes
    the second half of a bracket-delimited ``s{...}{...}`` or ``m`` construct.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm', '*.t', '*.perl']
    mimetypes = ['text/x-perl', 'application/x-perl']

    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # One delimited regex body plus trailing modifiers, then back to the
        # state that pushed us.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'\{(\\\\|\\[^\\]|[^\\}])*\}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\A\#!.+?$', Comment.Hashbang),
            (r'\#.*?$', Comment.Single),
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (words((
                'case', 'continue', 'do', 'else', 'elsif', 'for', 'foreach',
                'if', 'last', 'my', 'next', 'our', 'redo', 'reset', 'then',
                'unless', 'until', 'while', 'print', 'new', 'BEGIN',
                'CHECK', 'INIT', 'END', 'return'), suffix=r'\b'),
             Keyword),
            (r'(format)(\s+)(\w+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
                String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
                String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
                String.Regex),
            # balanced delimiters
            (r's\{(\\\\|\\[^\\]|[^\\}])*\}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
                'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\)])*\)\s*', String.Regex,
                'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
                String.Regex),
            (r'\s+', Text),
            (words((
                'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir',
                'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'close', 'closedir', 'connect',
                'continue', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined', 'delete', 'die',
                'dump', 'each', 'endgrent', 'endhostent', 'endnetent', 'endprotoent',
                'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit', 'exp', 'fcntl',
                'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
                'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin',
                'getnetbyaddr', 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp',
                'getppid', 'getpriority', 'getprotobyname', 'getprotobynumber',
                'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
                'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime',
                'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'last',
                'lc', 'lcfirst', 'length', 'link', 'listen', 'local', 'localtime', 'log', 'lstat',
                'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'my', 'next', 'oct', 'open',
                'opendir', 'ord', 'our', 'pack', 'pipe', 'pop', 'pos', 'printf',
                'prototype', 'push', 'quotemeta', 'rand', 'read', 'readdir',
                'readline', 'readlink', 'readpipe', 'recv', 'redo', 'ref', 'rename',
                'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seek', 'seekdir',
                'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
                'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
                'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
                'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf', 'sqrt',
                'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysopen', 'sysread',
                'sysseek', 'system', 'syswrite', 'tell', 'telldir', 'tie', 'tied', 'time', 'times', 'tr',
                'truncate', 'uc', 'ucfirst', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie',
                'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
             Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            (r'(q|qq|qw|qr|qx)([\W_])(.|\n)*?\2', String.Other),
            (r'(package)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(use|require|no)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(sub)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (words((
                'no', 'package', 'require', 'use'), suffix=r'\b'),
             Keyword),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[()\[\]:;,<>/?{}]', Punctuation),  # yes, there's no shortage
                                                  # of punctuation in Perl!
            (r'(?=\w)', Name, 'name'),
        ],
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'),    # hash syntax?
            (r'\)|,', Punctuation, '#pop'),  # argument specifier
            (r'\w+::', Name.Namespace),
            (r'[\w:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*(::)?(?=\s*->)', Name.Namespace, '#pop'),
            (r'[a-zA-Z_]\w*(::[a-zA-Z_]\w*)*::', Name.Namespace, '#pop'),
            (r'[\w:]+', Name, '#pop'),
            # NOTE(review): the rule above already matches anything this one
            # could, so Name.Constant is never produced here. Left in place
            # because reordering would change existing highlighting output.
            (r'[A-Z_]+(?=\W)', Name.Constant, '#pop'),
            (r'(?=\W)', Text, '#pop'),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*[!?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r';', Punctuation, '#pop'),
            (r'.*?\{', Punctuation, '#pop'),
        ],
        # The four bracket-string states share one shape: escaped delimiter or
        # backslash, lone backslash, nested opener (push), closer (pop), and
        # a catch-all run.  The catch-all must NOT consume backslashes,
        # otherwise the escape rules never get a chance to fire mid-string and
        # an escaped closer (e.g. ``q(a\)b)``) would end the literal early.
        'cb-string': [
            (r'\\[{}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^{}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[()\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^()\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """Estimate how likely *text* is Perl.

        Returns ``True`` when the shebang line mentions ``perl``; otherwise a
        number: 0.9 if a ``my``/``our`` declaration followed by a sigil or
        ``(`` is found (else 0), halved when ``:=`` occurs in the text.
        """
        if shebang_matches(text, r'perl'):
            return True

        result = 0

        if re.search(r'(?:my|our)\s+[$@%(]', text):
            result += 0.9

        if ':=' in text:
            # := is not valid Perl, but it appears in unicon, so we should
            # become less confident if we think we found Perl with :=
            result /= 2

        return result
225class Perl6Lexer(ExtendedRegexLexer):
226 """
227 For `Raku <https://www.raku.org>`_ (a.k.a. Perl 6) source code.
229 .. versionadded:: 2.0
230 """
232 name = 'Perl6'
233 aliases = ['perl6', 'pl6', 'raku']
234 filenames = ['*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6',
235 '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod',
236 '*.rakutest', '*.rakudoc']
237 mimetypes = ['text/x-perl6', 'application/x-perl6']
238 flags = re.MULTILINE | re.DOTALL | re.UNICODE
240 PERL6_IDENTIFIER_RANGE = r"['\w:-]"
242 PERL6_KEYWORDS = (
243 #Phasers
244 'BEGIN','CATCH','CHECK','CLOSE','CONTROL','DOC','END','ENTER','FIRST',
245 'INIT','KEEP','LAST','LEAVE','NEXT','POST','PRE','QUIT','UNDO',
246 #Keywords
247 'anon','augment','but','class','constant','default','does','else',
248 'elsif','enum','for','gather','given','grammar','has','if','import',
249 'is','let','loop','made','make','method','module','multi','my','need',
250 'orwith','our','proceed','proto','repeat','require','return',
251 'return-rw','returns','role','rule','state','sub','submethod','subset',
252 'succeed','supersede','token','try','unit','unless','until','use',
253 'when','while','with','without',
254 #Traits
255 'export','native','repr','required','rw','symbol',
256 )
258 PERL6_BUILTINS = (
259 'ACCEPTS','abs','abs2rel','absolute','accept','accessed','acos',
260 'acosec','acosech','acosh','acotan','acotanh','acquire','act','action',
261 'actions','add','add_attribute','add_enum_value','add_fallback',
262 'add_method','add_parent','add_private_method','add_role','add_trustee',
263 'adverb','after','all','allocate','allof','allowed','alternative-names',
264 'annotations','antipair','antipairs','any','anyof','app_lifetime',
265 'append','arch','archname','args','arity','Array','asec','asech','asin',
266 'asinh','ASSIGN-KEY','ASSIGN-POS','assuming','ast','at','atan','atan2',
267 'atanh','AT-KEY','atomic-assign','atomic-dec-fetch','atomic-fetch',
268 'atomic-fetch-add','atomic-fetch-dec','atomic-fetch-inc',
269 'atomic-fetch-sub','atomic-inc-fetch','AT-POS','attributes','auth',
270 'await','backtrace','Bag','BagHash','bail-out','base','basename',
271 'base-repeating','batch','BIND-KEY','BIND-POS','bind-stderr',
272 'bind-stdin','bind-stdout','bind-udp','bits','bless','block','Bool',
273 'bool-only','bounds','break','Bridge','broken','BUILD','build-date',
274 'bytes','cache','callframe','calling-package','CALL-ME','callsame',
275 'callwith','can','cancel','candidates','cando','can-ok','canonpath',
276 'caps','caption','Capture','cas','catdir','categorize','categorize-list',
277 'catfile','catpath','cause','ceiling','cglobal','changed','Channel',
278 'chars','chdir','child','child-name','child-typename','chmod','chomp',
279 'chop','chr','chrs','chunks','cis','classify','classify-list','cleanup',
280 'clone','close','closed','close-stdin','cmp-ok','code','codes','collate',
281 'column','comb','combinations','command','comment','compiler','Complex',
282 'compose','compose_type','composer','condition','config',
283 'configure_destroy','configure_type_checking','conj','connect',
284 'constraints','construct','contains','contents','copy','cos','cosec',
285 'cosech','cosh','cotan','cotanh','count','count-only','cpu-cores',
286 'cpu-usage','CREATE','create_type','cross','cue','curdir','curupdir','d',
287 'Date','DateTime','day','daycount','day-of-month','day-of-week',
288 'day-of-year','days-in-month','declaration','decode','decoder','deepmap',
289 'default','defined','DEFINITE','delayed','DELETE-KEY','DELETE-POS',
290 'denominator','desc','DESTROY','destroyers','devnull','diag',
291 'did-you-mean','die','dies-ok','dir','dirname','dir-sep','DISTROnames',
292 'do','does','does-ok','done','done-testing','duckmap','dynamic','e',
293 'eager','earlier','elems','emit','enclosing','encode','encoder',
294 'encoding','end','ends-with','enum_from_value','enum_value_list',
295 'enum_values','enums','eof','EVAL','eval-dies-ok','EVALFILE',
296 'eval-lives-ok','exception','excludes-max','excludes-min','EXISTS-KEY',
297 'EXISTS-POS','exit','exitcode','exp','expected','explicitly-manage',
298 'expmod','extension','f','fail','fails-like','fc','feature','file',
299 'filename','find_method','find_method_qualified','finish','first','flat',
300 'flatmap','flip','floor','flunk','flush','fmt','format','formatter',
301 'freeze','from','from-list','from-loop','from-posix','full',
302 'full-barrier','get','get_value','getc','gist','got','grab','grabpairs',
303 'grep','handle','handled','handles','hardware','has_accessor','Hash',
304 'head','headers','hh-mm-ss','hidden','hides','hour','how','hyper','id',
305 'illegal','im','in','indent','index','indices','indir','infinite',
306 'infix','infix:<+>','infix:<->','install_method_cache','Instant',
307 'instead','Int','int-bounds','interval','in-timezone','invalid-str',
308 'invert','invocant','IO','IO::Notification.watch-path','is_trusted',
309 'is_type','isa','is-absolute','isa-ok','is-approx','is-deeply',
310 'is-hidden','is-initial-thread','is-int','is-lazy','is-leap-year',
311 'isNaN','isnt','is-prime','is-relative','is-routine','is-setting',
312 'is-win','item','iterator','join','keep','kept','KERNELnames','key',
313 'keyof','keys','kill','kv','kxxv','l','lang','last','lastcall','later',
314 'lazy','lc','leading','level','like','line','lines','link','List',
315 'listen','live','lives-ok','local','lock','log','log10','lookup','lsb',
316 'made','MAIN','make','Map','match','max','maxpairs','merge','message',
317 'method','method_table','methods','migrate','min','minmax','minpairs',
318 'minute','misplaced','Mix','MixHash','mkdir','mode','modified','month',
319 'move','mro','msb','multi','multiness','my','name','named','named_names',
320 'narrow','nativecast','native-descriptor','nativesizeof','new','new_type',
321 'new-from-daycount','new-from-pairs','next','nextcallee','next-handle',
322 'nextsame','nextwith','NFC','NFD','NFKC','NFKD','nl-in','nl-out',
323 'nodemap','nok','none','norm','not','note','now','nude','Num',
324 'numerator','Numeric','of','offset','offset-in-hours','offset-in-minutes',
325 'ok','old','on-close','one','on-switch','open','opened','operation',
326 'optional','ord','ords','orig','os-error','osname','out-buffer','pack',
327 'package','package-kind','package-name','packages','pair','pairs',
328 'pairup','parameter','params','parent','parent-name','parents','parse',
329 'parse-base','parsefile','parse-names','parts','pass','path','path-sep',
330 'payload','peer-host','peer-port','periods','perl','permutations','phaser',
331 'pick','pickpairs','pid','placeholder','plan','plus','polar','poll',
332 'polymod','pop','pos','positional','posix','postfix','postmatch',
333 'precomp-ext','precomp-target','pred','prefix','prematch','prepend',
334 'print','printf','print-nl','print-to','private','private_method_table',
335 'proc','produce','Promise','prompt','protect','pull-one','push',
336 'push-all','push-at-least','push-exactly','push-until-lazy','put',
337 'qualifier-type','quit','r','race','radix','rand','range','Rat','raw',
338 're','read','readchars','readonly','ready','Real','reallocate','reals',
339 'reason','rebless','receive','recv','redispatcher','redo','reduce',
340 'rel2abs','relative','release','rename','repeated','replacement',
341 'report','reserved','resolve','restore','result','resume','rethrow',
342 'reverse','right','rindex','rmdir','role','roles_to_compose','rolish',
343 'roll','rootdir','roots','rotate','rotor','round','roundrobin',
344 'routine-type','run','rwx','s','samecase','samemark','samewith','say',
345 'schedule-on','scheduler','scope','sec','sech','second','seek','self',
346 'send','Set','set_hidden','set_name','set_package','set_rw','set_value',
347 'SetHash','set-instruments','setup_finalization','shape','share','shell',
348 'shift','sibling','sigil','sign','signal','signals','signature','sin',
349 'sinh','sink','sink-all','skip','skip-at-least','skip-at-least-pull-one',
350 'skip-one','skip-rest','sleep','sleep-timer','sleep-until','Slip','slurp',
351 'slurp-rest','slurpy','snap','snapper','so','socket-host','socket-port',
352 'sort','source','source-package','spawn','SPEC','splice','split',
353 'splitdir','splitpath','sprintf','spurt','sqrt','squish','srand','stable',
354 'start','started','starts-with','status','stderr','stdout','Str',
355 'sub_signature','subbuf','subbuf-rw','subname','subparse','subst',
356 'subst-mutate','substr','substr-eq','substr-rw','subtest','succ','sum',
357 'Supply','symlink','t','tail','take','take-rw','tan','tanh','tap',
358 'target','target-name','tc','tclc','tell','then','throttle','throw',
359 'throws-like','timezone','tmpdir','to','today','todo','toggle','to-posix',
360 'total','trailing','trans','tree','trim','trim-leading','trim-trailing',
361 'truncate','truncated-to','trusts','try_acquire','trying','twigil','type',
362 'type_captures','typename','uc','udp','uncaught_handler','unimatch',
363 'uniname','uninames','uniparse','uniprop','uniprops','unique','unival',
364 'univals','unlike','unlink','unlock','unpack','unpolar','unshift',
365 'unwrap','updir','USAGE','use-ok','utc','val','value','values','VAR',
366 'variable','verbose-config','version','VMnames','volume','vow','w','wait',
367 'warn','watch','watch-path','week','weekday-of-month','week-number',
368 'week-year','WHAT','when','WHERE','WHEREFORE','WHICH','WHO',
369 'whole-second','WHY','wordcase','words','workaround','wrap','write',
370 'write-to','x','yada','year','yield','yyyy-mm-dd','z','zip','zip-latest',
372 )
374 PERL6_BUILTIN_CLASSES = (
375 #Booleans
376 'False','True',
377 #Classes
378 'Any','Array','Associative','AST','atomicint','Attribute','Backtrace',
379 'Backtrace::Frame','Bag','Baggy','BagHash','Blob','Block','Bool','Buf',
380 'Callable','CallFrame','Cancellation','Capture','CArray','Channel','Code',
381 'compiler','Complex','ComplexStr','Cool','CurrentThreadScheduler',
382 'Cursor','Date','Dateish','DateTime','Distro','Duration','Encoding',
383 'Exception','Failure','FatRat','Grammar','Hash','HyperWhatever','Instant',
384 'Int','int16','int32','int64','int8','IntStr','IO','IO::ArgFiles',
385 'IO::CatHandle','IO::Handle','IO::Notification','IO::Path',
386 'IO::Path::Cygwin','IO::Path::QNX','IO::Path::Unix','IO::Path::Win32',
387 'IO::Pipe','IO::Socket','IO::Socket::Async','IO::Socket::INET','IO::Spec',
388 'IO::Spec::Cygwin','IO::Spec::QNX','IO::Spec::Unix','IO::Spec::Win32',
389 'IO::Special','Iterable','Iterator','Junction','Kernel','Label','List',
390 'Lock','Lock::Async','long','longlong','Macro','Map','Match',
391 'Metamodel::AttributeContainer','Metamodel::C3MRO','Metamodel::ClassHOW',
392 'Metamodel::EnumHOW','Metamodel::Finalization','Metamodel::MethodContainer',
393 'Metamodel::MROBasedMethodDispatch','Metamodel::MultipleInheritance',
394 'Metamodel::Naming','Metamodel::Primitives','Metamodel::PrivateMethodContainer',
395 'Metamodel::RoleContainer','Metamodel::Trusting','Method','Mix','MixHash',
396 'Mixy','Mu','NFC','NFD','NFKC','NFKD','Nil','Num','num32','num64',
397 'Numeric','NumStr','ObjAt','Order','Pair','Parameter','Perl','Pod::Block',
398 'Pod::Block::Code','Pod::Block::Comment','Pod::Block::Declarator',
399 'Pod::Block::Named','Pod::Block::Para','Pod::Block::Table','Pod::Heading',
400 'Pod::Item','Pointer','Positional','PositionalBindFailover','Proc',
401 'Proc::Async','Promise','Proxy','PseudoStash','QuantHash','Range','Rat',
402 'Rational','RatStr','Real','Regex','Routine','Scalar','Scheduler',
403 'Semaphore','Seq','Set','SetHash','Setty','Signature','size_t','Slip',
404 'Stash','Str','StrDistance','Stringy','Sub','Submethod','Supplier',
405 'Supplier::Preserving','Supply','Systemic','Tap','Telemetry',
406 'Telemetry::Instrument::Thread','Telemetry::Instrument::Usage',
407 'Telemetry::Period','Telemetry::Sampler','Thread','ThreadPoolScheduler',
408 'UInt','uint16','uint32','uint64','uint8','Uni','utf8','Variable',
409 'Version','VM','Whatever','WhateverCode','WrapHandle'
410 )
412 PERL6_OPERATORS = (
413 'X', 'Z', 'after', 'also', 'and', 'andthen', 'before', 'cmp', 'div',
414 'eq', 'eqv', 'extra', 'ff', 'fff', 'ge', 'gt', 'le', 'leg', 'lt', 'm',
415 'mm', 'mod', 'ne', 'or', 'orelse', 'rx', 's', 'tr', 'x', 'xor', 'xx',
416 '++', '--', '**', '!', '+', '-', '~', '?', '|', '||', '+^', '~^', '?^',
417 '^', '*', '/', '%', '%%', '+&', '+<', '+>', '~&', '~<', '~>', '?&',
418 'gcd', 'lcm', '+', '-', '+|', '+^', '~|', '~^', '?|', '?^',
419 '~', '&', '^', 'but', 'does', '<=>', '..', '..^', '^..', '^..^',
420 '!=', '==', '<', '<=', '>', '>=', '~~', '===', '!eqv',
421 '&&', '||', '^^', '//', 'min', 'max', '??', '!!', 'ff', 'fff', 'so',
422 'not', '<==', '==>', '<<==', '==>>','unicmp',
423 )
425 # Perl 6 has a *lot* of possible bracketing characters
426 # this list was lifted from STD.pm6 (https://github.com/perl6/std)
427 PERL6_BRACKETS = {
428 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
429 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
430 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
431 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
432 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
433 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
434 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
435 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
436 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
437 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
438 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
439 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
440 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
441 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
442 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
443 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
444 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
445 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
446 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
447 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
448 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
449 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
450 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
451 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
452 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
453 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
454 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
455 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
456 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
457 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
458 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
459 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
460 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
461 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
462 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
463 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
464 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
465 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
466 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
467 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
468 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
469 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
470 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
471 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
472 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
473 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
474 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
475 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
476 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
477 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
478 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
479 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
480 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
481 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
482 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
483 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
484 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
485 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
486 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
487 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
488 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
489 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
490 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
491 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
492 }
def _build_word_match(words, boundary_regex_fragment=None, prefix='', suffix=''):
    """Build a regex source string that matches any literal in *words*.

    Each word is passed through ``re.escape``.

    * With ``boundary_regex_fragment=None`` the result is
      ``\\b(<prefix><alternatives><suffix>)\\b``; the single capturing group
      spans prefix, word and suffix.
    * Otherwise the result is
      ``(?<!B)<prefix>(<alternatives>)<suffix>(?!B)`` where ``B`` is the
      fragment (a single-character pattern, since it is used in a
      lookbehind); the capturing group spans the word only.

    *prefix* and *suffix* are regex fragments inserted verbatim.
    """
    alternation = r'|'.join(re.escape(word) for word in words)

    if boundary_regex_fragment is None:
        if prefix or suffix:
            # Without grouping, '|' would bind the prefix to the first word
            # only and the suffix to the last word only.  The group is
            # non-capturing so group numbering is unaffected, and it is
            # skipped when not needed so the pattern for plain calls is
            # unchanged.
            alternation = r'(?:' + alternation + r')'
        return r'\b(' + prefix + alternation + suffix + r')\b'

    return (r'(?<!' + boundary_regex_fragment + r')' + prefix +
            r'(' + alternation + r')' + suffix +
            r'(?!' + boundary_regex_fragment + r')')
def brackets_callback(token_class):
    """Return a lexer callback that emits one delimited construct as *token_class*.

    The triggering regex must define a ``delimiter`` named group (a run of one
    repeated character) and may define an ``adverbs`` group.  The callback
    locates the matching end of the construct itself, yields a single token
    from the start of the match through the closing delimiter, and moves
    ``context.pos`` past it.
    """
    def callback(lexer, match, context):
        named = match.groupdict()
        opener = named['delimiter']
        width = len(opener)
        adverbs = named.get('adverbs')

        source = context.text
        token_start = match.start()
        delim_start = match.start('delimiter')
        mirror = Perl6Lexer.PERL6_BRACKETS.get(opener[0])

        if mirror is not None:
            # Mirrored bracket: walk forward over openers and closers,
            # tracking depth, until the construct is balanced.
            closer_run = mirror * width
            depth = 1
            cursor = delim_start
            while True:
                scan_from = cursor + width
                open_at = source.find(opener, scan_from)
                close_at = source.find(closer_run, scan_from)

                if close_at == -1:
                    # unterminated: the construct runs to the end of the text
                    stop = len(source)
                    break
                if open_at != -1 and open_at < close_at:
                    depth += 1
                    cursor = open_at
                    continue

                depth -= 1
                cursor = close_at
                if depth == 0:
                    stop = close_at
                    break
        else:
            # Not a mirrored character: the same run closes the construct,
            # so simply look for its next occurrence.
            stop = source.find(opener, delim_start + width)

        if stop < 0:
            # no closer found; highlight the rest of the text in this class
            stop = len(source)

        wants_heredoc = adverbs is not None and re.search(r':to\b', adverbs)
        if wants_heredoc:
            # With the :to adverb the delimited text is a terminator word;
            # extend the token through the line that consists of that word.
            terminator = source[delim_start + width:stop]
            terminator_line = re.search(
                r'^\s*' + re.escape(terminator) + r'\s*$',
                source[stop:], re.MULTILINE)
            if terminator_line:
                stop += terminator_line.end()
            else:
                stop = len(source)

        token_end = stop + width
        yield token_start, token_class, source[token_start:token_end]
        context.pos = token_end

    return callback
def opening_brace_callback(lexer, match, context):
    """Emit the matched ``{`` as Text and track brace depth under a token state."""
    state_stack = context.stack
    begin, finish = match.start(), match.end()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # An opening brace seen while the state one level down is 'token' means
    # one more closing brace must go by before we hand control back to the
    # token rules, so bump the nesting counter.
    directly_under_token = len(state_stack) > 2 and state_stack[-2] == 'token'
    if directly_under_token:
        context.perl6_token_nesting_level += 1
def closing_brace_callback(lexer, match, context):
    """Emit the matched ``}`` as Text and leave embedded code once braces balance."""
    state_stack = context.stack
    begin, finish = match.start(), match.end()

    yield begin, Text, context.text[begin:finish]
    context.pos = finish

    # Only a free closing brace seen while the state one level down is
    # 'token' takes part in the nesting bookkeeping.
    if len(state_stack) <= 2 or state_stack[-2] != 'token':
        return

    context.perl6_token_nesting_level -= 1
    if context.perl6_token_nesting_level == 0:
        # braces are balanced again: drop back to the token state
        state_stack.pop()
def embedded_perl6_callback(lexer, match, context):
    """Emit the match as Text and enter 'root' with the brace counter reset to 1."""
    # reset the counter first; the brace callbacks adjust it from here on
    context.perl6_token_nesting_level = 1

    begin, finish = match.start(), match.end()
    yield begin, Text, context.text[begin:finish]

    context.pos = finish
    context.stack.append('root')
592 # If you're modifying these rules, be careful if you need to process '{' or '}'
593 # characters. We have special logic for processing these characters (due to the fact
594 # that you can nest Perl 6 code in regex blocks), so if you need to process one of
595 # them, make sure you also process the corresponding one!
596 tokens = {
597 'common': [
598 (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)',
599 brackets_callback(Comment.Multiline)),
600 (r'#[^\n]*$', Comment.Single),
601 (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
602 (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
603 (r'^=.*?\n\s*?\n', Comment.Multiline),
604 (r'(regex|token|rule)(\s*' + PERL6_IDENTIFIER_RANGE + '+:sym)',
605 bygroups(Keyword, Name), 'token-sym-brackets'),
606 (r'(regex|token|rule)(?!' + PERL6_IDENTIFIER_RANGE + r')(\s*' + PERL6_IDENTIFIER_RANGE + '+)?',
607 bygroups(Keyword, Name), 'pre-token'),
608 # deal with a special case in the Perl 6 grammar (role q { ... })
609 (r'(role)(\s+)(q)(\s*)', bygroups(Keyword, Text, Name, Text)),
610 (_build_word_match(PERL6_KEYWORDS, PERL6_IDENTIFIER_RANGE), Keyword),
611 (_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix='(?::[UD])?'),
612 Name.Builtin),
613 (_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
614 # copied from PerlLexer
615 (r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
616 Name.Variable),
617 (r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
618 (r'::\?\w+', Name.Variable.Global),
619 (r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*',
620 Name.Variable.Global),
621 (r'\$(?:<.*?>)+', Name.Variable),
622 (r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])'
623 r'(?P=first_char)*)', brackets_callback(String)),
624 # copied from PerlLexer
625 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
626 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
627 (r'0b[01]+(_[01]+)*', Number.Bin),
628 (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
629 Number.Float),
630 (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
631 (r'\d+(_\d+)*', Number.Integer),
632 (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
633 (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
634 (r'm\w+(?=\()', Name),
635 (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^\w:\s])'
636 r'(?P=first_char)*)', brackets_callback(String.Regex)),
637 (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/',
638 String.Regex),
639 (r'<[^\s=].*?\S>', String),
640 (_build_word_match(PERL6_OPERATORS), Operator),
641 (r'\w' + PERL6_IDENTIFIER_RANGE + '*', Name),
642 (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
643 (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
644 ],
645 'root': [
646 include('common'),
647 (r'\{', opening_brace_callback),
648 (r'\}', closing_brace_callback),
649 (r'.+?', Text),
650 ],
651 'pre-token': [
652 include('common'),
653 (r'\{', Text, ('#pop', 'token')),
654 (r'.+?', Text),
655 ],
656 'token-sym-brackets': [
657 (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)',
658 brackets_callback(Name), ('#pop', 'pre-token')),
659 default(('#pop', 'pre-token')),
660 ],
661 'token': [
662 (r'\}', Text, '#pop'),
663 (r'(?<=:)(?:my|our|state|constant|temp|let).*?;', using(this)),
664 # make sure that quotes in character classes aren't treated as strings
665 (r'<(?:[-!?+.]\s*)?\[.*?\]>', String.Regex),
666 # make sure that '#' characters in quotes aren't treated as comments
667 (r"(?<!\\)'(\\\\|\\[^\\]|[^'\\])*'", String.Regex),
668 (r'(?<!\\)"(\\\\|\\[^\\]|[^"\\])*"', String.Regex),
669 (r'#.*?$', Comment.Single),
670 (r'\{', embedded_perl6_callback),
671 ('.+?', String.Regex),
672 ],
673 }
def analyse_text(text):
    """Estimate how likely *text* is Perl 6 / Raku.

    Pod blocks are removed first.  Returns ``True`` for a matching shebang,
    a ``v6`` version statement, or a package-like declaration that is scoped
    or accompanied by a ``my``/``our``/``has`` declaration; otherwise returns
    the accumulated rating (``False`` when nothing matched), halved when
    ``:=`` occurs in the text.
    """
    def strip_pod(lines):
        # Drop every line from a '=word' directive up to and including the
        # next '=end' / '=cut' directive.
        kept = []
        inside_pod = False
        for candidate in lines:
            if re.match(r'^=(?:end|cut)', candidate):
                inside_pod = False
                continue
            if re.match(r'^=\w+', candidate):
                inside_pod = True
                continue
            if not inside_pod:
                kept.append(candidate)
        return kept

    # XXX handle block comments
    lines = strip_pod(text.splitlines())
    text = '\n'.join(lines)

    if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
        return True

    rating = False
    saw_perl_decl = False

    # check for my/our/has declarations
    declaration = (r"(?:my|our|has)\s+(?:" +
                   Perl6Lexer.PERL6_IDENTIFIER_RANGE + r"+\s+)?[$@%&(]")
    if re.search(declaration, text):
        saw_perl_decl = True
        rating = 0.8

    # Only the leading run of version / package-like declarations is
    # examined; the first other non-blank line ends the scan.
    for line in lines:
        line = re.sub('#.*', '', line)
        if re.match(r'^\s*$', line):
            continue

        # match v6; use v6; use v6.0; use v6.0.0;
        if re.match(r'^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
            return True

        # match class, module, role, enum, grammar declarations
        class_decl = re.match(r'^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
        if not class_decl:
            break
        if saw_perl_decl or class_decl.group('scope') is not None:
            return True
        rating = 0.05

    if ':=' in text:
        # Same logic as above for PerlLexer
        rating /= 2

    return rating
def __init__(self, **options):
    """Create the lexer, defaulting the input encoding to UTF-8.

    :param options: lexer options, forwarded unchanged to the base class.
    """
    super().__init__(**options)
    # Default to UTF-8 when no encoding is given.  Pygments documents
    # ``inencoding`` as overriding ``encoding``; assigning from ``encoding``
    # alone here would discard an ``inencoding`` the caller supplied, so it
    # is consulted first.
    self.encoding = options.get('inencoding') or options.get('encoding', 'utf-8')