|
1 | 1 | -- Copyright 2006-2025 Mitchell. See LICENSE. |
2 | 2 | -- MediaWiki LPeg lexer. |
3 | 3 | -- Contributed by Alexander Misel. |
| 4 | + |
4 | 5 | local lexer = lexer |
5 | | -local P, S, B = lpeg.P, lpeg.S, lpeg.B |
| 6 | +local P, S = lpeg.P, lpeg.S |
| 7 | + |
6 | 8 | local lex = lexer.new(...) |
7 | 9 |
|
| 10 | +-- Comments. Registered first so this rule is tried before all other rules.
| 11 | +lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->'))) |
| 12 | + |
8 | 13 | -- HTML-like tags |
9 | | -local dq_str = P('"') * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * P('"') |
10 | 14 | local unquoted_attr = (lexer.any - (S('"' .. "'" .. '<>=') + lexer.space))^1 |
| 15 | +local tag_name = lexer.alpha^1 |
11 | 16 | local tag_attr = lex:tag(lexer.ATTRIBUTE, lexer.alpha^1 * lexer.space^0 * |
12 | | - ('=' * lexer.space^0 * (dq_str + unquoted_attr))^-1 * lexer.space^0) |
13 | | -local tag_name = lexer.alnum^1 |
14 | | -local tag_inner_content = lexer.space^0 * tag_attr^0 * lexer.space^0 * P('/')^-1 |
| 17 | + ('=' * lexer.space^0 * (lexer.range('"') + unquoted_attr))^-1 * lexer.space^0) |
| 18 | +lex:add_rule('tag', lex:tag(lexer.TAG, '<' * P('/')^-1 * tag_name * |
| 19 | + (lexer.space^1 * tag_attr)^0 * lexer.space^0 * P('/')^-1 * '>')) |
15 | 20 |
|
16 | | --- The tag rule should consume from '<' to the final '>' |
17 | | -lex:add_rule('tag', lex:tag(lexer.TAG, '<' * P('/')^-1 * tag_name * tag_inner_content * P('>'))) |
| 21 | +-- Internal Links |
| 22 | +lex:add_rule('internal_link', lex:tag(lexer.LINK, lexer.range('[[', ']]'))) |
18 | 23 |
|
19 | | --- Internal Link: [[Target]] or [[Target|Display Text]] |
20 | | --- The content can contain almost anything except unbalanced square brackets. |
21 | | --- We'll highlight the whole thing as LINK. |
22 | | -local internal_link_content = (lexer.any - P(']]'))^1 -- Matches everything until ']]' |
23 | | -lex:add_rule('internal_link', lex:tag(lexer.LINK, P('[[') * internal_link_content * P(']]'))) |
| 24 | +-- External links, e.g. [https://example.com text]. NOTE(review): '://' rules out mailto:.
| 25 | +lex:add_rule('external_link', lex:tag(lexer.LINK, |
| 26 | + P('[') * lex:word_match(lexer.TYPE) * P('://') * |
| 27 | + (lexer.any - P(']'))^0 * P(']'))) |
24 | 28 |
|
25 | | --- External Link: [http://example.com Link text] or [http://example.com] |
26 | | --- Content should start with a protocol (http/s, ftp, mailto etc.) |
27 | | -local protocol = lexer.alpha^2 * P('://') |
28 | | -local external_link_content = (protocol * (lexer.any - P(']'))^1) + (lexer.any - P(']'))^1 |
29 | | -lex:add_rule('external_link', lex:tag(lexer.LINK, P('[') * external_link_content * P(']'))) |
| 29 | +-- Parser Functions |
| 30 | +lex:add_rule('parser_func', lex:tag(lexer.FUNCTION, |
| 31 | + P('{{') * P('#')^-1 * lexer.alpha^1 * P(':') * |
| 32 | + (lexer.any - S('{}'))^0 * P('}}'))) |
30 | 33 |
|
31 | | --- Parser Functions: {{#function:args}} or {{function:args}} |
32 | | --- This is a very complex area. This lexer assumes a simple "name:" pattern. |
33 | | --- Tag the function name and its arguments. |
34 | | -local parser_function_name = P('#')^-1 * (lexer.alpha + S('_'))^1 * P(':') |
35 | | -local parser_function_content = (lexer.any - S('{}'))^1 |
36 | | -lex:add_rule('parser_func', |
37 | | - lex:tag(lexer.FUNCTION, P('{{') * parser_function_name * parser_function_content * P('}}'))) |
| 34 | +-- Templates and variables, e.g. {{Name}} or {{Name|arg}}; the name must be letters only.
| 35 | +lex:add_rule('template', lex:tag(lexer.VARIABLE, |
| 36 | + P('{{') * lexer.alpha^1 * (P('|') * (lexer.any - S('{}'))^0)^0 * P('}}'))) |
38 | 37 |
|
39 | | --- Templates and Variables: {{TemplateName|args}} or {{VARIABLENAME}} |
40 | | --- Tag the template/variable name. |
41 | | --- This rule needs to be placed *after* parser_func if there's any ambiguity in parsing. |
42 | | -local template_or_variable_name = (lexer.alnum + S('_'))^1 |
43 | | -local template_content = (P('|') * (lexer.any - S('{}'))^1)^-1 -- Optional content after | |
44 | | -lex:add_rule('template', |
45 | | - lex:tag(lexer.VARIABLE, P('{{') * template_or_variable_name * template_content * P('}}'))) |
| 38 | +-- Headings, e.g. "== Title ==". NOTE(review): needs 2+ '=', so "= Title =" is skipped.
| 39 | +lex:add_rule('heading', lex:tag(lexer.HEADING, |
| 40 | + lexer.starts_line(S('=')^2 * lexer.space^0 * |
| 41 | + (lexer.any - S('=\r\n'))^1 * lexer.space^0 * S('=')^2))) |
46 | 42 |
|
47 | | --- Headings (e.g., == My Heading ==) |
48 | | --- Capture the heading text as lexer.HEADING |
49 | | -local heading_level = S('=')^1 |
50 | | -lex:add_rule('heading', |
51 | | - lex:tag(lexer.HEADING, lexer.starts_line(heading_level * lexer.space^0 * |
52 | | - (lexer.any - S('=') - lexer.newline)^1 * lexer.space^0 * heading_level))) |
| 43 | +-- Bold and Italic formatting |
| 44 | +lex:add_rule('bold', lex:tag(lexer.BOLD, lexer.range("'''", "'''"))) |
| 45 | +lex:add_rule('italic', lex:tag(lexer.ITALIC, lexer.range("''", "''"))) |
53 | 46 |
|
54 | | --- Operators. |
55 | | --- Consider adding more specific rules for bold/italic instead of general operators. |
56 | | --- For now, keep existing general operators. |
57 | | -lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('-=|#~!'))) |
58 | | - |
59 | | --- Behavior switches (e.g., __TOC__) |
| 47 | +-- Behavior switches |
60 | 48 | lex:add_rule('behavior_switch', |
61 | | - lex:tag(lexer.KEYWORD, lex:word_match('behavior_switch'))) |
62 | | - |
63 | | --- Comments. |
64 | | -lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->'))) |
| 49 | + lex:tag(lexer.PREPROCESSOR, lex:word_match(lexer.PREPROCESSOR))) |
65 | 50 |
|
66 | 51 | -- Word lists |
67 | | -lex:set_word_list('behavior_switch', |
68 | | - {'__TOC__', '__FORCETOC__', '__NOTOC__', '__NOEDITSECTION__', '__NOCC__', |
69 | | - '__NOINDEX__', '__NOKEYWORDLINK__', '__NOCONTENTCONVERT__', '__NOEDITSECTION__'}) |
| 52 | +lex:set_word_list(lexer.TYPE, { |
| 53 | + 'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher' |
| 54 | +}) |
| 55 | + |
| 56 | +lex:set_word_list(lexer.PREPROCESSOR, { |
| 57 | + '__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__', |
| 58 | + '__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__NOCONTENTCONVERT__', |
| 59 | + '__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__', |
| 60 | + '__NOINDEX__', '__STATICREDIRECT__', '__DISAMBIG__' |
| 61 | +}) |
70 | 62 |
|
| 63 | +-- Properties
71 | 64 | lexer.property['scintillua.comment'] = '<!--|-->' |
72 | 65 | lexer.property['scintillua.angle.braces'] = '1' |
73 | 66 |
|
|
0 commit comments