Skip to content

Commit 934fc06

Browse files
committed
fix(mediawiki): accommodate reviewers' requests
* fix `tag` rule to allow using attributes properly
* replace homegrown definitions with the ones from lexer.*
* move keyword lists into a separate section
1 parent 3e91db5 commit 934fc06

File tree

1 file changed

+43
-50
lines changed

1 file changed

+43
-50
lines changed

lexers/mediawiki.lua

Lines changed: 43 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,66 @@
11
-- Copyright 2006-2025 Mitchell. See LICENSE.
22
-- MediaWiki LPeg lexer.
33
-- Contributed by Alexander Misel.
4+
45
local lexer = lexer
5-
local P, S, B = lpeg.P, lpeg.S, lpeg.B
6+
local P, S = lpeg.P, lpeg.S
7+
68
local lex = lexer.new(...)
79

10+
-- Comments (high priority to avoid conflicts)
11+
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
12+
813
-- HTML-like tags
9-
local dq_str = P('"') * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * P('"')
1014
local unquoted_attr = (lexer.any - (S('"' .. "'" .. '<>=') + lexer.space))^1
15+
local tag_name = lexer.alpha^1
1116
local tag_attr = lex:tag(lexer.ATTRIBUTE, lexer.alpha^1 * lexer.space^0 *
12-
('=' * lexer.space^0 * (dq_str + unquoted_attr))^-1 * lexer.space^0)
13-
local tag_name = lexer.alnum^1
14-
local tag_inner_content = lexer.space^0 * tag_attr^0 * lexer.space^0 * P('/')^-1
17+
('=' * lexer.space^0 * (lexer.range('"') + unquoted_attr))^-1 * lexer.space^0)
18+
lex:add_rule('tag', lex:tag(lexer.TAG, '<' * P('/')^-1 * tag_name *
19+
(lexer.space^1 * tag_attr)^0 * lexer.space^0 * P('/')^-1 * '>'))
1520

16-
-- The tag rule should consume from '<' to the final '>'
17-
lex:add_rule('tag', lex:tag(lexer.TAG, '<' * P('/')^-1 * tag_name * tag_inner_content * P('>')))
21+
-- Internal Links
22+
lex:add_rule('internal_link', lex:tag(lexer.LINK, lexer.range('[[', ']]')))
1823

19-
-- Internal Link: [[Target]] or [[Target|Display Text]]
20-
-- The content can contain almost anything except unbalanced square brackets.
21-
-- We'll highlight the whole thing as LINK.
22-
local internal_link_content = (lexer.any - P(']]'))^1 -- Matches everything until ']]'
23-
lex:add_rule('internal_link', lex:tag(lexer.LINK, P('[[') * internal_link_content * P(']]')))
24+
-- External Links
25+
lex:add_rule('external_link', lex:tag(lexer.LINK,
26+
P('[') * lex:word_match(lexer.TYPE) * P('://') *
27+
(lexer.any - P(']'))^0 * P(']')))
2428

25-
-- External Link: [http://example.com Link text] or [http://example.com]
26-
-- Content should start with a protocol (http/s, ftp, mailto etc.)
27-
local protocol = lexer.alpha^2 * P('://')
28-
local external_link_content = (protocol * (lexer.any - P(']'))^1) + (lexer.any - P(']'))^1
29-
lex:add_rule('external_link', lex:tag(lexer.LINK, P('[') * external_link_content * P(']')))
29+
-- Parser Functions
30+
lex:add_rule('parser_func', lex:tag(lexer.FUNCTION,
31+
P('{{') * P('#')^-1 * lexer.alpha^1 * P(':') *
32+
(lexer.any - S('{}'))^0 * P('}}')))
3033

31-
-- Parser Functions: {{#function:args}} or {{function:args}}
32-
-- This is a very complex area. This lexer assumes a simple "name:" pattern.
33-
-- Tag the function name and its arguments.
34-
local parser_function_name = P('#')^-1 * (lexer.alpha + S('_'))^1 * P(':')
35-
local parser_function_content = (lexer.any - S('{}'))^1
36-
lex:add_rule('parser_func',
37-
lex:tag(lexer.FUNCTION, P('{{') * parser_function_name * parser_function_content * P('}}')))
34+
-- Templates and Variables
35+
lex:add_rule('template', lex:tag(lexer.VARIABLE,
36+
P('{{') * lexer.alpha^1 * (P('|') * (lexer.any - S('{}'))^0)^0 * P('}}')))
3837

39-
-- Templates and Variables: {{TemplateName|args}} or {{VARIABLENAME}}
40-
-- Tag the template/variable name.
41-
-- This rule needs to be placed *after* parser_func if there's any ambiguity in parsing.
42-
local template_or_variable_name = (lexer.alnum + S('_'))^1
43-
local template_content = (P('|') * (lexer.any - S('{}'))^1)^-1 -- Optional content after |
44-
lex:add_rule('template',
45-
lex:tag(lexer.VARIABLE, P('{{') * template_or_variable_name * template_content * P('}}')))
38+
-- Headings
39+
lex:add_rule('heading', lex:tag(lexer.HEADING,
40+
lexer.starts_line(S('=')^2 * lexer.space^0 *
41+
(lexer.any - S('=\r\n'))^1 * lexer.space^0 * S('=')^2)))
4642

47-
-- Headings (e.g., == My Heading ==)
48-
-- Capture the heading text as lexer.HEADING
49-
local heading_level = S('=')^1
50-
lex:add_rule('heading',
51-
lex:tag(lexer.HEADING, lexer.starts_line(heading_level * lexer.space^0 *
52-
(lexer.any - S('=') - lexer.newline)^1 * lexer.space^0 * heading_level)))
43+
-- Bold and Italic formatting
44+
lex:add_rule('bold', lex:tag(lexer.BOLD, lexer.range("'''", "'''")))
45+
lex:add_rule('italic', lex:tag(lexer.ITALIC, lexer.range("''", "''")))
5346

54-
-- Operators.
55-
-- Consider adding more specific rules for bold/italic instead of general operators.
56-
-- For now, keep existing general operators.
57-
lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('-=|#~!')))
58-
59-
-- Behavior switches (e.g., __TOC__)
47+
-- Behavior switches
6048
lex:add_rule('behavior_switch',
61-
lex:tag(lexer.KEYWORD, lex:word_match('behavior_switch')))
62-
63-
-- Comments.
64-
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
49+
lex:tag(lexer.PREPROCESSOR, lex:word_match(lexer.PREPROCESSOR)))
6550

6651
-- Word lists
67-
lex:set_word_list('behavior_switch',
68-
{'__TOC__', '__FORCETOC__', '__NOTOC__', '__NOEDITSECTION__', '__NOCC__',
69-
'__NOINDEX__', '__NOKEYWORDLINK__', '__NOCONTENTCONVERT__', '__NOEDITSECTION__'})
52+
lex:set_word_list(lexer.TYPE, {
53+
'http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher'
54+
})
55+
56+
lex:set_word_list(lexer.PREPROCESSOR, {
57+
'__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__',
58+
'__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__NOCONTENTCONVERT__',
59+
'__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__',
60+
'__NOINDEX__', '__STATICREDIRECT__', '__DISAMBIG__'
61+
})
7062

63+
-- Properties
7164
lexer.property['scintillua.comment'] = '<!--|-->'
7265
lexer.property['scintillua.angle.braces'] = '1'
7366

0 commit comments

Comments
 (0)