Skip to content

Commit 7babbdf

Browse files
committed
fix: Preserve formatting in fenced code blocks / quotes
1 parent e67f267 commit 7babbdf

File tree

8 files changed

+246
-16
lines changed

8 files changed

+246
-16
lines changed

src/parser/blocks/blockquote.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::parser::util::*;
33
use crate::parser::MarkdownParserState;
44
use nom::{
55
character::complete::char,
6+
combinator::opt,
67
multi::{many1, many_m_n},
78
sequence::preceded,
89
IResult, Parser,
@@ -13,13 +14,16 @@ pub(crate) fn blockquote<'a>(
1314
state: Rc<MarkdownParserState>,
1415
) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<Block>> {
1516
move |input: &'a str| {
16-
let prefix = preceded(many_m_n(0, 3, char(' ')), char('>'));
17+
// Block quote marker: 0-3 leading spaces, '>', optional space
18+
// Per CommonMark spec, the space after '>' is part of the marker and should be stripped
19+
let prefix = preceded(many_m_n(0, 3, char(' ')), (char('>'), opt(char(' '))));
1720

1821
let (input, lines) =
1922
many1(preceded(prefix, line_terminated(not_eof_or_eol0))).parse(input)?;
2023
let inner = lines.join("\n");
2124

22-
let (_, inner) = many1(crate::parser::blocks::block(state.clone()))
25+
let nested_state = Rc::new(state.nested());
26+
let (_, inner) = many1(crate::parser::blocks::block(nested_state))
2327
.parse(&inner)
2428
.map_err(|err| err.map_input(|_| input))?;
2529

src/parser/blocks/code_block.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,18 @@ pub(crate) fn code_block_indented<'a>(
4646
}
4747

4848
pub(crate) fn code_block_fenced<'a>(
49-
_state: Rc<MarkdownParserState>,
49+
state: Rc<MarkdownParserState>,
5050
) -> impl FnMut(&'a str) -> IResult<&'a str, CodeBlock> {
5151
move |input: &'a str| {
5252
let (input, space_prefix) = many_m_n(0, 3, char(' ')).parse(input)?;
53-
let prefix_length = space_prefix.len();
53+
// Only strip prefix indentation when NOT in nested context.
54+
// In nested context (inside list items, blockquotes, etc.), the container
55+
// parser already stripped the container's indentation from the content.
56+
let prefix_length = if state.is_nested_block_context {
57+
0
58+
} else {
59+
space_prefix.len()
60+
};
5461

5562
let (input, (fence, info)) = line_terminated((
5663
recognize(alt((

src/parser/blocks/footnote_definition.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ pub(crate) fn footnote_definition<'a>(
3737
footnote_content.push_str(line)
3838
}
3939

40-
let (_, blocks) = many0(crate::parser::blocks::block(state.clone()))
40+
let nested_state = Rc::new(state.nested());
41+
let (_, blocks) = many0(crate::parser::blocks::block(nested_state))
4142
.parse(&footnote_content)
4243
.map_err(|err| err.map_input(|_| input))?;
4344

src/parser/blocks/github_alert.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,17 +67,24 @@ pub(crate) fn github_alert<'a>(
6767
};
6868

6969
// Now parse the rest of the blockquote lines
70-
let prefix = preceded(many_m_n(0, 3, char(' ')), char('>'));
71-
let (input, mut lines) =
70+
// Block quote marker: 0-3 leading spaces, '>', optional space
71+
// Per CommonMark spec, the space after '>' is part of the marker and should be stripped
72+
let prefix = preceded(many_m_n(0, 3, char(' ')), (char('>'), opt(char(' '))));
73+
let (input, lines) =
7274
many1(preceded(prefix, line_terminated(not_eof_or_eol0))).parse(input)?;
7375

7476
// Remove the first line (alert marker) and join the rest
75-
lines.remove(0); // Remove the alert marker line completely
76-
let inner = lines.join("\n");
77+
// Slice past the first line instead of calling remove(0); the length check below keeps the slice in bounds even for an empty vec (although many1 guarantees at least one line)
78+
let inner = if lines.len() > 1 {
79+
lines[1..].join("\n")
80+
} else {
81+
String::new()
82+
};
7783

7884
// Parse the inner content as blocks
85+
let nested_state = Rc::new(state.nested());
7986
let (_, blocks) = if !inner.is_empty() {
80-
many1(crate::parser::blocks::block(state.clone()))
87+
many1(crate::parser::blocks::block(nested_state))
8188
.parse(&inner)
8289
.map_err(|err| err.map_input(|_| input))?
8390
} else {

src/parser/blocks/list.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ pub(crate) fn list_item(
215215
}
216216
}
217217

218-
let (_, blocks) = many0(crate::parser::blocks::block(state.clone()))
218+
let nested_state = Rc::new(state.nested());
219+
let (_, blocks) = many0(crate::parser::blocks::block(nested_state))
219220
.parse(&item_content)
220221
.map_err(|err| err.map_input(|_| input))?;
221222

src/parser/mod.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,16 @@ use std::rc::Rc;
8282
/// let config = MarkdownParserConfig::default();
8383
/// let state = MarkdownParserState::with_config(config);
8484
/// ```
85+
/// Note: This struct is marked `#[non_exhaustive]` to allow adding new fields
86+
/// in future versions without breaking existing code.
87+
#[non_exhaustive]
8588
pub struct MarkdownParserState {
8689
/// The parser configuration (reference-counted for efficient cloning)
8790
pub config: Rc<MarkdownParserConfig>,
91+
/// Whether we are parsing content extracted from a container block (list item, blockquote, etc.).
92+
/// When true, fenced code blocks should not strip additional indentation from their content.
93+
/// This field is for internal use only.
94+
pub(crate) is_nested_block_context: bool,
8895
}
8996

9097
impl MarkdownParserState {
@@ -118,6 +125,19 @@ impl MarkdownParserState {
118125
pub fn with_config(config: MarkdownParserConfig) -> Self {
119126
Self {
120127
config: Rc::new(config),
128+
is_nested_block_context: false,
129+
}
130+
}
131+
132+
/// Create a nested parser state for parsing content extracted from container blocks
133+
///
134+
/// This method creates a new state that shares the same configuration but marks
135+
/// the parsing context as nested. This prevents double-stripping of indentation
136+
/// when parsing fenced code blocks inside list items, blockquotes, etc.
137+
pub(crate) fn nested(&self) -> Self {
138+
Self {
139+
config: self.config.clone(),
140+
is_nested_block_context: true,
121141
}
122142
}
123143
}

src/printer/block.rs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,36 @@ impl<'a> ToDoc<'a> for Block {
5959
match kind {
6060
CodeBlockKind::Fenced { info } => {
6161
let info = info.as_deref().unwrap_or("");
62-
arena
63-
.text(format!("```{info}\n"))
64-
.append(arena.text(literal.clone()))
65-
.append(arena.text("\n```"))
62+
// Use hardline() between lines so nest() indentation applies correctly
63+
// when the code block is inside a list or other nested structure.
64+
// We use split('\n') instead of lines() to preserve trailing newlines.
65+
let mut doc = arena.text(format!("```{info}"));
66+
67+
// Handle code block content.
68+
// For non-empty content, we use split('\n') instead of lines() to preserve
69+
// trailing newlines. Each line gets a hardline() before it so that nest()
70+
// indentation applies correctly when inside lists or other nested structures.
71+
// IMPORTANT: For blank lines (empty or whitespace-only), we only add
72+
// hardline() without any text, so that nest() doesn't compound whitespace
73+
// on repeated format passes. This ensures idempotent formatting.
74+
if !literal.is_empty() {
75+
let lines: Vec<&str> = literal.split('\n').collect();
76+
for line in lines {
77+
doc = doc.append(arena.hardline());
78+
// Only add text for lines with non-whitespace content.
79+
// This prevents whitespace from compounding on each format pass.
80+
let trimmed = line.trim_start();
81+
if !trimmed.is_empty() {
82+
doc = doc.append(arena.text(line.to_string()));
83+
}
84+
}
85+
}
86+
87+
// Closing fence must be on its own line
88+
doc.append(arena.hardline()).append(arena.text("```"))
6689
}
6790
CodeBlockKind::Indented => {
68-
// каждый строка с отступом 4 пробела
91+
// Each line indented with 4 spaces
6992
let indented = literal
7093
.lines()
7194
.map(|l| format!(" {l}"))

src/printer/tests/list.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,170 @@ fn symmetric_round_trip_list_without_empty_line_before_list(input: &str) {
7070
let result = crate::printer::render_markdown(&doc, config);
7171
assert_eq!(input, result);
7272
}
73+
74+
// Regression test: fenced code blocks inside list items should preserve internal indentation
75+
// and formatting should be idempotent (format(format(x)) == format(x))
76+
// Fix for bug: fenced code blocks inside lists were losing indentation on each render pass
77+
// because literal '\n' characters prevented nest() from applying indentation properly.
78+
#[rstest(
79+
input,
80+
// Basic code block in ordered list item
81+
case(
82+
r#" 1. **Example:**
83+
84+
```rust
85+
fn test() {
86+
println!("hello");
87+
}
88+
```"#
89+
),
90+
// Code block with multiple indentation levels
91+
case(
92+
r#" 1. Item with code:
93+
94+
```python
95+
def foo():
96+
if True:
97+
return bar()
98+
```"#
99+
),
100+
// Nested list with code block (2 levels)
101+
case(
102+
r#" - Outer item
103+
- Inner item with code:
104+
105+
```js
106+
function test() {
107+
console.log("nested");
108+
}
109+
```"#
110+
),
111+
// Empty code block in list item
112+
case(
113+
r#" - Empty code block:
114+
115+
```rust
116+
```"#
117+
),
118+
// Code block with blank lines inside
119+
case(
120+
r#" - Code with blank lines:
121+
122+
```python
123+
def foo():
124+
pass
125+
126+
def bar():
127+
pass
128+
```"#
129+
),
130+
// Deeply nested list with code block (3 levels)
131+
case(
132+
r#" - Level 1
133+
- Level 2
134+
- Level 3 with code:
135+
136+
```rust
137+
fn deep() {
138+
nested();
139+
}
140+
```"#
141+
),
142+
// Unordered list with asterisk marker
143+
case(
144+
r#" * Item with asterisk:
145+
146+
```rust
147+
fn asterisk() {}
148+
```"#
149+
),
150+
// Unordered list with plus marker
151+
case(
152+
r#" + Item with plus:
153+
154+
```rust
155+
fn plus() {}
156+
```"#
157+
),
158+
// Multiple code blocks in one list item
159+
case(
160+
r#" - Multiple blocks:
161+
162+
First:
163+
164+
```rust
165+
fn first() {}
166+
```
167+
168+
Second:
169+
170+
```rust
171+
fn second() {}
172+
```"#
173+
),
174+
// Code block with unusual info string
175+
case(
176+
r#" - Item:
177+
178+
```rust,no_run,edition=2021
179+
fn info_string() {}
180+
```"#
181+
),
182+
)]
183+
fn fenced_code_block_in_list_idempotent(input: &str) {
184+
// First pass
185+
let doc1 = crate::parser::parse_markdown(crate::parser::MarkdownParserState::default(), input)
186+
.unwrap();
187+
let pass1 = crate::printer::render_markdown(&doc1, crate::printer::config::Config::default());
188+
189+
// Second pass - should be identical to first pass (idempotent)
190+
let doc2 = crate::parser::parse_markdown(crate::parser::MarkdownParserState::default(), &pass1)
191+
.unwrap();
192+
let pass2 = crate::printer::render_markdown(&doc2, crate::printer::config::Config::default());
193+
194+
assert_eq!(
195+
pass1, pass2,
196+
"Formatting should be idempotent.\nInput:\n{}\n\nFirst pass:\n{}\n\nSecond pass:\n{}",
197+
input, pass1, pass2
198+
);
199+
}
200+
201+
// Test that code blocks in blockquotes are also idempotent
202+
#[rstest(
203+
input,
204+
// Code block in blockquote
205+
case(
206+
r#"> Quote with code:
207+
>
208+
> ```rust
209+
> fn quoted() {
210+
> println!("in quote");
211+
> }
212+
> ```"#
213+
),
214+
// Code block in GitHub alert
215+
case(
216+
r#"> [!NOTE]
217+
> Alert with code:
218+
>
219+
> ```python
220+
> def alert():
221+
> pass
222+
> ```"#
223+
),
224+
)]
225+
fn fenced_code_block_in_blockquote_idempotent(input: &str) {
226+
let doc1 = crate::parser::parse_markdown(crate::parser::MarkdownParserState::default(), input)
227+
.unwrap();
228+
let pass1 = crate::printer::render_markdown(&doc1, crate::printer::config::Config::default());
229+
230+
let doc2 = crate::parser::parse_markdown(crate::parser::MarkdownParserState::default(), &pass1)
231+
.unwrap();
232+
let pass2 = crate::printer::render_markdown(&doc2, crate::printer::config::Config::default());
233+
234+
assert_eq!(
235+
pass1, pass2,
236+
"Formatting should be idempotent.\nInput:\n{}\n\nFirst pass:\n{}\n\nSecond pass:\n{}",
237+
input, pass1, pass2
238+
);
239+
}

0 commit comments

Comments
 (0)