From 937fb16d5868c12b2066ac23ce164b35f91f89cd Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:21:05 -0700 Subject: [PATCH 01/27] Add `cmark_strbuf_remove` that removes a subrange of characters. --- src/buffer.c | 5 +++++ src/buffer.h | 10 ++++++++++ 2 files changed, 15 insertions(+) mode change 100644 => 100755 src/buffer.c mode change 100644 => 100755 src/buffer.h diff --git a/src/buffer.c b/src/buffer.c old mode 100644 new mode 100755 index c7934e57d..9c7ecae49 --- a/src/buffer.c +++ b/src/buffer.c @@ -242,6 +242,11 @@ void cmark_strbuf_trim(cmark_strbuf *buf) { cmark_strbuf_rtrim(buf); } +void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len) { + memmove(buf->ptr + start_offset, buf->ptr + start_offset + len, buf->size - (start_offset + len)); + buf->size -= len; +} + // Destructively modify string, collapsing consecutive // space and newline characters into a single space. void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { diff --git a/src/buffer.h b/src/buffer.h old mode 100644 new mode 100755 index b85bb4406..20093c969 --- a/src/buffer.h +++ b/src/buffer.h @@ -103,6 +103,16 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf); CMARK_GFM_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); +/** + Removes the characters in the given range. + + @param buf The string buffer. + @param start_offset The starting character offset. + @param len The length of characters to remove. + */ +CMARK_GFM_EXPORT +void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len); + CMARK_GFM_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); From 0fea1ddd7230ae020aec99ce23b48423645eb1ac Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:29:53 -0700 Subject: [PATCH 02/27] Fix source positions for inlines inside inconsistently indented blocks. 
--- src/blocks.c | 35 +++++++++++++++++++++++ src/inlines.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 107 insertions(+), 6 deletions(-) mode change 100644 => 100755 src/blocks.c mode change 100644 => 100755 src/inlines.c diff --git a/src/blocks.c b/src/blocks.c old mode 100644 new mode 100755 index 53e882f19..0c497fb2e --- a/src/blocks.c +++ b/src/blocks.c @@ -207,8 +207,37 @@ static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { cmark_strbuf_putc(&node->content, ' '); } } + + // If inserting the initial line to the node... + if (node->content.size == 0 + // OR the node is a code block... + || node->type == CMARK_NODE_CODE_BLOCK + // OR the node is a HTML block. + || node->type == CMARK_NODE_HTML_BLOCK) { + + // Then do not insert the leading trivia. cmark_strbuf_put(&node->content, ch->data + parser->offset, ch->len - parser->offset); + } else { + // Special case for maintaining the source position of block quotes + // as they can be lazy (i.e. the block quote marker can be omitted). + // + // The simple solution is to replace any block quote markers (">") + // present in the leading trivia with whitespace. + // + // Note: Using `parser->offset` and not `parser->first_nonspace` + // because the latter encompasses the former with the addition of + // whitespace (which we are not interested in). + assert(parser->offset <= parser->first_nonspace); + for (int i = 0; i < parser->offset; i++) { + if (peek_at(ch, i) == '>') + ch->data[i] = ' '; + } + + // Otherwise, do not remove leading trivia for appends (i.e. lines + // other than the first). + cmark_strbuf_put(&node->content, ch->data, ch->len); + } } static void remove_trailing_blank_lines(cmark_strbuf *ln) { @@ -266,6 +295,12 @@ static bool resolve_reference_link_definitions( chunk.data += pos; chunk.len -= pos; + + // Leading whitespace is not stripped. 
+ while (cmark_isspace(peek_at(&chunk, 0))) { + chunk.data += 1; + chunk.len -= 1; + } } cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); return !is_blank(&b->content, 0); diff --git a/src/inlines.c b/src/inlines.c old mode 100644 new mode 100755 index c21430bde..b90427e7f --- a/src/inlines.c +++ b/src/inlines.c @@ -325,10 +325,10 @@ static bufsize_t scan_to_closing_backticks(subject *subj, // spaces, then removing a single leading + trailing space, // unless the code span consists entirely of space characters. static void S_normalize_code(cmark_strbuf *s) { - bufsize_t r, w; + bufsize_t r, w, last_char_after_nl; bool contains_nonspace = false; - for (r = 0, w = 0; r < s->size; ++r) { + for (r = 0, w = 0, last_char_after_nl = 0; r < s->size; ++r) { switch (s->ptr[r]) { case '\r': if (s->ptr[r + 1] != '\n') { @@ -337,8 +337,25 @@ static void S_normalize_code(cmark_strbuf *s) { break; case '\n': s->ptr[w++] = ' '; + last_char_after_nl = w; + break; + case ' ': + s->ptr[w++] = s->ptr[r]; break; default: + if (last_char_after_nl) { + // Remove leading whitespace. + bufsize_t remove_len = r - last_char_after_nl; + + if (remove_len) { + cmark_strbuf_remove(s, last_char_after_nl, remove_len); + w -= remove_len; + r -= remove_len; + } + + last_char_after_nl = 0; + } + s->ptr[w++] = s->ptr[r]; } if (s->ptr[r] != ' ') { @@ -346,6 +363,20 @@ static void S_normalize_code(cmark_strbuf *s) { } } + if (last_char_after_nl) { + // Remove leading whitespace. Only reach here if the closing backquote + // delimiter is on its own line. + bufsize_t remove_len = r - last_char_after_nl; + + if (remove_len) { + cmark_strbuf_remove(s, last_char_after_nl, remove_len); + w -= remove_len; + r -= remove_len; + } + + last_char_after_nl = 0; + } + // begins and ends with space? 
if (contains_nonspace && s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') { @@ -361,13 +392,15 @@ static void S_normalize_code(cmark_strbuf *s) { // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static cmark_node *handle_backticks(subject *subj, int options) { + // Save the current source position in case of need to rewind. + bufsize_t subjpos = subj->pos; cmark_chunk openticks = take_while(subj, isbacktick); bufsize_t startpos = subj->pos; bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind - return make_str(subj, subj->pos, subj->pos, openticks); + return make_str(subj, subjpos, subjpos, openticks); } else { cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); @@ -797,6 +830,10 @@ static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) { advance(subj); return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { + // Adjust the subject source position state. 
+ ++subj->line; + subj->column_offset = -subj->pos; + return make_linebreak(subj->mem); } else { return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\")); @@ -877,7 +914,8 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); + return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, + subj->pos + subj->column_offset - 1, contents, 0); } // next try to match an email autolink @@ -886,7 +924,8 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); + return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, + subj->pos + subj->column_offset - 1, contents, 1); } // finally, try to match an html tag @@ -1163,7 +1202,8 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; - inl->start_line = inl->end_line = subj->line; + inl->start_line = opener->inl_text->start_line; + inl->end_line = subj->line; inl->start_column = opener->inl_text->start_column; inl->end_column = subj->pos + subj->column_offset + subj->block_offset; cmark_node_insert_before(opener->inl_text, inl); @@ -1304,10 +1344,21 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, cmark_chunk contents; unsigned char c; bufsize_t startpos, endpos; + int saved_block_offset = subj->block_offset; + c = peek_char(subj); if (c == 0) { return 0; } + + // If NOT the subject's initial line... + if (subj->column_offset != 0) { + // Reset the block offset. 
The line's leading trivia was not trimmed, + // so the source position will be computed appropriately without the + // block offset. + subj->block_offset = 0; + } + switch (c) { case '\r': case '\n': @@ -1370,12 +1421,27 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, cmark_chunk_rtrim(&contents); } + // If not the initial line (in the subject) AND at the beginning of another line. + if (subj->column_offset != 0 && startpos + subj->column_offset == 0) { + // Trim leading whitespace. + bufsize_t before_trim = contents.len; + cmark_chunk_ltrim(&contents); + + if (contents.len == 0) + break; // The contents were only whitespaces. + + // Update the start source position. + startpos += before_trim - contents.len; + } + new_inl = make_str(subj, startpos, endpos - 1, contents); } if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); } + subj->block_offset = saved_block_offset; + return 1; } From e523c7380eec9d6cc4b2a8b253e521ef1a6df91f Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 10:38:37 -0700 Subject: [PATCH 03/27] Add three additional source position tests. --- api_test/main.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) mode change 100644 => 100755 api_test/main.c diff --git a/api_test/main.c b/api_test/main.c old mode 100644 new mode 100755 index 62006eaa9..bd7dd7450 --- a/api_test/main.c +++ b/api_test/main.c @@ -994,6 +994,7 @@ static void test_pathological_regressions(test_batch_runner *runner) { } static void source_pos(test_batch_runner *runner) { + { static const char markdown[] = "# Hi *there*.\n" "\n" @@ -1057,6 +1058,107 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "1. 
**Start condition:** line begins with the string ``, or the end of the line.\\\n" + " **End condition:** line contains an end tag\n" + " ``, ``, or `` (case-insensitive; it\n" + " need not match the start tag).\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " Start condition:\n" + " \n" + " line begins with the string \n" + " <script\n" + " ,\n" + " \n" + " <pre\n" + " , or \n" + " <style\n" + " (case-insensitive), followed by whitespace,\n" + " \n" + " the string \n" + " >\n" + " , or the end of the line.\n" + " \n" + " \n" + " End condition:\n" + " \n" + " line contains an end tag\n" + " \n" + " </script>\n" + " , \n" + " </pre>\n" + " , or \n" + " </style>\n" + " (case-insensitive; it\n" + " \n" + " need not match the start tag).\n" + " \n" + " \n" + " \n" + "\n", + "list (with EOL backslash) sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } + { + static const char markdown[] = + "> The overriding design goal for Markdown's formatting syntax is\n" + " > to make it as **readable as possible**. The idea is that a\n" + "> Markdown-formatted document should be publishable as-is, as\n" + " > plain text, without *looking like* it's been marked up with tags\n" + " > or formatting instructions.\n" + "> ()\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " The overriding design goal for Markdown's formatting syntax is\n" + " \n" + " to make it as \n" + " \n" + " readable as possible\n" + " \n" + " . 
The idea is that a\n" + " \n" + " Markdown-formatted document should be publishable as-is, as\n" + " \n" + " plain text, without \n" + " \n" + " looking like\n" + " \n" + " it's been marked up with tags\n" + " \n" + " or formatting instructions.\n" + " \n" + " (\n" + " \n" + " http://daringfireball.net/projects/markdown/\n" + " \n" + " )\n" + " \n" + " \n" + "\n", + "inconsistently indented blockquote sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } +} static void source_pos_inlines(test_batch_runner *runner) { { @@ -1103,6 +1205,33 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "This link will have two [soft \n" + "line\n" + " breaks](https://commonmark.org)."; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " This link will have three \n" + " \n" + " soft\n" + " \n" + " line\n" + " \n" + " breaks\n" + " \n" + " .\n" + " \n" + "\n", + "autolink sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 6df4f1f556d1a35c7b1e7afe8f64f4d729d02a0a Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 26 Apr 2019 12:07:29 -0700 Subject: [PATCH 04/27] Fix outdated expected test result. 
--- api_test/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index bd7dd7450..8cc1b52ba 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1217,9 +1217,9 @@ static void source_pos_inlines(test_batch_runner *runner) { "\n" "\n" " \n" - " This link will have three \n" - " \n" - " soft\n" + " This link will have two \n" + " \n" + " soft\n" " \n" " line\n" " \n" From 891db7009dc9eeacbb83e6ba51e7fbd60d6b0c06 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 30 Apr 2019 19:37:52 -0700 Subject: [PATCH 05/27] Fix autolink source position. --- src/inlines.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index b90427e7f..9471396f5 100755 --- a/src/inlines.c +++ b/src/inlines.c @@ -150,8 +150,8 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj, link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); link->as.link.title = cmark_chunk_literal(""); link->start_line = link->end_line = subj->line; - link->start_column = start_column + 1; - link->end_column = end_column + 1; + link->start_column = subj->column_offset + subj->block_offset + start_column + 1; + link->end_column = subj->column_offset + subj->block_offset + end_column + 1; cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); return link; } @@ -914,8 +914,7 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, - subj->pos + subj->column_offset - 1, contents, 0); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); } // next try to match an email autolink @@ -924,8 +923,7 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) { contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen 
- 1); subj->pos += matchlen; - return make_autolink(subj, subj->pos + subj->column_offset - 1 - matchlen, - subj->pos + subj->column_offset - 1, contents, 1); + return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); } // finally, try to match an html tag From 9160d605718160596f8a166b99f3ecb7b61b90cb Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 30 Apr 2019 19:39:16 -0700 Subject: [PATCH 06/27] Fix expected autolink test fixture, and add an additional autolink source position test case. --- api_test/main.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index 8cc1b52ba..c9fd849ef 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1148,7 +1148,7 @@ static void source_pos(test_batch_runner *runner) { " \n" " (\n" " \n" - " http://daringfireball.net/projects/markdown/\n" + " http://daringfireball.net/projects/markdown/\n" " \n" " )\n" " \n" @@ -1232,6 +1232,34 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + " 1. \n" + " \n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " \n" + " http://www.google.com\n" + " \n" + " \n" + " \n" + " \n" + "\n", + "autolink (in list) sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 469031e0b7e8ad996e5e165183448fdcf62c9dd3 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Thu, 2 May 2019 19:00:17 -0700 Subject: [PATCH 07/27] Fix source position of setext headings. 
--- src/blocks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 0c497fb2e..2925ae653 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -323,8 +323,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_line = parser->line_number; b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || - (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) { + (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') From 51cd4f5731ee8c839dda97a40f63e84d9864178b Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Thu, 2 May 2019 19:00:50 -0700 Subject: [PATCH 08/27] Add setext heading test case. --- api_test/main.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index c9fd849ef..baa10e626 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1260,6 +1260,28 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "Level 1 Heading\n" + "===============\n" + "A paragraph.\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " Level 1 Heading\n" + " \n" + " \n" + " A paragraph.\n" + " \n" + "\n", + "heading sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 4ef606429a7d9fc8b2055793bb270faf985b8d74 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 3 May 2019 13:43:43 -0700 Subject: [PATCH 09/27] Fix source position for ATX-style headings. 
--- src/blocks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index 2925ae653..84c0f85fd 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -318,6 +318,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; + if (S_type(b) == CMARK_NODE_HEADING && !b->as.heading.setext) { + parser->last_line_length += b->end_column; + } + if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; @@ -1426,7 +1430,10 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } else if (accepts_lines(S_type(container))) { if (S_type(container) == CMARK_NODE_HEADING && container->as.heading.setext == false) { + bufsize_t original_len = input->len; chop_trailing_hashtags(input); + // Subtract one to exclude the trailing newline. + container->end_column += original_len - input->len - 1; } S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); From 035c392f6107866367c2210716da1a0da49ef2fd Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Fri, 3 May 2019 13:44:20 -0700 Subject: [PATCH 10/27] Add ATX-style heading source position test case. 
--- api_test/main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index baa10e626..741979461 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1282,6 +1282,30 @@ static void source_pos_inlines(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "# This is an H1 #\n" + "\n" + "> # Header 1 #\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " This is an H1\n" + " \n" + " \n" + " \n" + " Header 1\n" + " \n" + " \n" + "\n", + "atx heading sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void ref_source_pos(test_batch_runner *runner) { From 92444d7d4637018dd7bfd5140af71ef598b7cae7 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:28:34 -0700 Subject: [PATCH 11/27] Fix HTMl block source position. --- src/blocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index 84c0f85fd..82704500a 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -327,7 +327,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_line = parser->line_number; b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || - (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced)) { + (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || + (S_type(b) == CMARK_NODE_HTML_BLOCK)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') From 21571967c2b8524e359f2f4851a94b1b4da5921a Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:34:40 -0700 Subject: [PATCH 12/27] Add HTML block source position test case. 
--- api_test/main.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 741979461..138e1fb49 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1158,6 +1158,54 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "\n" + "\n" + "
```javascript\n"
+    "var s = \"JavaScript syntax highlighting\";\n"
+    "alert(s);\n"
+    "```\n"
+    " \n"
+    "```python\n"
+    "s = \"Python syntax highlighting\"\n"
+    "print s\n"
+    "```\n"
+    " \n"
+    "```\n"
+    "No language indicated, so no syntax highlighting. \n"
+    "But let's throw in a <b>tag</b>.\n"
+    "```\n"
+    "
\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " <pre lang="no-highlight"><code>```javascript\n" + "var s = "JavaScript syntax highlighting";\n" + "alert(s);\n" + "```\n" + " \n" + "```python\n" + "s = "Python syntax highlighting"\n" + "print s\n" + "```\n" + " \n" + "```\n" + "No language indicated, so no syntax highlighting. \n" + "But let's throw in a &lt;b&gt;tag&lt;/b&gt;.\n" + "```\n" + "</code></pre>\n" + "\n" + "\n", + "html block sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From 79bf93b7cfb998e939aae74c88d37494b7505662 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:47:21 -0700 Subject: [PATCH 13/27] Fix thematic break source position. --- src/blocks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index 82704500a..5e222d97d 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -318,6 +318,11 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; + if (S_type(b) == CMARK_NODE_THEMATIC_BREAK) { + // Already been "finalized". + return parent; + } + if (S_type(b) == CMARK_NODE_HEADING && !b->as.heading.setext) { parser->last_line_length += b->end_column; } @@ -1220,6 +1225,10 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); + // A thematic break can only be on a single line, so we can set the + // end source position here. 
+ (*container)->end_line = parser->line_number; + (*container)->end_column = input->len - 1; S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && parser->options & CMARK_OPT_FOOTNOTES && From 53041c309cc9eeacbedae343724ea59833c85634 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Mon, 6 May 2019 10:47:57 -0700 Subject: [PATCH 14/27] Add thematic break source position test case. --- api_test/main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index 138e1fb49..37cb6360e 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1206,6 +1206,24 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "---\n" + "\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + "\n", + "thematic break sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From be7f9c0b4068cdee016f9ddd2226c6ffd9a1ebe7 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 17:41:25 -0700 Subject: [PATCH 15/27] Fix ending source position for lists and list items. 
--- src/blocks.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index 5e222d97d..aee29c937 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -327,7 +327,11 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { parser->last_line_length += b->end_column; } - if (parser->curline.size == 0) { + if ((S_type(b) == CMARK_NODE_ITEM || S_type(b) == CMARK_NODE_LIST) + && b->last_child) { + b->end_line = b->last_child->end_line; + b->end_column = b->last_child->end_column; + } else if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; b->end_column = parser->last_line_length; @@ -1300,6 +1304,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // add the list item *container = add_child(parser, *container, CMARK_NODE_ITEM, parser->first_nonspace + 1); +// (*container)->end_line = parser->line_number; +// (*container)->end_column = parser->column; /* TODO: static */ memcpy(&((*container)->as.list), data, sizeof(*data)); parser->mem->free(data); From d2721bf8ba455e522fad5bb5e6efd3411d82d020 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 18:32:14 -0700 Subject: [PATCH 16/27] Add list/list item source position test case. --- api_test/main.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/api_test/main.c b/api_test/main.c index 37cb6360e..b03d0ee9b 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1035,7 +1035,7 @@ static void source_pos(test_batch_runner *runner) { " \n" " \n" " \n" - " \n" + " \n" " \n" " Okay.\n" " \n" @@ -1224,6 +1224,51 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "1. List 1, Item A.\n" + " 1. List 2, Item A.\n" + " Second line.\n" + "\n" + "2. 
List 1, Item B.\n" + " Second line.\n" + "\n" + "\n" + "\n"; + + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); + char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); + STR_EQ(runner, xml, "\n" + "\n" + "\n" + " \n" + " \n" + " \n" + " List 1, Item A.\n" + " \n" + " \n" + " \n" + " \n" + " List 2, Item A.\n" + " \n" + " Second line.\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " List 1, Item B.\n" + " \n" + " Second line.\n" + " \n" + " \n" + " \n" + "\n", + "list sourcepos are as expected"); + free(xml); + cmark_node_free(doc); + } } static void source_pos_inlines(test_batch_runner *runner) { From 94dba88e6dcfe66e0f691e3cb9529d1666d142f9 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 18:43:41 -0700 Subject: [PATCH 17/27] Remove commented out code. --- src/blocks.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index aee29c937..b052b2adc 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -1304,8 +1304,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // add the list item *container = add_child(parser, *container, CMARK_NODE_ITEM, parser->first_nonspace + 1); -// (*container)->end_line = parser->line_number; -// (*container)->end_column = parser->column; /* TODO: static */ memcpy(&((*container)->as.list), data, sizeof(*data)); parser->mem->free(data); From 99621a33510a02b0783c7300915c90ef40f92458 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Tue, 7 May 2019 19:27:22 -0700 Subject: [PATCH 18/27] Correct list source position. 
--- src/blocks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index b052b2adc..64d6e2df5 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -331,6 +331,12 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { && b->last_child) { b->end_line = b->last_child->end_line; b->end_column = b->last_child->end_column; + + if (S_type(b) == CMARK_NODE_ITEM && b->parent) { + // The finalization order is not deterministic... + b->parent->end_line = b->end_line; + b->parent->end_column = b->end_column; + } } else if (parser->curline.size == 0) { // end of input - line number has not been incremented b->end_line = parser->line_number; From 25709117bc4929bff02d7d89dcd298822456b4a3 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Wed, 15 May 2019 08:22:51 -0700 Subject: [PATCH 19/27] Fix source position for HTML blocks without a matching end condition. --- src/blocks.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index 64d6e2df5..89a3d9867 100755 --- a/src/blocks.c +++ b/src/blocks.c @@ -343,7 +343,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { b->end_column = parser->last_line_length; } else if (S_type(b) == CMARK_NODE_DOCUMENT || (S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) || - (S_type(b) == CMARK_NODE_HTML_BLOCK)) { + (S_type(b) == CMARK_NODE_HTML_BLOCK && b->end_line == b->start_line && b->end_column == 0)) { b->end_line = parser->line_number; b->end_column = parser->curline.size; if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n') @@ -1400,6 +1400,12 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } else { // not a lazy continuation // Finalize any blocks that were not matched and set cur to container: while (parser->current != last_matched_container) { + if (S_type(parser->current) == CMARK_NODE_HTML_BLOCK) { + // Edge case: Closing an HTML block without a matching end 
condition. + parser->current->end_line = parser->line_number - 1; + parser->current->end_column = parser->last_line_length; + } + parser->current = finalize(parser, parser->current); assert(parser->current != NULL); } From 062d4616bbc559af5d404576cee50f2d4baeb0c5 Mon Sep 17 00:00:00 2001 From: chriszielinski Date: Wed, 15 May 2019 08:23:57 -0700 Subject: [PATCH 20/27] Add test case for source position of a HTML block without a matching end condition. --- api_test/main.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/api_test/main.c b/api_test/main.c index b03d0ee9b..e13937500 100755 --- a/api_test/main.c +++ b/api_test/main.c @@ -1269,6 +1269,39 @@ static void source_pos(test_batch_runner *runner) { free(xml); cmark_node_free(doc); } + { + static const char markdown[] = + "* List 1, item A.\n" + "\n" + "