From c01c18125512172f447ab43882f74e53fd263704 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:11:44 +0100 Subject: [PATCH 01/16] change html blockquote parsing logik --- docs/source/topics/text-formatting.rst | 31 ++++++++++++-------------- pyrogram/parser/html.py | 7 ++---- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst index 3ab0a2d95d..8553a5497b 100644 --- a/docs/source/topics/text-formatting.rst +++ b/docs/source/topics/text-formatting.rst @@ -61,7 +61,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the >blockquote - |>escaped blockquote + **>escaped blockquote >Fist line of multi line blockquote >Block quotation continued @@ -69,13 +69,12 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the >Block quotation continued >The last line of the block quotation - **> - The expandable block quotation started right after the previous block quotation - It is separated from the previous block quotation by expandable syntax - Expandable block quotation continued - Hidden by default part of the expandable block quotation started - Expandable block quotation continued - The last line of the expandable block quotation with the expandability mark<** + **>The expandable block quotation started right after the previous block quotation + **>It is separated from the previous block quotation by expandable syntax + **>Expandable block quotation continued + **>Hidden by default part of the expandable block quotation started + **>Expandable block quotation continued + **>The last line of the expandable block quotation with the expandability mark|| `inline fixed-width code` @@ -115,21 +114,19 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the ">blockquote\n" - "|>escaped blockquote\n" - ">Fist line of multi line blockquote\n" ">Block quotation continued\n" ">Block 
quotation continued\n" ">Block quotation continued\n" ">The last line of the block quotation" - "**>\n" - "The expandable block quotation started right after the previous block quotation\n" - "It is separated from the previous block quotation by expandable syntax\n" - "Expandable block quotation continued\n" - "Hidden by default part of the expandable block quotation started\n" - "Expandable block quotation continued\n" - "The last line of the expandable block quotation with the expandability mark<**" + "||\n" + "**>The expandable block quotation started right after the previous block quotation\n" + "**>It is separated from the previous block quotation by expandable syntax\n" + "**>Expandable block quotation continued\n" + "**>Hidden by default part of the expandable block quotation started\n" + "**>Expandable block quotation continued\n" + "**>The last line of the expandable block quotation with the expandability mark||" ), parse_mode=ParseMode.MARKDOWN diff --git a/pyrogram/parser/html.py b/pyrogram/parser/html.py index 594feba04b..f5e53250ac 100644 --- a/pyrogram/parser/html.py +++ b/pyrogram/parser/html.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. 
# @@ -178,16 +178,13 @@ def parse_one(entity): language = getattr(entity, "language", "") or "" start_tag = f'<{name} language="{language}">' if language else f"<{name}>" end_tag = f"" - elif entity_type == MessageEntityType.BLOCKQUOTE: - name = entity_type.name.lower() - start_tag = f"<{name}>" - end_tag = f"" elif entity_type == MessageEntityType.EXPANDABLE_BLOCKQUOTE: name = "blockquote" start_tag = f"<{name} expandable>" end_tag = f"" elif entity_type in ( MessageEntityType.CODE, + MessageEntityType.BLOCKQUOTE, MessageEntityType.SPOILER, ): name = entity_type.name.lower() From 6564d5fddea3be621a50b1768e20eb0a0dfe8aef Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:13:12 +0100 Subject: [PATCH 02/16] (fix): Adapt markdown unparser from telethon The problem with current implementation is when we have nested markdown inside a url the markdown order is messed up. Co-Authored-By: wulan17 --- pyrogram/parser/__init__.py | 2 +- pyrogram/parser/utils.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pyrogram/parser/__init__.py b/pyrogram/parser/__init__.py index 00c7acae76..af477e50fe 100644 --- a/pyrogram/parser/__init__.py +++ b/pyrogram/parser/__init__.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. # diff --git a/pyrogram/parser/utils.py b/pyrogram/parser/utils.py index 32c81707f6..42a23348ad 100644 --- a/pyrogram/parser/utils.py +++ b/pyrogram/parser/utils.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. 
# @@ -39,3 +39,20 @@ def remove_surrogates(text): def replace_once(source: str, old: str, new: str, start: int): return source[:start] + source[start:].replace(old, new, 1) + + +def within_surrogate(text, index, *, length=None): + """ + + https://github.com/LonamiWebs/Telethon/blob/63d9b26/telethon/helpers.py#L52-L63 + + `True` if ``index`` is within a surrogate (before and after it, not at!). + """ + if length is None: + length = len(text) + + return ( + 1 < index < len(text) and # in bounds + '\ud800' <= text[index - 1] <= '\udbff' and # previous is + '\ud800' <= text[index] <= '\udfff' # current is + ) From f9c4f6fc278e5da4f91b5c30c3b3c464236e0693 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 3 Mar 2025 17:14:35 +0100 Subject: [PATCH 03/16] broken fixes - Add support for multi-line blockquote in markdown unparser - Add support for custom emoji in markdown unparser Co-Authored-By: wulan17 --- pyrogram/parser/markdown.py | 381 +++++++++++++++--------------------- pyrogram/parser/parser.py | 2 +- 2 files changed, 157 insertions(+), 226 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index bdf62cad40..d98cc31f34 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -1,5 +1,5 @@ # Pyrogram - Telegram MTProto API Client Library for Python -# Copyright (C) 2017-present Dan +# Copyright (C) 2017-present # # This file is part of Pyrogram. # @@ -22,7 +22,6 @@ import pyrogram from pyrogram.enums import MessageEntityType - from . 
import utils from .html import HTML @@ -34,149 +33,91 @@ CODE_DELIM = "`" PRE_DELIM = "```" BLOCKQUOTE_DELIM = ">" -BLOCKQUOTE_ESCAPE_DELIM = "|>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" -BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**" - - -MARKDOWN_RE = re.compile( - r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( - d="|".join( - [ - "".join(i) - for i in [ - [rf"\{j}" for j in i] - for i in [ - PRE_DELIM, - CODE_DELIM, - STRIKE_DELIM, - UNDERLINE_DELIM, - ITALIC_DELIM, - BOLD_DELIM, - SPOILER_DELIM, - ] - ] + +MARKDOWN_RE = re.compile(r"({d})".format( + d="|".join( + ["".join(i) for i in [ + [rf"\{j}" for j in i] + for i in [ + PRE_DELIM, + CODE_DELIM, + STRIKE_DELIM, + UNDERLINE_DELIM, + ITALIC_DELIM, + BOLD_DELIM, + SPOILER_DELIM ] - ) - ) -) + ]] + ))) +URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)") OPENING_TAG = "<{}>" CLOSING_TAG = "" URL_MARKUP = '{}' EMOJI_MARKUP = "{}" FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM] +CODE_TAG_RE = re.compile(r".*?") class Markdown: def __init__(self, client: Optional["pyrogram.Client"]): self.html = HTML(client) - @staticmethod - def escape_and_create_quotes(text: str, strict: bool): - text_lines: list[str | None] = text.splitlines() - - # Indexes of Already escaped lines - html_escaped_list: list[int] = [] - - # Temporary Queue to hold lines to be quoted - to_quote_list: list[tuple[int, str]] = [] - - def create_blockquote(quote_type: str = "") -> None: - """ - Merges all lines in quote_queue into first line of queue - Encloses that line in html quote - Replaces rest of the lines with None placeholders to preserve indexes - """ - if len(to_quote_list) == 0: - return - - joined_lines = "\n".join([i[1] for i in to_quote_list]) + def blockquote_parser(self, text): + text = re.sub(r'\n>', '\n>', re.sub(r'^>', '>', text)) + lines = text.split('\n') + result = [] - first_line_index, _ = to_quote_list[0] - text_lines[first_line_index] = ( - f"{joined_lines}" - ) - - for line_to_remove in to_quote_list[1:]: - text_lines[line_to_remove[0]] = None - - 
to_quote_list.clear() - - # Handle Expandable Quote - inside_blockquote = False - for index, line in enumerate(text_lines): - if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM) and not inside_blockquote: - delim_stripped_line = line[3:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) - - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - inside_blockquote = True - continue - - elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM) and inside_blockquote: - delim_stripped_line = line[:-3] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) - - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - inside_blockquote = False - - create_blockquote(quote_type=" expandable") - - if inside_blockquote: - parsed_line = html.escape(line) if strict else line - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - # Handle Single line/Continued Quote - for index, line in enumerate(text_lines): - if line is None: - continue - - if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): - text_lines[index] = line[1:] - create_blockquote() - continue + in_blockquote = False + for line in lines: if line.startswith(BLOCKQUOTE_DELIM): - delim_stripped_line = line[1:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) + if not in_blockquote: + line = re.sub(r'^> ', OPENING_TAG.format("blockquote"), line) + line = re.sub(r'^>', OPENING_TAG.format("blockquote"), line) + in_blockquote = True + result.append(line.strip()) + else: + result.append(line[1:].strip()) + elif line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM): + if not in_blockquote: + line = re.sub(r'^\*\*> ', OPENING_TAG.format("blockquote expandable"), line) + line = re.sub(r'^\*\*>', OPENING_TAG.format("blockquote expandable"), line) + in_blockquote = True + result.append(line.strip()) + else: + result.append(line[3:].strip()) + else: + if 
in_blockquote: + line = CLOSING_TAG.format("blockquote") + line + in_blockquote = False + result.append(line) - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) + if in_blockquote: + line = result[len(result)-1] + CLOSING_TAG.format("blockquote") + result.pop(len(result)-1) + result.append(line) - elif len(to_quote_list) > 0: - create_blockquote() - else: - create_blockquote() + return '\n'.join(result) + async def parse(self, text: str, strict: bool = False): if strict: - for idx, line in enumerate(text_lines): - if idx not in html_escaped_list: - text_lines[idx] = html.escape(line) - - return "\n".join( - [valid_line for valid_line in text_lines if valid_line is not None] - ) + text = html.escape(text) + text = self.blockquote_parser(text) - async def parse(self, text: str, strict: bool = False): - text = self.escape_and_create_quotes(text, strict=strict) delims = set() is_fixed_width = False + placeholders = {} + for i, code_section in enumerate(CODE_TAG_RE.findall(text)): + placeholder = f"{{CODE_SECTION_{i}}}" + placeholders[placeholder] = code_section + text = text.replace(code_section, placeholder, 1) + for i, match in enumerate(re.finditer(MARKDOWN_RE, text)): start, _ = match.span() - delim, is_emoji, text_url, url = match.groups() + delim = match.group(1) full = match.group(0) if delim in FIXED_WIDTH_DELIMS: @@ -185,20 +126,6 @@ async def parse(self, text: str, strict: bool = False): if is_fixed_width and delim not in FIXED_WIDTH_DELIMS: continue - if not is_emoji and text_url: - text = utils.replace_once( - text, full, URL_MARKUP.format(url, text_url), start - ) - continue - - if is_emoji: - emoji = text_url - emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once( - text, full, EMOJI_MARKUP.format(emoji_id, emoji), start - ) - continue - if delim == BOLD_DELIM: tag = "b" elif delim == ITALIC_DELIM: @@ -224,109 +151,113 @@ async def parse(self, text: str, strict: bool = False): tag = CLOSING_TAG.format(tag) if 
delim == PRE_DELIM and delim in delims: - delim_and_language = text[text.find(PRE_DELIM) :].split("\n")[0] - language = delim_and_language[len(PRE_DELIM) :] - text = utils.replace_once( - text, delim_and_language, f'
', start
-                )
+                delim_and_language = text[text.find(PRE_DELIM):].split("\n")[0]
+                language = delim_and_language[len(PRE_DELIM):]
+                text = utils.replace_once(text, delim_and_language, f'
', start)
                 continue
 
             text = utils.replace_once(text, delim, tag, start)
 
+        for i, match in enumerate(re.finditer(URL_RE, text)):
+            start, _ = match.span()
+            is_emoji, text_url, url = match.groups()
+            full = match.group(0)
+
+            if not is_emoji and text_url:
+                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
+                continue
+
+            if is_emoji:
+                emoji = text_url
+                emoji_id = url.lstrip("tg://emoji?id=")
+                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
+                continue
+
+        for placeholder, code_section in placeholders.items():
+            text = text.replace(placeholder, code_section)
+
         return await self.html.parse(text)
 
     @staticmethod
     def unparse(text: str, entities: list):
+        """
+        Performs the reverse operation to .parse(), effectively returning
+        markdown-like syntax given a normal text and its MessageEntity's.
+
+        :param text: the text to be reconverted into markdown.
+        :param entities: list of MessageEntity's applied to the text.
+        :return: a markdown-like text representing the combination of both inputs.
+        """
+        delimiters = {
+            MessageEntityType.BOLD: BOLD_DELIM,
+            MessageEntityType.ITALIC: ITALIC_DELIM,
+            MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
+            MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
+            MessageEntityType.CODE: CODE_DELIM,
+            MessageEntityType.PRE: PRE_DELIM,
+            MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
+            MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
+            MessageEntityType.SPOILER: SPOILER_DELIM
+        }
+
         text = utils.add_surrogates(text)
 
-        entities_offsets = []
-
-        for entity in entities:
-            entity_type = entity.type
-            start = entity.offset
-            end = start + entity.length
-
-            if entity_type == MessageEntityType.BOLD:
-                start_tag = end_tag = BOLD_DELIM
-            elif entity_type == MessageEntityType.ITALIC:
-                start_tag = end_tag = ITALIC_DELIM
-            elif entity_type == MessageEntityType.UNDERLINE:
-                start_tag = end_tag = UNDERLINE_DELIM
-            elif entity_type == MessageEntityType.STRIKETHROUGH:
-                start_tag = end_tag = STRIKE_DELIM
-            elif entity_type == MessageEntityType.CODE:
-                start_tag = end_tag = CODE_DELIM
-            elif entity_type == MessageEntityType.PRE:
-                language = getattr(entity, "language", "") or ""
-                start_tag = f"{PRE_DELIM}{language}\n"
-                end_tag = f"\n{PRE_DELIM}"
-            elif entity_type == MessageEntityType.BLOCKQUOTE:
-                start_tag = BLOCKQUOTE_DELIM + " "
-                end_tag = ""
-                blockquote_text = text[start:end]
-                lines = blockquote_text.split("\n")
-                last_length = 0
-                for line in lines:
-                    if len(line) == 0 and last_length == end:
-                        continue
-                    start_offset = start + last_length
-                    last_length = last_length + len(line)
-                    end_offset = start_offset + last_length
-                    entities_offsets.append(
-                        (
-                            start_tag,
-                            start_offset,
-                        )
-                    )
-                    entities_offsets.append(
-                        (
-                            end_tag,
-                            end_offset,
-                        )
-                    )
-                    last_length = last_length + 1
-                continue
-            elif entity_type == MessageEntityType.EXPANDABLE_BLOCKQUOTE:
-                start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " "
-                end_tag = " " + BLOCKQUOTE_EXPANDABLE_END_DELIM
-            elif entity_type == MessageEntityType.SPOILER:
-                start_tag = end_tag = SPOILER_DELIM
-            elif entity_type == MessageEntityType.TEXT_LINK:
-                url = entity.url
-                start_tag = "["
-                end_tag = f"]({url})"
-            elif entity_type == MessageEntityType.TEXT_MENTION:
-                user = entity.user
-                start_tag = "["
-                end_tag = f"](tg://user?id={user.id})"
-            elif entity_type == MessageEntityType.CUSTOM_EMOJI:
-                emoji_id = entity.custom_emoji_id
-                start_tag = "!["
-                end_tag = f"](tg://emoji?id={emoji_id})"
+        insert_at = []
+        for i, entity in enumerate(entities):
+            s = entity.offset
+            e = entity.offset + entity.length
+            delimiter = delimiters.get(entity.type, None)
+            if delimiter:
+                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                    open_delimiter = delimiter
+                    close_delimiter = delimiter
+                    if entity.type == MessageEntityType.PRE:
+                        close_delimiter = '\n' + delimiter
+                        if entity.language:
+                            open_delimiter += entity.language + '\n'
+                        else:
+                            open_delimiter += '\n'
+                    insert_at.append((s, i, open_delimiter))
+                    insert_at.append((e, -i, close_delimiter))
+                else:
+                    # Handle multiline blockquotes
+                    text_subset = text[s:e]
+                    lines = text_subset.splitlines()
+                    for line_num, line in enumerate(lines):
+                        line_start = s + sum(len(l) + 1 for l in lines[:line_num])
+                        if entity.type == MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                            insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
+                        else:
+                            insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
+                    # No closing delimiter for blockquotes
             else:
-                continue
-
-            entities_offsets.append(
-                (
-                    start_tag,
-                    start,
-                )
-            )
-            entities_offsets.append(
-                (
-                    end_tag,
-                    end,
-                )
-            )
-
-        entities_offsets = map(
-            lambda x: x[1],
-            sorted(
-                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
-            ),
-        )
-
-        for entity, offset in entities_offsets:
-            text = text[:offset] + entity + text[offset:]
+                url = None
+                is_emoji = False
+                if entity.type == MessageEntityType.TEXT_LINK:
+                    url = entity.url
+                elif entity.type == MessageEntityType.TEXT_MENTION:
+                    url = f'tg://user?id={entity.user.id}'
+                elif entity.type == MessageEntityType.CUSTOM_EMOJI:
+                    url = f"tg://emoji?id={entity.custom_emoji_id}"
+                    is_emoji = True
+                if url:
+                    if is_emoji:
+                        insert_at.append((s, i, '!['))
+                    else:
+                        insert_at.append((s, i, '['))
+                    insert_at.append((e, -i, f']({url})'))
+
+        insert_at.sort(key=lambda t: (t[0], t[1]))
+        while insert_at:
+            at, _, what = insert_at.pop()
+
+            # If we are in the middle of a surrogate pair, nudge the position forward by 1.
+            # Otherwise we would end up with malformed text and fail to encode.
+            # For example of bad input: "Hi \ud83d\ude1c"
+            # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
+            while utils.within_surrogate(text, at):
+                at += 1
+
+            text = text[:at] + what + text[at:]
 
         return utils.remove_surrogates(text)
diff --git a/pyrogram/parser/parser.py b/pyrogram/parser/parser.py
index 0ce2b2375c..e2de12144e 100644
--- a/pyrogram/parser/parser.py
+++ b/pyrogram/parser/parser.py
@@ -1,5 +1,5 @@
 #  Pyrogram - Telegram MTProto API Client Library for Python
-#  Copyright (C) 2017-present Dan 
+#  Copyright (C) 2017-present 
 #
 #  This file is part of Pyrogram.
 #

From 1d5bb900d81896c4e5ec3df23f6fbf7ae05dbe5e Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Mon, 3 Mar 2025 17:17:40 +0100
Subject: [PATCH 04/16] markdown: link Telethon source in unparse docstring

---
 pyrogram/parser/markdown.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index d98cc31f34..2cd784fcff 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -181,6 +181,9 @@ async def parse(self, text: str, strict: bool = False):
     @staticmethod
     def unparse(text: str, entities: list):
         """
+
+        https://github.com/LonamiWebs/Telethon/blob/141b620/telethon/extensions/markdown.py#L137-L193
+
         Performs the reverse operation to .parse(), effectively returning
         markdown-like syntax given a normal text and its MessageEntity's.
 

From 34ca4e783749132021f317b177c84e6d8f9f4ef0 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 9 Mar 2025 12:03:22 +0100
Subject: [PATCH 05/16] markdown: Check if PRE is inside blockquote before
 unparsing it

Co-Authored-By: wulan17 
---
 pyrogram/parser/markdown.py | 41 +++++++++++++++++++++++++++++--------
 pyrogram/parser/utils.py    |  1 -
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 2cd784fcff..aa2fbf7fdc 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -181,7 +181,6 @@ async def parse(self, text: str, strict: bool = False):
     @staticmethod
     def unparse(text: str, entities: list):
         """
-
         https://github.com/LonamiWebs/Telethon/blob/141b620/telethon/extensions/markdown.py#L137-L193
 
         Performs the reverse operation to .parse(), effectively returning
@@ -211,15 +210,41 @@ def unparse(text: str, entities: list):
             e = entity.offset + entity.length
             delimiter = delimiters.get(entity.type, None)
             if delimiter:
-                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                if entity.type == MessageEntityType.PRE:
+                    inside_blockquote = any(
+                        blk_entity.offset <= s < blk_entity.offset + blk_entity.length and
+                        blk_entity.offset < e <= blk_entity.offset + blk_entity.length
+                        for blk_entity in entities
+                        if blk_entity.type == MessageEntityType.BLOCKQUOTE
+                    )
+                    is_expandable = any(
+                        blk_entity.offset <= s < blk_entity.offset + blk_entity.length and
+                        blk_entity.offset < e <= blk_entity.offset + blk_entity.length and
+                        blk_entity.collapsed
+                        for blk_entity in entities
+                        if blk_entity.type == MessageEntityType.BLOCKQUOTE
+                    )
+                    if inside_blockquote:
+                        if is_expandable:
+                            if entity.language:
+                                open_delimiter = f"{delimiter}{entity.language}\n**>"
+                            else:
+                                open_delimiter = f"{delimiter}\n**>"
+                            close_delimiter = f"\n**>{delimiter}"
+                        else:
+                            if entity.language:
+                                open_delimiter = f"{delimiter}{entity.language}\n>"
+                            else:
+                                open_delimiter = f"{delimiter}\n>"
+                            close_delimiter = f"\n>{delimiter}"
+                    else:
+                        open_delimiter = delimiter
+                        close_delimiter = delimiter
+                    insert_at.append((s, i, open_delimiter))
+                    insert_at.append((e, -i, close_delimiter))
+                elif entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
                     open_delimiter = delimiter
                     close_delimiter = delimiter
-                    if entity.type == MessageEntityType.PRE:
-                        close_delimiter = '\n' + delimiter
-                        if entity.language:
-                            open_delimiter += entity.language + '\n'
-                        else:
-                            open_delimiter += '\n'
                     insert_at.append((s, i, open_delimiter))
                     insert_at.append((e, -i, close_delimiter))
                 else:
diff --git a/pyrogram/parser/utils.py b/pyrogram/parser/utils.py
index 42a23348ad..e011976943 100644
--- a/pyrogram/parser/utils.py
+++ b/pyrogram/parser/utils.py
@@ -43,7 +43,6 @@ def replace_once(source: str, old: str, new: str, start: int):
 
 def within_surrogate(text, index, *, length=None):
     """
-    
     https://github.com/LonamiWebs/Telethon/blob/63d9b26/telethon/helpers.py#L52-L63
 
     `True` if ``index`` is within a surrogate (before and after it, not at!).

From 6f6c0e73238d4dd67def9a35188edafb8f5e1f1d Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 23 Mar 2025 18:11:55 +0100
Subject: [PATCH 06/16] testing

---
 docs/source/topics/text-formatting.rst |  33 ++--
 pyrogram/parser/markdown.py            | 255 ++++++++++++++++---------
 2 files changed, 179 insertions(+), 109 deletions(-)

diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst
index 8553a5497b..56f7d43644 100644
--- a/docs/source/topics/text-formatting.rst
+++ b/docs/source/topics/text-formatting.rst
@@ -61,7 +61,7 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
 
     >blockquote
 
-    **>escaped blockquote 
+    |>escaped blockquote 
 
     >Fist line of multi line blockquote 
     >Block quotation continued
@@ -69,12 +69,13 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
     >Block quotation continued
     >The last line of the block quotation
 
-    **>The expandable block quotation started right after the previous block quotation
-    **>It is separated from the previous block quotation by expandable syntax 
-    **>Expandable block quotation continued
-    **>Hidden by default part of the expandable block quotation started
-    **>Expandable block quotation continued
-    **>The last line of the expandable block quotation with the expandability mark||
+    **>
+    The expandable block quotation started right after the previous block quotation
+    It is separated from the previous block quotation by expandable syntax 
+    Expandable block quotation continued
+    Hidden by default part of the expandable block quotation started
+    Expandable block quotation continued
+    The last line of the expandable block quotation with the expandability mark<**
 
     `inline fixed-width code`
 
@@ -110,23 +111,25 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the
             "```"
             "for i in range(10):\n"
             "    print(i)"
-            "```"
+            "```\n"
 
             ">blockquote\n"
 
+            "|>escaped blockquote\n"
+
             ">Fist line of multi line blockquote\n"
             ">Block quotation continued\n"
             ">Block quotation continued\n"
             ">Block quotation continued\n"
             ">The last line of the block quotation"
 
-            "||\n"
-            "**>The expandable block quotation started right after the previous block quotation\n"
-            "**>It is separated from the previous block quotation by expandable syntax\n"
-            "**>Expandable block quotation continued\n"
-            "**>Hidden by default part of the expandable block quotation started\n"
-            "**>Expandable block quotation continued\n"
-            "**>The last line of the expandable block quotation with the expandability mark||"
+            "**>\n"
+            "The expandable block quotation started right after the previous block quotation\n"
+            "It is separated from the previous block quotation by expandable syntax\n"
+            "Expandable block quotation continued\n"
+            "Hidden by default part of the expandable block quotation started\n"
+            "Expandable block quotation continued\n"
+            "The last line of the expandable block quotation with the expandability mark<**"
 
         ),
         parse_mode=ParseMode.MARKDOWN
diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 48101d437e..1134a5d070 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -33,24 +33,32 @@
 CODE_DELIM = "`"
 PRE_DELIM = "```"
 BLOCKQUOTE_DELIM = ">"
+BLOCKQUOTE_ESCAPE_DELIM = "|>"
 BLOCKQUOTE_EXPANDABLE_DELIM = "**>"
-
-MARKDOWN_RE = re.compile(r"({d})".format(
-    d="|".join(
-        ["".join(i) for i in [
-            [rf"\{j}" for j in i]
-            for i in [
-                PRE_DELIM,
-                CODE_DELIM,
-                STRIKE_DELIM,
-                UNDERLINE_DELIM,
-                ITALIC_DELIM,
-                BOLD_DELIM,
-                SPOILER_DELIM
+BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**"
+
+
+MARKDOWN_RE = re.compile(
+    r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
+        d="|".join(
+            [
+                "".join(i)
+                for i in [
+                    [rf"\{j}" for j in i]
+                    for i in [
+                        PRE_DELIM,
+                        CODE_DELIM,
+                        STRIKE_DELIM,
+                        UNDERLINE_DELIM,
+                        ITALIC_DELIM,
+                        BOLD_DELIM,
+                        SPOILER_DELIM,
+                    ]
+                ]
             ]
-        ]]
-    )))
-URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")
+        )
+    )
+)
 
 OPENING_TAG = "<{}>"
 CLOSING_TAG = ""
@@ -58,66 +66,118 @@
 EMOJI_MARKUP = "{}"
 FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
 CODE_TAG_RE = re.compile(r".*?")
+URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")
 
 
 class Markdown:
     def __init__(self, client: Optional["pyrogram.Client"]):
         self.html = HTML(client)
 
-    def blockquote_parser(self, text):
-        text = re.sub(r'\n>', '\n>', re.sub(r'^>', '>', text))
-        lines = text.split('\n')
-        result = []
+    @staticmethod
+    def escape_and_create_quotes(text: str, strict: bool):
+        text_lines: list[Union[str, None]] = text.splitlines()
+
+        # Indexes of Already escaped lines
+        html_escaped_list: list[int] = []
+
+        # Temporary Queue to hold lines to be quoted
+        to_quote_list: list[tuple[int, str]] = []
+
+        def create_blockquote(quote_type: str = "") -> None:
+            """
+            Merges all lines in quote_queue into first line of queue
+            Encloses that line in html quote
+            Replaces rest of the lines with None placeholders to preserve indexes
+            """
+            if len(to_quote_list) == 0:
+                return
 
-        in_blockquote = False
+            joined_lines = "\n".join([i[1] for i in to_quote_list])
+
+            first_line_index, _ = to_quote_list[0]
+            text_lines[first_line_index] = (
+                f"{joined_lines}"
+            )
+
+            for line_to_remove in to_quote_list[1:]:
+                text_lines[line_to_remove[0]] = None
+
+            to_quote_list.clear()
+
+        # Handle Expandable Quote
+        inside_blockquote = False
+        for index, line in enumerate(text_lines):
+            if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM) and not inside_blockquote:
+                delim_stripped_line = line[3:]
+                parsed_line = (
+                    html.escape(delim_stripped_line) if strict else delim_stripped_line
+                )
+
+                to_quote_list.append((index, parsed_line))
+                html_escaped_list.append(index)
+
+                inside_blockquote = True
+                continue
+
+            elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM) and inside_blockquote:
+                delim_stripped_line = line[:-3]
+                parsed_line = (
+                    html.escape(delim_stripped_line) if strict else delim_stripped_line
+                )
+
+                to_quote_list.append((index, parsed_line))
+                html_escaped_list.append(index)
+
+                inside_blockquote = False
+
+                create_blockquote(quote_type=" expandable")
+
+            if inside_blockquote:
+                parsed_line = html.escape(line) if strict else line
+                to_quote_list.append((index, parsed_line))
+                html_escaped_list.append(index)
+
+        # Handle Single line/Continued Quote
+        for index, line in enumerate(text_lines):
+            if line is None:
+                continue 
+
+            if line.startswith(BLOCKQUOTE_ESCAPE_DELIM):
+                text_lines[index] = line[1:]
+                create_blockquote()
+                continue
 
-        for line in lines:
             if line.startswith(BLOCKQUOTE_DELIM):
-                if not in_blockquote:
-                    line = re.sub(r'^> ', OPENING_TAG.format("blockquote"), line)
-                    line = re.sub(r'^>', OPENING_TAG.format("blockquote"), line)
-                    in_blockquote = True
-                    result.append(line.strip())
-                else:
-                    result.append(line[1:].strip())
-            elif line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM):
-                if not in_blockquote:
-                    line = re.sub(r'^\*\*> ', OPENING_TAG.format("blockquote expandable"), line)
-                    line = re.sub(r'^\*\*>', OPENING_TAG.format("blockquote expandable"), line)
-                    in_blockquote = True
-                    result.append(line.strip())
-                else:
-                    result.append(line[3:].strip())
-            else:
-                if in_blockquote:
-                    line = CLOSING_TAG.format("blockquote") + line
-                    in_blockquote = False
-                result.append(line)
+                delim_stripped_line = line[1:]
+                parsed_line = (
+                    html.escape(delim_stripped_line) if strict else delim_stripped_line
+                )
 
-        if in_blockquote:
-            line = result[len(result)-1] + CLOSING_TAG.format("blockquote")
-            result.pop(len(result)-1)
-            result.append(line)
+                to_quote_list.append((index, parsed_line))
+                html_escaped_list.append(index)
 
-        return '\n'.join(result)
+            elif len(to_quote_list) > 0:
+                create_blockquote()
+        else:
+            create_blockquote()
 
-    async def parse(self, text: str, strict: bool = False):
         if strict:
-            text = html.escape(text)
-        text = self.blockquote_parser(text)
+            for idx, line in enumerate(text_lines):
+                if idx not in html_escaped_list:
+                    text_lines[idx] = html.escape(line)
+
+        return "\n".join(
+            [valid_line for valid_line in text_lines if valid_line is not None]
+        )
 
+    async def parse(self, text: str, strict: bool = False):
+        text = self.escape_and_create_quotes(text, strict=strict)
         delims = set()
         is_fixed_width = False
 
-        placeholders = {}
-        for i, code_section in enumerate(CODE_TAG_RE.findall(text)):
-            placeholder = f"{{CODE_SECTION_{i}}}"
-            placeholders[placeholder] = code_section
-            text = text.replace(code_section, placeholder, 1)
-
         for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
             start, _ = match.span()
-            delim = match.group(1)
+            delim, is_emoji, text_url, url = match.groups()
             full = match.group(0)
 
             if delim in FIXED_WIDTH_DELIMS:
@@ -126,6 +186,20 @@ async def parse(self, text: str, strict: bool = False):
             if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
                 continue
 
+            if not is_emoji and text_url:
+                text = utils.replace_once(
+                    text, full, URL_MARKUP.format(url, text_url), start
+                )
+                continue
+
+            if is_emoji:
+                emoji = text_url
+                emoji_id = url.lstrip("tg://emoji?id=")
+                text = utils.replace_once(
+                    text, full, EMOJI_MARKUP.format(emoji_id, emoji), start
+                )
+                continue
+
             if delim == BOLD_DELIM:
                 tag = "b"
             elif delim == ITALIC_DELIM:
@@ -151,31 +225,15 @@ async def parse(self, text: str, strict: bool = False):
                 tag = CLOSING_TAG.format(tag)
 
             if delim == PRE_DELIM and delim in delims:
-                delim_and_language = text[text.find(PRE_DELIM):].split("\n")[0]
-                language = delim_and_language[len(PRE_DELIM):]
-                text = utils.replace_once(text, delim_and_language, f'
', start)
+                delim_and_language = text[text.find(PRE_DELIM) :].split("\n")[0]
+                language = delim_and_language[len(PRE_DELIM) :]
+                text = utils.replace_once(
+                    text, delim_and_language, f'
', start
+                )
                 continue
 
             text = utils.replace_once(text, delim, tag, start)
 
-        for i, match in enumerate(re.finditer(URL_RE, text)):
-            start, _ = match.span()
-            is_emoji, text_url, url = match.groups()
-            full = match.group(0)
-
-            if not is_emoji and text_url:
-                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
-                continue
-
-            if is_emoji:
-                emoji = text_url
-                emoji_id = url.lstrip("tg://emoji?id=")
-                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
-                continue
-
-        for placeholder, code_section in placeholders.items():
-            text = text.replace(placeholder, code_section)
-
         return await self.html.parse(text)
 
     @staticmethod
@@ -258,22 +316,21 @@ def unparse(text: str, entities: list):
                         else:
                             insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
                     # No closing delimiter for blockquotes
-            else:
-                url = None
-                is_emoji = False
-                if entity.type == MessageEntityType.TEXT_LINK:
-                    url = entity.url
-                elif entity.type == MessageEntityType.TEXT_MENTION:
-                    url = f'tg://user?id={entity.user.id}'
-                elif entity.type == MessageEntityType.CUSTOM_EMOJI:
-                    url = f"tg://emoji?id={entity.custom_emoji_id}"
-                    is_emoji = True
-                if url:
-                    if is_emoji:
-                        insert_at.append((s, i, '!['))
-                    else:
-                        insert_at.append((s, i, '['))
-                    insert_at.append((e, -i, f']({url})'))
+                    url = None
+                    is_emoji = False
+                    if entity.type == MessageEntityType.TEXT_LINK:
+                        url = entity.url
+                    elif entity.type == MessageEntityType.TEXT_MENTION:
+                        url = f'tg://user?id={entity.user.id}'
+                    elif entity.type == MessageEntityType.CUSTOM_EMOJI:
+                        url = f"tg://emoji?id={entity.custom_emoji_id}"
+                        is_emoji = True
+                    if url:
+                        if is_emoji:
+                            insert_at.append((s, i, '!['))
+                        else:
+                            insert_at.append((s, i, '['))
+                        insert_at.append((e, -i, f']({url})'))
 
         insert_at.sort(key=lambda t: (t[0], t[1]))
         while insert_at:
@@ -288,4 +345,14 @@ def unparse(text: str, entities: list):
 
             text = text[:at] + what + text[at:]
 
+        entities_offsets = map(
+            lambda x: x[1],
+            sorted(
+                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
+            ),
+        )
+
+        for entity, offset in entities_offsets:
+            text = text[:offset] + entity + text[offset:]
+
         return utils.remove_surrogates(text)

From e89590ea5803d709663e34477da83146c7fb9abd Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 23 Mar 2025 18:13:35 +0100
Subject: [PATCH 07/16] testing

---
 pyrogram/parser/markdown.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 1134a5d070..826f22864e 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -22,6 +22,7 @@
 
 import pyrogram
 from pyrogram.enums import MessageEntityType
+
 from . import utils
 from .html import HTML
 

From e42b46c5cbef19199cee86620686ca640d6678f0 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 23 Mar 2025 18:14:22 +0100
Subject: [PATCH 08/16] testing

---
 pyrogram/parser/markdown.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 826f22864e..4da90cfe9d 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -346,14 +346,4 @@ def unparse(text: str, entities: list):
 
             text = text[:at] + what + text[at:]
 
-        entities_offsets = map(
-            lambda x: x[1],
-            sorted(
-                enumerate(entities_offsets), key=lambda x: (x[1][1], x[0]), reverse=True
-            ),
-        )
-
-        for entity, offset in entities_offsets:
-            text = text[:offset] + entity + text[offset:]
-
         return utils.remove_surrogates(text)

From 3fc56d32cefb2a3c23621c04ce2679b4feefc2c6 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 23 Mar 2025 18:17:54 +0100
Subject: [PATCH 09/16] testing

---
 pyrogram/parser/markdown.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py
index 4da90cfe9d..b371dadf87 100644
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@@ -316,22 +316,23 @@ def unparse(text: str, entities: list):
                             insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
                         else:
                             insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
-                    # No closing delimiter for blockquotes
-                    url = None
-                    is_emoji = False
-                    if entity.type == MessageEntityType.TEXT_LINK:
-                        url = entity.url
-                    elif entity.type == MessageEntityType.TEXT_MENTION:
-                        url = f'tg://user?id={entity.user.id}'
-                    elif entity.type == MessageEntityType.CUSTOM_EMOJI:
-                        url = f"tg://emoji?id={entity.custom_emoji_id}"
-                        is_emoji = True
-                    if url:
-                        if is_emoji:
-                            insert_at.append((s, i, '!['))
-                        else:
-                            insert_at.append((s, i, '['))
-                        insert_at.append((e, -i, f']({url})'))
+            # No closing delimiter for blockquotes
+            else:
+                url = None
+                is_emoji = False
+                if entity.type == MessageEntityType.TEXT_LINK:
+                    url = entity.url
+                elif entity.type == MessageEntityType.TEXT_MENTION:
+                    url = f'tg://user?id={entity.user.id}'
+                elif entity.type == MessageEntityType.CUSTOM_EMOJI:
+                    url = f"tg://emoji?id={entity.custom_emoji_id}"
+                    is_emoji = True
+                if url:
+                    if is_emoji:
+                        insert_at.append((s, i, '!['))
+                    else:
+                        insert_at.append((s, i, '['))
+                    insert_at.append((e, -i, f']({url})'))
 
         insert_at.sort(key=lambda t: (t[0], t[1]))
         while insert_at:

From 8e8d1c4d48163d8c357f5e532fc243408f5dd578 Mon Sep 17 00:00:00 2001
From: shriMADhav U k 
Date: Sun, 23 Mar 2025 18:28:53 +0100
Subject: [PATCH 10/16] docs: text-formatting: Move up HTML section

https://github.com/Mayuri-Chan/pyrofork/commit/7562d04
https://github.com/Mayuri-Chan/pyrofork/commit/4926eda
https://github.com/Mayuri-Chan/pyrofork/commit/149dd4a

Co-Authored-By: wulan17 
---
 docs/source/topics/text-formatting.rst | 190 +++++++++++++------------
 1 file changed, 99 insertions(+), 91 deletions(-)

diff --git a/docs/source/topics/text-formatting.rst b/docs/source/topics/text-formatting.rst
index 56f7d43644..0619c2a5a6 100644
--- a/docs/source/topics/text-formatting.rst
+++ b/docs/source/topics/text-formatting.rst
@@ -40,8 +40,98 @@ list of the basic styles currently supported by Pyrogram.
 - spoiler
 - `text URL `_
 - `user text mention `_
+- :emoji:`👍`
 
 
+HTML Style
+----------
+
+To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.HTML` to the *parse_mode* parameter when using
+:meth:`~pyrogram.Client.send_message`. The following tags are currently supported:
+
+.. code-block:: text
+
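+    <b>bold</b>, <strong>bold</strong>
+
+    <i>italic</i>, <em>italic</em>
+
+    <u>underline</u>
+
+    <s>strike</s>, <del>strike</del>, <strike>strike</strike>
+
+    <spoiler>spoiler</spoiler>
+
+    <a href="https://telegramplayground.github.io/pyrogram/">text URL</a>
+
+    <a href="tg://user?id=123456789">inline mention</a>
+
+    <code>inline fixed-width code</code>
+
+    <emoji id="5469770542288478598">👍</emoji>
+
+    <pre>
+    pre-formatted
+      fixed-width
+        code block
+    </pre>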
+ +**Example**: + +.. code-block:: python + + from pyrogram.enums import ParseMode + + await app.send_message( + chat_id="me", + text=( + "bold, bold" + "italic, italic" + "underline, underline" + "strike, strike, strike" + "spoiler\n\n" + + "bold italic bold italic bold strike italic bold strike spoiler underline italic bold bold\n\n" + + "inline URL " + "inline mention of a user\n" + "👍 " + "inline fixed-width code " + "
pre-formatted fixed-width code block
\n\n" + "
"
+            "for i in range(10):\n"
+            "    print(i)"
+            "
\n\n" + + "
Block quotation started" + "Block quotation continued" + "The last line of the block quotation
" + "
Expandable block quotation started" + "Expandable block quotation continued" + "Expandable block quotation continued" + "Hidden by default part of the block quotation started" + "Expandable block quotation continued" + "The last line of the block quotation
" + ), + parse_mode=ParseMode.HTML + ) + +.. note:: + + All ``<``, ``>`` and ``&`` symbols that are not a part of a tag or an HTML entity must be replaced with the + corresponding HTML entities (``<`` with ``<``, ``>`` with ``>`` and ``&`` with ``&``). You can use this + snippet to quickly escape those characters: + + .. code-block:: python + + text = "" + text = text.replace("<", "<").replace("&", "&") + + print(text) + + .. code-block:: text + + <my & text> + Markdown Style -------------- @@ -107,8 +197,9 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the "~~strike~~, " "||spoiler||, " "[URL](https://telegramplayground.github.io/pyrogram/), " + "![👍](tg://emoji?id=5469770542288478598)" "`code`, " - "```" + "```py" "for i in range(10):\n" " print(i)" "```\n" @@ -135,96 +226,6 @@ To strictly use this mode, pass :obj:`~pyrogram.enums.ParseMode.MARKDOWN` to the parse_mode=ParseMode.MARKDOWN ) -HTML Style ----------- - -To strictly use this mode, pass :obj:`~pyrogram.enums.HTML` to the *parse_mode* parameter when using -:meth:`~pyrogram.Client.send_message`. The following tags are currently supported: - -.. code-block:: text - - bold, bold - - italic, italic - - underline - - strike, strike, strike - - spoiler - - text URL - - inline mention - - inline fixed-width code - - 🔥 - -
-    pre-formatted
-      fixed-width
-        code block
-    
- -**Example**: - -.. code-block:: python - - from pyrogram.enums import ParseMode - - await app.send_message( - chat_id="me", - text=( - "bold, bold" - "italic, italic" - "underline, underline" - "strike, strike, strike" - "spoiler\n\n" - - "bold italic bold italic bold strike italic bold strike spoiler underline italic bold bold\n\n" - - "inline URL " - "inline mention of a user\n" - "👍 " - "inline fixed-width code " - "
pre-formatted fixed-width code block
\n\n" - "
"
-            "for i in range(10):\n"
-            "    print(i)"
-            "
\n\n" - - "
Block quotation started" - "Block quotation continued" - "The last line of the block quotation
" - "
Expandable block quotation started" - "Expandable block quotation continued" - "Expandable block quotation continued" - "Hidden by default part of the block quotation started" - "Expandable block quotation continued" - "The last line of the block quotation
" - ), - parse_mode=ParseMode.HTML - ) - -.. note:: - - All ``<``, ``>`` and ``&`` symbols that are not a part of a tag or an HTML entity must be replaced with the - corresponding HTML entities (``<`` with ``<``, ``>`` with ``>`` and ``&`` with ``&``). You can use this - snippet to quickly escape those characters: - - .. code-block:: python - - import html - - text = "" - text = html.escape(text) - - print(text) - - .. code-block:: text - - <my text> Different Styles ---------------- @@ -272,6 +273,13 @@ Result: Nested and Overlapping Entities ------------------------------- +.. warning:: + + The Markdown style is not recommended for complex text formatting. + + If you want to use complex text formatting such as nested entities, overlapping entities use the HTML style instead. + + You can also style texts with more than one decoration at once by nesting entities together. For example, you can send a text message with both :bold-underline:`bold and underline` styles, or a text that has both :strike-italic:`italic and strike` styles, and you can still combine both Markdown and HTML together. 
From b2f8bdadfdfe751f552e7f3f954532d5e5d01ee4 Mon Sep 17 00:00:00 2001 From: Shrimadhav U K Date: Mon, 24 Mar 2025 15:30:35 +0530 Subject: [PATCH 11/16] twcp --- pyrogram/parser/markdown.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index b371dadf87..bc26f23e9a 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -281,7 +281,7 @@ def unparse(text: str, entities: list): blk_entity.offset < e <= blk_entity.offset + blk_entity.length and blk_entity.collapsed for blk_entity in entities - if blk_entity.type == MessageEntityType.BLOCKQUOTE + if blk_entity.type == MessageEntityType.EXPANDABLE_BLOCKQUOTE ) if inside_blockquote: if is_expandable: @@ -297,7 +297,10 @@ def unparse(text: str, entities: list): open_delimiter = f"{delimiter}\n>" close_delimiter = f"\n>{delimiter}" else: - open_delimiter = delimiter + if entity.language: + open_delimiter = f"{delimiter}{entity.language}" + else: + open_delimiter = delimiter close_delimiter = delimiter insert_at.append((s, i, open_delimiter)) insert_at.append((e, -i, close_delimiter)) @@ -329,10 +332,10 @@ def unparse(text: str, entities: list): is_emoji = True if url: if is_emoji: - insert_at.append((s, i, '![')) + insert_at.append((s, i, '!')) else: insert_at.append((s, i, '[')) - insert_at.append((e, -i, f']({url})')) + insert_at.append((e, -i, f'')) insert_at.sort(key=lambda t: (t[0], t[1])) while insert_at: From dc6a742b924c272a028943daeccf316216ff8b5e Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 24 Mar 2025 13:34:01 +0100 Subject: [PATCH 12/16] fixings --- pyrogram/parser/markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index bc26f23e9a..af4e25f395 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -298,7 +298,7 @@ def unparse(text: str, entities: list): close_delimiter = 
f"\n>{delimiter}" else: if entity.language: - open_delimiter = f"{delimiter}{entity.language}" + open_delimiter = f"{delimiter}{entity.language}\n" else: open_delimiter = delimiter close_delimiter = delimiter From 0e29d98d1357b668e2df83f6e81bd71ded05bf15 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 24 Mar 2025 13:35:47 +0100 Subject: [PATCH 13/16] fixings --- pyrogram/parser/markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index af4e25f395..9cd2074618 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -335,7 +335,7 @@ def unparse(text: str, entities: list): insert_at.append((s, i, '!')) else: insert_at.append((s, i, '[')) - insert_at.append((e, -i, f'')) + insert_at.append((e, -i, f']({url})')) insert_at.sort(key=lambda t: (t[0], t[1])) while insert_at: From 9606af9618f15b6ad2931ae10ebab9f32451a87b Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Mon, 24 Mar 2025 13:36:47 +0100 Subject: [PATCH 14/16] fixings --- pyrogram/parser/markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index 9cd2074618..0097285cf9 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -332,7 +332,7 @@ def unparse(text: str, entities: list): is_emoji = True if url: if is_emoji: - insert_at.append((s, i, '!')) + insert_at.append((s, i, '![')) else: insert_at.append((s, i, '[')) insert_at.append((e, -i, f']({url})')) From e122eacb3257ac391b09287ae6055927322c9a9a Mon Sep 17 00:00:00 2001 From: Ryuk <88324835+anonymousx97@users.noreply.github.com> Date: Tue, 25 Mar 2025 16:28:38 +0530 Subject: [PATCH 15/16] `parser.markdown.py`: change blockquote style to BotAPI. 
(#164) --- pyrogram/parser/markdown.py | 150 ++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index 0097285cf9..4c5dea9724 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -36,8 +36,7 @@ BLOCKQUOTE_DELIM = ">" BLOCKQUOTE_ESCAPE_DELIM = "|>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" -BLOCKQUOTE_EXPANDABLE_END_DELIM = "<**" - +BLOCKQUOTE_EXPANDABLE_OPTIONAL_END_DELIM = "<**" MARKDOWN_RE = re.compile( r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( @@ -82,6 +81,7 @@ def escape_and_create_quotes(text: str, strict: bool): html_escaped_list: list[int] = [] # Temporary Queue to hold lines to be quoted + # Index and Line to_quote_list: list[tuple[int, str]] = [] def create_blockquote(quote_type: str = "") -> None: @@ -93,83 +93,84 @@ def create_blockquote(quote_type: str = "") -> None: if len(to_quote_list) == 0: return - joined_lines = "\n".join([i[1] for i in to_quote_list]) + # Create quoted text block + joined_lines = "\n".join([text for _, text in to_quote_list]) first_line_index, _ = to_quote_list[0] - text_lines[first_line_index] = ( - f"{joined_lines}" - ) - for line_to_remove in to_quote_list[1:]: - text_lines[line_to_remove[0]] = None + # Enclose the block in html quote + # and add to starting index of quoted line + text_lines[first_line_index] = f"{joined_lines}" + + # Set None Placeholders for preserving indexes + for idx, line_to_remove in to_quote_list[1:]: + text_lines[idx] = None + # clear queue to_quote_list.clear() - # Handle Expandable Quote - inside_blockquote = False - for index, line in enumerate(text_lines): - if line.startswith(BLOCKQUOTE_EXPANDABLE_DELIM) and not inside_blockquote: - delim_stripped_line = line[3:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) + def process_text(start_delimiter, end_delimiter: str = "", quote_type: str = ""): + for index, line in 
enumerate(text_lines): + # Ignore None placeholders from previous runs + if line is None: + continue - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) + # Ignore Escaped > + if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): + text_lines[index] = line[1:] + create_blockquote(quote_type=quote_type) + continue - inside_blockquote = True - continue + # Parse lines starting with delimiter + if line.startswith(start_delimiter): + endswith_delimiter = end_delimiter and line.endswith(end_delimiter) - elif line.endswith(BLOCKQUOTE_EXPANDABLE_END_DELIM) and inside_blockquote: - delim_stripped_line = line[:-3] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) + # Indexes to skip in line + start_index = len(start_delimiter) + end_index = end_index = len(line) - len(end_delimiter) if endswith_delimiter else len(line) - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) + # Strip delimiters + delimiter_stripped_line = line[start_index:end_index] - inside_blockquote = False + # Escape if strict + parsed_line = html.escape(delimiter_stripped_line) if strict else delimiter_stripped_line + + # add to queue + to_quote_list.append((index, parsed_line)) - create_blockquote(quote_type=" expandable") + # save line index + html_escaped_list.append(index) - if inside_blockquote: - parsed_line = html.escape(line) if strict else line - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) + # if line doesn't end with delimiter continue loop + if not endswith_delimiter: + continue - # Handle Single line/Continued Quote - for index, line in enumerate(text_lines): - if line is None: - continue - - if line.startswith(BLOCKQUOTE_ESCAPE_DELIM): - text_lines[index] = line[1:] - create_blockquote() - continue - - if line.startswith(BLOCKQUOTE_DELIM): - delim_stripped_line = line[1:] - parsed_line = ( - html.escape(delim_stripped_line) if strict else delim_stripped_line - ) + # If line 
doesn't start with a delimiter + # or has ended with delimiter + # it means the block quote has ended + # create pending quotes if any + create_blockquote(quote_type=quote_type) - to_quote_list.append((index, parsed_line)) - html_escaped_list.append(index) - - elif len(to_quote_list) > 0: - create_blockquote() - else: - create_blockquote() + else: + # is triggered when there's only one line of text + # the line above won't be triggered + # because loop will exit after first iteration + # so try to create quote if any in queue + create_blockquote(quote_type=quote_type) + + process_text( + start_delimiter=BLOCKQUOTE_EXPANDABLE_DELIM, + end_delimiter=BLOCKQUOTE_EXPANDABLE_OPTIONAL_END_DELIM, + quote_type=" expandable", + ) + process_text(start_delimiter=BLOCKQUOTE_DELIM) if strict: for idx, line in enumerate(text_lines): if idx not in html_escaped_list: text_lines[idx] = html.escape(line) - return "\n".join( - [valid_line for valid_line in text_lines if valid_line is not None] - ) + return "\n".join(filter(lambda x: x is not None, text_lines)) async def parse(self, text: str, strict: bool = False): text = self.escape_and_create_quotes(text, strict=strict) @@ -188,17 +189,13 @@ async def parse(self, text: str, strict: bool = False): continue if not is_emoji and text_url: - text = utils.replace_once( - text, full, URL_MARKUP.format(url, text_url), start - ) + text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start) continue if is_emoji: emoji = text_url emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once( - text, full, EMOJI_MARKUP.format(emoji_id, emoji), start - ) + text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start) continue if delim == BOLD_DELIM: @@ -258,7 +255,7 @@ def unparse(text: str, entities: list): MessageEntityType.PRE: PRE_DELIM, MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM, MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM, - MessageEntityType.SPOILER: SPOILER_DELIM 
+ MessageEntityType.SPOILER: SPOILER_DELIM, } text = utils.add_surrogates(text) @@ -271,15 +268,15 @@ def unparse(text: str, entities: list): if delimiter: if entity.type == MessageEntityType.PRE: inside_blockquote = any( - blk_entity.offset <= s < blk_entity.offset + blk_entity.length and - blk_entity.offset < e <= blk_entity.offset + blk_entity.length + blk_entity.offset <= s < blk_entity.offset + blk_entity.length + and blk_entity.offset < e <= blk_entity.offset + blk_entity.length for blk_entity in entities if blk_entity.type == MessageEntityType.BLOCKQUOTE ) is_expandable = any( - blk_entity.offset <= s < blk_entity.offset + blk_entity.length and - blk_entity.offset < e <= blk_entity.offset + blk_entity.length and - blk_entity.collapsed + blk_entity.offset <= s < blk_entity.offset + blk_entity.length + and blk_entity.offset < e <= blk_entity.offset + blk_entity.length + and blk_entity.collapsed for blk_entity in entities if blk_entity.type == MessageEntityType.EXPANDABLE_BLOCKQUOTE ) @@ -304,7 +301,10 @@ def unparse(text: str, entities: list): close_delimiter = delimiter insert_at.append((s, i, open_delimiter)) insert_at.append((e, -i, close_delimiter)) - elif entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE: + elif ( + entity.type != MessageEntityType.BLOCKQUOTE + and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE + ): open_delimiter = delimiter close_delimiter = delimiter insert_at.append((s, i, open_delimiter)) @@ -326,16 +326,16 @@ def unparse(text: str, entities: list): if entity.type == MessageEntityType.TEXT_LINK: url = entity.url elif entity.type == MessageEntityType.TEXT_MENTION: - url = f'tg://user?id={entity.user.id}' + url = f"tg://user?id={entity.user.id}" elif entity.type == MessageEntityType.CUSTOM_EMOJI: url = f"tg://emoji?id={entity.custom_emoji_id}" is_emoji = True if url: if is_emoji: - insert_at.append((s, i, '![')) + insert_at.append((s, i, "![")) else: - insert_at.append((s, i, 
'[')) - insert_at.append((e, -i, f']({url})')) + insert_at.append((s, i, "[")) + insert_at.append((e, -i, f"]({url})")) insert_at.sort(key=lambda t: (t[0], t[1])) while insert_at: From 1da550cd8c397507876a92a7a2bea7dc88cad571 Mon Sep 17 00:00:00 2001 From: shriMADhav U k Date: Wed, 26 Mar 2025 14:31:34 +0100 Subject: [PATCH 16/16] fixes --- pyrogram/parser/markdown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index 4c5dea9724..167c1655e1 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -276,7 +276,7 @@ def unparse(text: str, entities: list): is_expandable = any( blk_entity.offset <= s < blk_entity.offset + blk_entity.length and blk_entity.offset < e <= blk_entity.offset + blk_entity.length - and blk_entity.collapsed + # and blk_entity.collapsed for blk_entity in entities if blk_entity.type == MessageEntityType.EXPANDABLE_BLOCKQUOTE ) @@ -297,7 +297,7 @@ def unparse(text: str, entities: list): if entity.language: open_delimiter = f"{delimiter}{entity.language}\n" else: - open_delimiter = delimiter + open_delimiter = f"{delimiter}\n" close_delimiter = delimiter insert_at.append((s, i, open_delimiter)) insert_at.append((e, -i, close_delimiter))