feat(utils): add Render util for recreating formatted text

- Add new `render` module in `crates/teloxide/src/utils.rs`
- Update CHANGELOG.md to document the new Render utility
- Expose ESCAPE_CHARS in markdown.rs for reuse in render module

This new Render utility allows recreating rendered HTML or Markdown
formatted text from text/caption and MessageEntity. It enhances the
library's text formatting capabilities, as discussed in PR #1152.
This commit is contained in:
YouKnow 2024-08-27 01:52:58 +03:30
parent 7b2de9ad39
commit a77e9f58f0
8 changed files with 623 additions and 4 deletions

View file

@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `filter_boost_added` and `filter_reply_to_story` filters to `MessageFilterExt` trait - Add `filter_boost_added` and `filter_reply_to_story` filters to `MessageFilterExt` trait
- Add `filter_mention_command` filter to `HandlerExt` trait ([issue #494](https://github.com/teloxide/teloxide/issues/494)) - Add `filter_mention_command` filter to `HandlerExt` trait ([issue #494](https://github.com/teloxide/teloxide/issues/494))
- Add `filter_business_connection`, `filter_business_message`, `filter_edited_business_message`, and `filter_deleted_business_messages` filters to update filters ([PR 1146](https://github.com/teloxide/teloxide/pull/1146)) - Add `filter_business_connection`, `filter_business_message`, `filter_edited_business_message`, and `filter_deleted_business_messages` filters to update filters ([PR 1146](https://github.com/teloxide/teloxide/pull/1146))
- Add `Render` util to recreate rendered Html or Markdown formatted text from text/caption and `MessageEntity` ([PR 1152](https://github.com/teloxide/teloxide/pull/1152))
### Changed ### Changed

View file

@ -3,6 +3,7 @@
pub mod command; pub mod command;
pub mod html; pub mod html;
pub mod markdown; pub mod markdown;
pub mod render;
pub(crate) mod shutdown_token; pub(crate) mod shutdown_token;
pub use teloxide_core::net::client_from_env; pub use teloxide_core::net::client_from_env;

View file

@ -4,6 +4,9 @@
use teloxide_core::types::{User, UserId}; use teloxide_core::types::{User, UserId};
pub(super) const ESCAPE_CHARS: [char; 18] =
['_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!'];
/// Applies the bold font style to the string. /// Applies the bold font style to the string.
/// ///
/// Passed string will not be automatically escaped because it can contain /// Passed string will not be automatically escaped because it can contain
@ -119,11 +122,8 @@ pub fn code_inline(s: &str) -> String {
#[must_use = "This function returns a new string, rather than mutating the argument, so calling it \ #[must_use = "This function returns a new string, rather than mutating the argument, so calling it \
without using its output does nothing useful"] without using its output does nothing useful"]
pub fn escape(s: &str) -> String { pub fn escape(s: &str) -> String {
const CHARS: [char; 18] =
['_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!'];
s.chars().fold(String::with_capacity(s.len()), |mut s, c| { s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
if CHARS.contains(&c) { if ESCAPE_CHARS.contains(&c) {
s.push('\\'); s.push('\\');
} }
s.push(c); s.push(c);

View file

@ -0,0 +1,61 @@
//! A helper trait for rendering text/caption and entities back to HTML or
//! Markdown
use teloxide_core::types::Message;
use super::Render;
/// Convenience accessors that turn the text or caption of a Telegram message,
/// together with its entities, back into an HTML or Markdown string.
///
/// Every method yields `None` when there is nothing to render for the
/// requested part of the message.
pub trait RenderMessageTextHelper {
    /// Renders the message text and its entities (bold, italic, links, ...)
    /// as an HTML-formatted string.
    ///
    /// Returns `None` when the message has no text to render.
    #[must_use]
    fn html_text(&self) -> Option<String>;

    /// Renders the message text and its entities (bold, italic, links, ...)
    /// as a Markdown-formatted string.
    ///
    /// Returns `None` when the message has no text to render.
    #[must_use]
    fn markdown_text(&self) -> Option<String>;

    /// Renders the message caption and its entities (bold, italic, links,
    /// ...) as an HTML-formatted string.
    ///
    /// Returns `None` when the message has no caption to render.
    #[must_use]
    fn html_caption(&self) -> Option<String>;

    /// Renders the message caption and its entities (bold, italic, links,
    /// ...) as a Markdown-formatted string.
    ///
    /// Returns `None` when the message has no caption to render.
    #[must_use]
    fn markdown_caption(&self) -> Option<String>;
}
/// Renders a [`Message`] through [`Render`].
///
/// Each method needs both the text (or caption) and the matching entity list;
/// if either accessor returns `None`, the method returns `None` as well.
impl RenderMessageTextHelper for Message {
    fn html_text(&self) -> Option<String> {
        let (text, entities) = self.text().zip(self.entities())?;
        Some(Render::new(text, entities).as_html())
    }

    fn markdown_text(&self) -> Option<String> {
        let (text, entities) = self.text().zip(self.entities())?;
        Some(Render::new(text, entities).as_markdown())
    }

    fn html_caption(&self) -> Option<String> {
        let (caption, entities) = self.caption().zip(self.caption_entities())?;
        Some(Render::new(caption, entities).as_html())
    }

    fn markdown_caption(&self) -> Option<String> {
        let (caption, entities) = self.caption().zip(self.caption_entities())?;
        Some(Render::new(caption, entities).as_markdown())
    }
}

View file

@ -0,0 +1,71 @@
use std::fmt::Write;
use super::{ComplexTag, Kind, Place, SimpleTag, Tag, TagWriter};
/// Tag table and writer callbacks used to render entities as HTML.
///
/// `SimpleTag` entries hold a fixed opening and closing string. `ComplexTag`
/// entries hold an opening prefix, a separator that `write_tag` emits right
/// after the interpolated value, and a closing string.
pub static HTML: TagWriter = TagWriter {
bold: SimpleTag::new("<b>", "</b>"),
blockquote: SimpleTag::new("<blockquote>", "</blockquote>"),
italic: SimpleTag::new("<i>", "</i>"),
underline: SimpleTag::new("<u>", "</u>"),
strikethrough: SimpleTag::new("<s>", "</s>"),
spoiler: SimpleTag::new("<tg-spoiler>", "</tg-spoiler>"),
code: SimpleTag::new("<code>", "</code>"),
// Used for `Pre` entities without a language.
pre_no_lang: SimpleTag::new("<pre>", "</pre>"),
// Used for `Pre` entities with a language; the language is the interpolated value.
pre: ComplexTag::new("<pre><code class=\"language-", "\">", "</code></pre>"),
// The interpolated value is the link URL.
text_link: ComplexTag::new("<a href=\"", "\">", "</a>"),
// The interpolated value is the numeric user id.
text_mention: ComplexTag::new("<a href=\"tg://user?id=", "\">", "</a>"),
// The interpolated value is the custom emoji id.
custom_emoji: ComplexTag::new("<tg-emoji emoji-id=\"", "\">", "</tg-emoji>"),
write_tag_fn: write_tag,
write_char_fn: write_char,
};
fn write_tag(tag: &Tag, buf: &mut String) {
match tag.kind {
Kind::Bold => buf.push_str(HTML.bold.get_tag(tag.place)),
Kind::Blockquote => buf.push_str(HTML.blockquote.get_tag(tag.place)),
Kind::Italic => buf.push_str(HTML.italic.get_tag(tag.place)),
Kind::Underline => buf.push_str(HTML.underline.get_tag(tag.place)),
Kind::Strikethrough => buf.push_str(HTML.strikethrough.get_tag(tag.place)),
Kind::Spoiler => buf.push_str(HTML.spoiler.get_tag(tag.place)),
Kind::Code => buf.push_str(HTML.code.get_tag(tag.place)),
Kind::Pre(lang) => match tag.place {
Place::Start => match lang {
Some(lang) => write!(buf, "{}{}{}", HTML.pre.start, lang, HTML.pre.middle).unwrap(),
None => buf.push_str(HTML.pre_no_lang.start),
},
Place::End => buf.push_str(lang.map_or(HTML.pre_no_lang.end, |_| HTML.pre.end)),
},
Kind::TextLink(url) => match tag.place {
Place::Start => {
write!(buf, "{}{}{}", HTML.text_link.start, url, HTML.text_link.middle).unwrap()
}
Place::End => buf.push_str(HTML.text_link.end),
},
Kind::TextMention(id) => match tag.place {
Place::Start => {
write!(buf, "{}{}{}", HTML.text_mention.start, id, HTML.text_mention.middle)
.unwrap()
}
Place::End => buf.push_str(HTML.text_mention.end),
},
Kind::CustomEmoji(custom_emoji_id) => match tag.place {
Place::Start => write!(
buf,
"{}{}{}",
HTML.custom_emoji.start, custom_emoji_id, HTML.custom_emoji.middle
)
.unwrap(),
Place::End => buf.push_str(HTML.custom_emoji.end),
},
}
}
/// Appends `ch` to `buf`, replacing `&`, `<` and `>` with their HTML entities.
///
/// Every other character is written unchanged.
fn write_char(ch: char, buf: &mut String) {
    let entity = match ch {
        '&' => "&amp;",
        '<' => "&lt;",
        '>' => "&gt;",
        // Nothing to escape: copy the character verbatim.
        _ => return buf.push(ch),
    };
    buf.push_str(entity);
}

View file

@ -0,0 +1,74 @@
use std::fmt::Write;
use crate::utils::markdown::ESCAPE_CHARS;
use super::{ComplexTag, Kind, Place, SimpleTag, Tag, TagWriter};
/// Tag table and writer callbacks used to render entities as Markdown.
///
/// `SimpleTag` entries hold a fixed opening and closing string. For the
/// link-like `ComplexTag` entries, `write_tag` emits `start` when opening and
/// `middle`, the value and `end` when closing; for `pre` it emits `start`, the
/// language and `middle` when opening and `end` when closing.
pub static MARKDOWN: TagWriter = TagWriter {
// NOTE(review): Telegram's MarkdownV2 reference shows bold as a single `*` —
// confirm that `**` parses as intended.
bold: SimpleTag::new("**", "**"),
// Only an opening marker is emitted; the closing string is empty.
blockquote: SimpleTag::new(">", ""),
// NOTE(review): the trailing `\r` presumably keeps adjacent `_` / `__`
// delimiters from merging — confirm.
italic: SimpleTag::new("_\r", "_\r"),
underline: SimpleTag::new("__\r", "__\r"),
strikethrough: SimpleTag::new("~", "~"),
spoiler: SimpleTag::new("||", "||"),
code: SimpleTag::new("`", "`"),
// Used for `Pre` entities without a language.
pre_no_lang: SimpleTag::new("```\n", "```\n"),
// Used for `Pre` entities with a language; the language follows the opening fence.
pre: ComplexTag::new("```", "\n", "```\n"),
text_link: ComplexTag::new("[", "](", ")"),
text_mention: ComplexTag::new("[", "](tg://user?id=", ")"),
// NOTE(review): Telegram's MarkdownV2 reference shows custom emoji as
// `![..](tg://emoji?id=..)` with a leading `!` — confirm.
custom_emoji: ComplexTag::new("[", "](tg://emoji?id=", ")"),
write_tag_fn: write_tag,
write_char_fn: write_char,
};
fn write_tag(tag: &Tag, buf: &mut String) {
match tag.kind {
Kind::Bold => buf.push_str(MARKDOWN.bold.get_tag(tag.place)),
Kind::Blockquote => buf.push_str(MARKDOWN.blockquote.get_tag(tag.place)),
Kind::Italic => buf.push_str(MARKDOWN.italic.get_tag(tag.place)),
Kind::Underline => buf.push_str(MARKDOWN.underline.get_tag(tag.place)),
Kind::Strikethrough => buf.push_str(MARKDOWN.strikethrough.get_tag(tag.place)),
Kind::Spoiler => buf.push_str(MARKDOWN.spoiler.get_tag(tag.place)),
Kind::Code => buf.push_str(MARKDOWN.code.get_tag(tag.place)),
Kind::Pre(lang) => match tag.place {
Place::Start => match lang {
Some(lang) => {
write!(buf, "{}{}{}", MARKDOWN.pre.start, lang, MARKDOWN.pre.middle).unwrap()
}
None => buf.push_str(MARKDOWN.pre_no_lang.start),
},
Place::End => buf.push_str(lang.map_or(MARKDOWN.pre_no_lang.end, |_| MARKDOWN.pre.end)),
},
Kind::TextLink(url) => match tag.place {
Place::Start => buf.push_str(MARKDOWN.text_link.start),
Place::End => {
write!(buf, "{}{}{}", MARKDOWN.text_link.middle, url, MARKDOWN.text_link.end)
.unwrap()
}
},
Kind::TextMention(id) => match tag.place {
Place::Start => buf.push_str(MARKDOWN.text_mention.start),
Place::End => {
write!(buf, "{}{}{}", MARKDOWN.text_mention.middle, id, MARKDOWN.text_mention.end)
.unwrap()
}
},
Kind::CustomEmoji(custom_emoji_id) => match tag.place {
Place::Start => buf.push_str(MARKDOWN.custom_emoji.start),
Place::End => write!(
buf,
"{}{}{}",
MARKDOWN.custom_emoji.middle, custom_emoji_id, MARKDOWN.custom_emoji.end
)
.unwrap(),
},
}
}
/// Appends `ch` to `buf`, prefixing it with a backslash when it is one of the
/// characters listed in `ESCAPE_CHARS`.
fn write_char(ch: char, buf: &mut String) {
    let needs_escape = ESCAPE_CHARS.iter().any(|&special| special == ch);
    if needs_escape {
        buf.push('\\');
    }
    buf.push(ch);
}

View file

@ -0,0 +1,255 @@
//! Utilities for rendering text and message entities to HTML and Markdown strings
use teloxide_core::types::{MessageEntity, MessageEntityKind as MEK};
use tag::*;
pub use helper::RenderMessageTextHelper;
mod helper;
mod html;
mod markdown;
mod tag;
/// The [`Render`] struct is responsible for parsing the text and entities to
/// produce the final formatted output.
///
/// It borrows the text and keeps a sorted list of opening/closing tags built
/// from the entities; rendering walks both in parallel.
#[derive(Clone, Eq, PartialEq)]
pub struct Render<'a> {
// The original, unformatted text the tags refer to.
text: &'a str,
// Opening and closing tags derived from the entities, sorted with `Tag`'s
// ordering by `Render::new`.
tags: Vec<Tag<'a>>,
}
impl<'a> Render<'a> {
/// Creates a new `Render` instance with the given text and entities.
///
/// Every entity of a renderable kind (bold, blockquote, italic, underline,
/// strikethrough, spoiler, code, pre, text link, text mention, custom
/// emoji) is turned into an opening tag at `offset` and a closing tag at
/// `offset + length`. Entities of any other kind are ignored. The tags are
/// then sorted so that rendering can consume them front to back.
///
/// # Arguments
///
/// - `text`: The input text to be parsed.
/// - `entities`: The message entities (formatting, links, etc.) to be
///   applied to the text.
///
/// # Returns
///
/// A new [`Render`] instance.
#[must_use]
pub fn new(text: &'a str, entities: &'a [MessageEntity]) -> Self {
    // Each rendered entity contributes exactly two tags, so twice the entity
    // count is an upper bound. Reserving it up front avoids a separate
    // counting pass with its own copy of the kind list.
    let mut tags = Vec::with_capacity(entities.len() * 2);

    for (index, entity) in entities.iter().enumerate() {
        let kind = match &entity.kind {
            MEK::Bold => Kind::Bold,
            MEK::Blockquote => Kind::Blockquote,
            MEK::Italic => Kind::Italic,
            MEK::Underline => Kind::Underline,
            MEK::Strikethrough => Kind::Strikethrough,
            MEK::Spoiler => Kind::Spoiler,
            MEK::Code => Kind::Code,
            MEK::Pre { language } => Kind::Pre(language.as_deref()),
            MEK::TextLink { url } => Kind::TextLink(url.as_str()),
            MEK::TextMention { user } => Kind::TextMention(user.id.0),
            MEK::CustomEmoji { custom_emoji_id } => Kind::CustomEmoji(custom_emoji_id),
            // Kinds without markup of their own are not rendered.
            _ => continue,
        };

        // FIXME: maybe instead of clone store all the `kind`s in a separate
        // vector and then just store the index here?
        tags.push(Tag::start(kind.clone(), entity.offset, index));
        tags.push(Tag::end(kind, entity.offset + entity.length, index));
    }

    // `Tag`'s ordering never yields `Equal` for two distinct tags created
    // here (offset, place and index together are unique), so an unstable
    // sort is sufficient.
    tags.sort_unstable();

    Self { text, tags }
}
/// Produces the formatted string by interleaving the text with its tags.
///
/// Tag offsets are compared against positions counted in UTF-16 code units,
/// so the text is walked one `char` at a time while tracking how many UTF-16
/// units have been consumed. Before a character is written, every pending
/// tag whose offset falls inside that character's UTF-16 span is written
/// first. Tags left over once the text is exhausted are appended at the end.
///
/// When there are no tags the original text is returned unchanged (no
/// escaping is applied in that case).
#[must_use]
fn format(&self, writer: &TagWriter) -> String {
    if self.tags.is_empty() {
        return self.text.to_owned();
    }

    let capacity = self.text.len() + writer.get_tags_sizes(&self.tags);
    let mut out = String::with_capacity(capacity);

    let mut pending = self.tags.iter().peekable();
    let mut utf16_pos = 0_usize;

    for ch in self.text.chars() {
        // One past the last UTF-16 unit occupied by `ch`.
        let after = utf16_pos + ch.len_utf16();

        // The tags are sorted by offset and everything before `utf16_pos`
        // has already been consumed, so this drains exactly the tags that
        // point at one of this character's code units.
        while let Some(tag) = pending.next_if(|tag| tag.offset < after) {
            (writer.write_tag_fn)(tag, &mut out);
        }

        (writer.write_char_fn)(ch, &mut out);
        utf16_pos = after;
    }

    // Closing tags at the very end of the text (or any tag past it).
    for tag in pending {
        (writer.write_tag_fn)(tag, &mut out);
    }

    out
}
/// Renders the text and its entities as an **HTML-formatted** string.
#[must_use]
pub fn as_html(&self) -> String {
    let writer = &html::HTML;
    self.format(writer)
}
/// Renders the text and its entities as a **Markdown-formatted** string.
#[must_use]
pub fn as_markdown(&self) -> String {
    let writer = &markdown::MARKDOWN;
    self.format(writer)
}
}
// Rendering tests: each case builds a `Render` from a text and a hand-written
// entity list and pins the exact HTML and Markdown output.
#[cfg(test)]
mod test {
use super::*;
// Non-overlapping simple entities; also checks that `<` is escaped in HTML
// and that the trailing `_` outside the underline entity is escaped in
// Markdown.
#[test]
fn test_render_simple() {
let text = "Bold italic <underline_";
let entities = vec![
MessageEntity { kind: MEK::Bold, offset: 0, length: 4 },
MessageEntity { kind: MEK::Italic, offset: 5, length: 6 },
MessageEntity { kind: MEK::Underline, offset: 12, length: 10 },
];
let render = Render::new(text, &entities);
assert_eq!(render.as_html(), "<b>Bold</b> <i>italic</i> <u>&lt;underline</u>_");
assert_eq!(render.as_markdown(), "**Bold** _\ritalic_\r __\r<underline__\r\\_");
}
// `Pre` with and without a language, plus inline code.
#[test]
fn test_render_pre_with_lang() {
let text = "Some pre, normal and rusty code";
let entities = vec![
MessageEntity { kind: MEK::Pre { language: None }, offset: 5, length: 3 },
MessageEntity { kind: MEK::Code, offset: 10, length: 6 },
MessageEntity {
kind: MEK::Pre { language: Some("rust".to_owned()) },
offset: 21,
length: 5,
},
];
let render = Render::new(text, &entities);
assert_eq!(
render.as_html(),
"Some <pre>pre</pre>, <code>normal</code> and <pre><code \
class=\"language-rust\">rusty</code></pre> code",
);
assert_eq!(
render.as_markdown(),
"Some ```\npre```\n, `normal` and ```rust\nrusty```\n code",
);
}
// Two entities that partially overlap: the bold range ends inside the italic
// range, so the expected output has interleaved (not properly nested) tags.
#[test]
fn test_render_nested() {
let text = "Some bold both italics";
let entities = vec![
MessageEntity { kind: MEK::Bold, offset: 5, length: 9 },
MessageEntity { kind: MEK::Italic, offset: 10, length: 12 },
];
let render = Render::new(text, &entities);
assert_eq!(render.as_html(), "Some <b>bold <i>both</b> italics</i>");
assert_eq!(render.as_markdown(), "Some **bold _\rboth** italics_\r");
}
// A longer message mixing adjacent entities, several entities starting at the
// same offset, links, inline code and a blockquote running to the end of the
// text.
#[test]
fn test_render_complex() {
let text = "Hi how are you?\nnested entities are cool\nIm in a Blockquote!";
let entities = vec![
MessageEntity { kind: MEK::Bold, offset: 0, length: 2 },
MessageEntity { kind: MEK::Italic, offset: 3, length: 3 },
MessageEntity { kind: MEK::Underline, offset: 7, length: 3 },
MessageEntity { kind: MEK::Strikethrough, offset: 11, length: 3 },
MessageEntity { kind: MEK::Bold, offset: 16, length: 1 },
MessageEntity { kind: MEK::Bold, offset: 17, length: 5 },
MessageEntity { kind: MEK::Underline, offset: 17, length: 4 },
MessageEntity { kind: MEK::Strikethrough, offset: 17, length: 4 },
MessageEntity {
kind: MEK::TextLink { url: reqwest::Url::parse("https://t.me/").unwrap() },
offset: 23,
length: 8,
},
MessageEntity {
kind: MEK::TextLink { url: reqwest::Url::parse("tg://user?id=1234567").unwrap() },
offset: 32,
length: 3,
},
MessageEntity { kind: MEK::Code, offset: 36, length: 4 },
MessageEntity { kind: MEK::Blockquote, offset: 41, length: 19 },
];
let render = Render::new(text, &entities);
assert_eq!(
render.as_html(),
"<b>Hi</b> <i>how</i> <u>are</u> <s>you</s>?\n<b>n</b><b><u><s>este</s></u>d</b> \
<a href=\"https://t.me/\">entities</a> <a href=\"tg://user?id=1234567\">are</a> <code>cool</code>\n\
<blockquote>Im in a Blockquote!</blockquote>"
);
assert_eq!(
render.as_markdown(),
"**Hi** _\rhow_\r __\rare__\r ~you~?\n**n****__\r~este~__\rd** [entities](https://t.me/) \
[are](tg://user?id=1234567) `cool`\n>Im in a Blockquote\\!"
);
}
}

View file

@ -0,0 +1,156 @@
use std::cmp::Ordering;
/// One half (opening or closing) of a formatting entity, placed in the text.
#[derive(Clone)]
pub struct Tag<'a> {
    /// Whether this is the opening or the closing half.
    pub place: Place,
    /// The formatting this tag applies, including any value it carries.
    pub kind: Kind<'a>,
    /// Position in the text at which the tag is emitted.
    pub offset: usize,
    /// Caller-supplied index used to order tags that share offset and place.
    pub index: usize,
}

impl<'a> Tag<'a> {
    /// Builds the opening tag of an entity.
    pub const fn start(kind: Kind<'a>, offset: usize, index: usize) -> Self {
        Tag { place: Place::Start, kind, offset, index }
    }

    /// Builds the closing tag of an entity.
    pub const fn end(kind: Kind<'a>, offset: usize, index: usize) -> Self {
        Tag { place: Place::End, kind, offset, index }
    }
}

impl Eq for Tag<'_> {}

impl PartialEq for Tag<'_> {
    /// Two tags are equal when place, offset and index match; `kind` is
    /// deliberately left out of the comparison.
    fn eq(&self, other: &Self) -> bool {
        (self.place, self.offset, self.index) == (other.place, other.offset, other.index)
    }
}

impl Ord for Tag<'_> {
    /// Orders by offset, then by place (closing tags before opening tags at
    /// the same offset), then by index: ascending for opening tags and
    /// descending for closing tags, so that the tag opened last is closed
    /// first.
    fn cmp(&self, other: &Self) -> Ordering {
        match (self.offset, self.place).cmp(&(other.offset, other.place)) {
            // Same offset and same place: the index is the tie-breaker.
            Ordering::Equal => match other.place {
                Place::Start => self.index.cmp(&other.index),
                Place::End => self.index.cmp(&other.index).reverse(),
            },
            decided => decided,
        }
    }
}

impl PartialOrd for Tag<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

/// Which half of an entity a [`Tag`] represents.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Place {
    // HACK: `End` must be declared first. The derived `Ord` follows the
    // declaration order, and `Tag`'s ordering relies on `End < Start` so that
    // closing tags come before opening tags at the same offset.
    End,
    Start,
}

/// The formatting a [`Tag`] applies, together with the value it needs, if any.
#[derive(Clone, PartialEq, Eq)]
pub enum Kind<'a> {
    Bold,
    Blockquote,
    Italic,
    Underline,
    Strikethrough,
    Spoiler,
    Code,
    /// Code block with an optional language.
    Pre(Option<&'a str>),
    /// Link with its URL.
    TextLink(&'a str),
    /// Mention of a user by numeric id.
    TextMention(u64),
    /// Custom emoji with its id.
    CustomEmoji(&'a str),
}
/// Markup for an entity that needs only a fixed opening and closing string.
pub struct SimpleTag {
    /// String written where the entity starts.
    pub start: &'static str,
    /// String written where the entity ends.
    pub end: &'static str,
}

impl SimpleTag {
    /// Creates a tag from its opening and closing strings.
    pub const fn new(start: &'static str, end: &'static str) -> Self {
        SimpleTag { start, end }
    }

    /// Returns the opening string for `Place::Start` and the closing string
    /// for `Place::End`.
    pub const fn get_tag(&self, place: Place) -> &'static str {
        if matches!(place, Place::Start) {
            self.start
        } else {
            self.end
        }
    }
}

/// Markup for an entity that interpolates a value (language, URL, id) between
/// fixed strings.
pub struct ComplexTag {
    /// Leading fixed string.
    pub start: &'static str,
    /// Fixed string written next to the interpolated value.
    pub middle: &'static str,
    /// Trailing fixed string.
    pub end: &'static str,
}

impl ComplexTag {
    /// Creates a tag from its three fixed strings.
    pub const fn new(start: &'static str, middle: &'static str, end: &'static str) -> Self {
        ComplexTag { start, middle, end }
    }
}
/// Per-format table of markup strings plus the callbacks that write tags and
/// characters into the output buffer.
pub struct TagWriter {
    pub bold: SimpleTag,
    pub blockquote: SimpleTag,
    pub italic: SimpleTag,
    pub underline: SimpleTag,
    pub strikethrough: SimpleTag,
    pub spoiler: SimpleTag,
    pub code: SimpleTag,
    /// Code block without a language.
    pub pre_no_lang: SimpleTag,
    /// Code block with a language.
    pub pre: ComplexTag,
    pub text_link: ComplexTag,
    pub text_mention: ComplexTag,
    pub custom_emoji: ComplexTag,
    /// Write the tag to buffer
    pub write_tag_fn: fn(&Tag, buf: &mut String),
    /// Write the char to buffer and escape characters if needed
    pub write_char_fn: fn(char, buf: &mut String),
}

impl TagWriter {
    /// Returns the number of bytes the markup for `tags` will occupy.
    ///
    /// The result is the combined length of every tag's fixed strings and
    /// interpolated value. For value-carrying kinds the value is counted with
    /// the opening tag and `middle` with the closing tag; only the total over
    /// an opening/closing pair is meaningful. Extra bytes produced by
    /// character escaping are not included.
    pub fn get_tags_sizes(&self, tags: &[Tag]) -> usize {
        tags.iter()
            .map(|tag| match tag.kind {
                Kind::Bold => self.bold.get_tag(tag.place).len(),
                Kind::Blockquote => self.blockquote.get_tag(tag.place).len(),
                Kind::Italic => self.italic.get_tag(tag.place).len(),
                Kind::Underline => self.underline.get_tag(tag.place).len(),
                Kind::Strikethrough => self.strikethrough.get_tag(tag.place).len(),
                Kind::Spoiler => self.spoiler.get_tag(tag.place).len(),
                Kind::Code => self.code.get_tag(tag.place).len(),
                Kind::Pre(lang) => match tag.place {
                    Place::Start => lang
                        .map_or(self.pre_no_lang.start.len(), |l| self.pre.start.len() + l.len()),
                    Place::End => lang.map_or(self.pre_no_lang.end.len(), |_| {
                        self.pre.middle.len() + self.pre.end.len()
                    }),
                },
                Kind::TextLink(url) => match tag.place {
                    Place::Start => self.text_link.start.len() + url.len(),
                    Place::End => self.text_link.middle.len() + self.text_link.end.len(),
                },
                Kind::TextMention(id) => match tag.place {
                    Place::Start => {
                        // Number of decimal digits in `id`. `ilog10` panics
                        // on zero, so zero is handled as a one-digit number.
                        let digits = id.checked_ilog10().map_or(1, |log| log as usize + 1);
                        self.text_mention.start.len() + digits
                    }
                    Place::End => self.text_mention.middle.len() + self.text_mention.end.len(),
                },
                Kind::CustomEmoji(custom_emoji_id) => match tag.place {
                    Place::Start => self.custom_emoji.start.len() + custom_emoji_id.len(),
                    Place::End => self.custom_emoji.middle.len() + self.custom_emoji.end.len(),
                },
            })
            .sum()
    }
}