Sharkey/packages/backend/src/core/MfmService.ts

687 lines
17 KiB
TypeScript
Raw Normal View History

/*
* SPDX-FileCopyrightText: syuilo and misskey-project
* SPDX-License-Identifier: AGPL-3.0-only
*/
2022-09-17 20:27:08 +02:00
import { URL } from 'node:url';
import { Inject, Injectable } from '@nestjs/common';
import * as parse5 from 'parse5';
import { Window, DocumentFragment, XMLSerializer } from 'happy-dom';
2022-09-17 20:27:08 +02:00
import { DI } from '@/di-symbols.js';
2022-09-20 22:33:11 +02:00
import type { Config } from '@/config.js';
2022-09-17 20:27:08 +02:00
import { intersperse } from '@/misc/prelude/array.js';
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import type { IMentionedRemoteUsers } from '@/models/Note.js';
2022-12-04 09:05:32 +01:00
import { bindThis } from '@/decorators.js';
import type { DefaultTreeAdapterMap } from 'parse5';
2024-02-03 15:01:09 +01:00
import type * as mfm from '@transfem-org/sfm-js';
2022-09-17 20:27:08 +02:00
// parse5's default tree adapter; used below for node type checks (isTextNode / isElementNode).
const treeAdapter = parse5.defaultTreeAdapter;
// Short aliases for the node types of parse5's default tree.
type Node = DefaultTreeAdapterMap['node'];
type ChildNode = DefaultTreeAdapterMap['childNode'];
2022-09-17 20:27:08 +02:00
// Matches an http(s) URL at the START of a string; anything after the last allowed character is ignored.
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
// Same character set, but the WHOLE string must consist of the URL (anchored at both ends).
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
@Injectable()
export class MfmService {
/**
 * @param config Configuration injected under the `DI.config` token. Within this class only
 *   `config.url` is read, to build hashtag and local-mention links.
 */
constructor(
@Inject(DI.config)
private config: Config,
) {
}
/**
 * Converts an HTML fragment into MFM source text.
 *
 * The fragment is parsed with parse5 and walked depth-first; every element is
 * mapped to the closest MFM construct (bold, strike, code, quote, link, ...).
 * Elements without a dedicated mapping contribute only their children.
 *
 * @param html HTML fragment to convert.
 * @param hashtagNames Optional hashtag names. An `<a>` that has an `href` and whose text,
 *   after `normalizeForSearch`, equals one of these (also normalized) is emitted as its
 *   plain text instead of as a link.
 * @returns The generated text with leading and trailing whitespace trimmed.
 */
@bindThis
public fromHtml(html: string, hashtagNames?: string[]): string {
	// some AP servers like Pixelfed use br tags as well as newlines
	html = html.replace(/<br\s?\/?>\r?\n/gi, '\n');

	const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x)));

	const dom = parse5.parseFragment(html);

	// Output buffer shared by the nested helpers below.
	let text = '';

	for (const n of dom.childNodes) {
		analyze(n);
	}

	return text.trim();

	/** Returns the plain text below `node`, with `<br>` rendered as a newline and no markup. */
	function getText(node: Node): string {
		if (treeAdapter.isTextNode(node)) return node.value;
		if (!treeAdapter.isElementNode(node)) return '';
		if (node.nodeName === 'br') return '\n';

		if (node.childNodes) {
			return node.childNodes.map(n => getText(n)).join('');
		}

		return '';
	}

	/** Runs `analyze` over every child, appending to `text`. */
	function appendChildren(childNodes: ChildNode[]): void {
		if (childNodes) {
			for (const n of childNodes) {
				analyze(n);
			}
		}
	}

	/** Appends `open`, the converted children, then `close`. */
	function appendWrapped(open: string, childNodes: ChildNode[], close: string): void {
		text += open;
		appendChildren(childNodes);
		text += close;
	}

	/**
	 * Extracts the hostname of `rawUrl`.
	 * Returns null when the value is not an absolute URL or has no host, because
	 * `new URL()` throws on such input and the input here is untrusted HTML.
	 */
	function hostnameOf(rawUrl: string): string | null {
		try {
			const { hostname } = new URL(rawUrl);
			return hostname === '' ? null : hostname;
		} catch {
			return null;
		}
	}

	/** Builds the MFM for a generic (non-hashtag, non-mention) anchor. */
	function generateLink(txt: string, hrefValue: string | undefined): string {
		// No href: only the text (possibly empty) can be kept.
		if (hrefValue === undefined) {
			return txt;
		}
		if (!txt || txt === hrefValue) { // #6383: Missing text node
			return urlRegexFull.test(hrefValue) ? hrefValue : `<${hrefValue}>`;
		}
		// #6846: a URL that starts validly but contains other characters needs the <...> form
		if (urlRegex.test(hrefValue) && !urlRegexFull.test(hrefValue)) {
			return `[${txt}](<${hrefValue}>)`;
		}
		return `[${txt}](${hrefValue})`;
	}

	/** Converts one node (and its subtree) and appends the result to `text`. */
	function analyze(node: Node) {
		if (treeAdapter.isTextNode(node)) {
			text += node.value;
			return;
		}

		// Skip comment or document type node
		if (!treeAdapter.isElementNode(node)) {
			return;
		}

		switch (node.nodeName) {
			case 'br': {
				text += '\n';
				break;
			}

			case 'a': {
				const txt = getText(node);
				const rel = node.attrs.find(x => x.name === 'rel');
				const href = node.attrs.find(x => x.name === 'href');

				if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) {
					// Hashtag
					text += txt;
				} else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) {
					// Mention
					const part = txt.split('@');

					if (part.length === 2 && href) {
						// The host part is omitted in the text, so restore it from the link target.
						// If the href cannot be parsed, keep the text as written instead of throwing.
						const host = hostnameOf(href.value);
						text += host == null ? txt : `${txt}@${host}`;
					} else if (part.length === 3) {
						text += txt;
					}
				} else {
					// Anything else
					text += generateLink(txt, href?.value);
				}
				break;
			}

			case 'h1': {
				appendWrapped('**【', node.childNodes, '】**\n');
				break;
			}

			case 'h2':
			case 'h3': {
				appendWrapped('**', node.childNodes, '**\n');
				break;
			}

			case 'b':
			case 'strong': {
				appendWrapped('**', node.childNodes, '**');
				break;
			}

			case 'small': {
				appendWrapped('<small>', node.childNodes, '</small>');
				break;
			}

			case 's':
			case 'del': {
				appendWrapped('~~', node.childNodes, '~~');
				break;
			}

			case 'i':
			case 'em': {
				appendWrapped('<i>', node.childNodes, '</i>');
				break;
			}

			// block code (<pre><code>)
			case 'pre': {
				if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
					text += '\n```\n';
					text += getText(node.childNodes[0]);
					text += '\n```\n';
				} else {
					appendChildren(node.childNodes);
				}
				break;
			}

			// inline code (<code>)
			case 'code': {
				appendWrapped('`', node.childNodes, '`');
				break;
			}

			case 'blockquote': {
				// Quotes are flattened to plain text; every line gets a "> " prefix.
				const t = getText(node);
				if (t) {
					text += '\n> ';
					text += t.split('\n').join('\n> ');
				}
				break;
			}

			case 'p':
			case 'h4':
			case 'h5':
			case 'h6': {
				text += '\n\n';
				appendChildren(node.childNodes);
				break;
			}

			// other block elements
			case 'div':
			case 'header':
			case 'footer':
			case 'article':
			case 'li':
			case 'dt':
			case 'dd': {
				text += '\n';
				appendChildren(node.childNodes);
				break;
			}

			default: // includes inline elements
			{
				appendChildren(node.childNodes);
				break;
			}
		}
	}
}
/**
 * Renders an MFM node tree as an HTML string.
 *
 * A happy-dom document is used to build the elements; the result is the
 * serialization of a single `<p>` element containing the rendered nodes.
 *
 * @param nodes Parsed MFM nodes, or null.
 * @param mentionedRemoteUsers Known remote users; a mention whose username and host match
 *   an entry links to that entry's `url` (or `uri` when `url` is empty). Other mentions
 *   link to `${config.url}/${acct}`.
 * @returns The serialized HTML, or null when `nodes` is null/undefined.
 */
@bindThis
public toHtml(nodes: mfm.MfmNode[] | null, mentionedRemoteUsers: IMentionedRemoteUsers = []) {
	if (nodes == null) {
		return null;
	}

	// NOTE(review): this happy-dom Window is never explicitly closed — confirm whether the
	// installed happy-dom version requires a close/teardown call to release its resources.
	const { window } = new Window();

	const doc = window.document;

	const body = doc.createElement('p');

	/** Renders every child through its handler and appends the results to `targetElement`. */
	function appendChildren(children: mfm.MfmNode[], targetElement: any): void {
		if (children) {
			for (const child of children.map(x => (handlers as any)[x.type](x))) targetElement.appendChild(child);
		}
	}

	/** Creates `<tagName>` and fills it with the rendered children. */
	function container(tagName: string, children: mfm.MfmNode[]) {
		const el = doc.createElement(tagName);
		appendChildren(children, el);
		return el;
	}

	/** Fallback for MFM functions without a dedicated HTML form: children wrapped in `<i>`. */
	function fnDefault(node: mfm.MfmFn) {
		return container('i', node.children);
	}

	/**
	 * Appends `<rp>(</rp><rt>reading</rt><rp>)</rp>` to `rubyEl` and returns it.
	 * The `<rp>` parentheses keep the output readable as "base(reading)" after passing
	 * through an HTML sanitizer that does not support ruby.
	 */
	function finishRuby(rubyEl: any, reading: string) {
		const rpStartEl = doc.createElement('rp');
		rpStartEl.appendChild(doc.createTextNode('('));
		const rtEl = doc.createElement('rt');
		rtEl.appendChild(doc.createTextNode(reading));
		const rpEndEl = doc.createElement('rp');
		rpEndEl.appendChild(doc.createTextNode(')'));

		rubyEl.appendChild(rpStartEl);
		rubyEl.appendChild(rtEl);
		rubyEl.appendChild(rpEndEl);
		return rubyEl;
	}

	const handlers: { [K in mfm.MfmNode['type']]: (node: mfm.NodeType<K>) => any } = {
		bold: (node) => container('b', node.children),

		small: (node) => container('small', node.children),

		strike: (node) => container('del', node.children),

		italic: (node) => container('i', node.children),

		fn: (node) => {
			switch (node.props.name) {
				case 'unixtime': {
					// `.at(0)` is undefined for an empty child list; the previous direct
					// `children[0].type` access threw in that case, outside the try block.
					const first = node.children.at(0);
					const text = first?.type === 'text' ? first.props.text : '';
					try {
						// toISOString() throws a RangeError for an invalid date (e.g. non-numeric text).
						const date = new Date(parseInt(text, 10) * 1000);
						const el = doc.createElement('time');
						el.setAttribute('datetime', date.toISOString());
						el.textContent = date.toISOString();
						return el;
					} catch (err) {
						return fnDefault(node);
					}
				}

				case 'ruby': {
					if (node.children.length === 1) {
						// Single child: "base reading" separated by a space inside one text node.
						const child = node.children[0];
						const text = child.type === 'text' ? child.props.text : '';
						// Default the reading to '' so that a text without a space does not
						// render the literal string "undefined" inside <rt>.
						const [base = '', reading = ''] = text.split(' ');
						const rubyEl = doc.createElement('ruby');
						rubyEl.appendChild(doc.createTextNode(base));
						return finishRuby(rubyEl, reading);
					} else {
						// Several children: the last one is the reading, the others form the base.
						const rt = node.children.at(-1);

						if (!rt) {
							return fnDefault(node);
						}

						const text = rt.type === 'text' ? rt.props.text : '';
						const rubyEl = doc.createElement('ruby');
						appendChildren(node.children.slice(0, node.children.length - 1), rubyEl);
						return finishRuby(rubyEl, text.trim());
					}
				}

				default: {
					return fnDefault(node);
				}
			}
		},

		blockCode: (node) => {
			const pre = doc.createElement('pre');
			const inner = doc.createElement('code');
			inner.textContent = node.props.code;
			pre.appendChild(inner);
			return pre;
		},

		center: (node) => container('div', node.children),

		emojiCode: (node) => {
			// Zero-width spaces surround the shortcode on both sides.
			return doc.createTextNode(`\u200B:${node.props.name}:\u200B`);
		},

		unicodeEmoji: (node) => {
			return doc.createTextNode(node.props.emoji);
		},

		hashtag: (node) => {
			const a = doc.createElement('a');
			a.setAttribute('href', `${this.config.url}/tags/${node.props.hashtag}`);
			a.textContent = `#${node.props.hashtag}`;
			a.setAttribute('rel', 'tag');
			return a;
		},

		inlineCode: (node) => {
			const el = doc.createElement('code');
			el.textContent = node.props.code;
			return el;
		},

		mathInline: (node) => {
			const el = doc.createElement('code');
			el.textContent = node.props.formula;
			return el;
		},

		mathBlock: (node) => {
			const el = doc.createElement('code');
			el.textContent = node.props.formula;
			return el;
		},

		link: (node) => {
			const a = doc.createElement('a');
			a.setAttribute('href', node.props.url);
			appendChildren(node.children, a);
			return a;
		},

		mention: (node) => {
			const a = doc.createElement('a');
			const { username, host, acct } = node.props;
			const remoteUserInfo = mentionedRemoteUsers.find(remoteUser => remoteUser.username === username && remoteUser.host === host);
			a.setAttribute('href', remoteUserInfo ? (remoteUserInfo.url ? remoteUserInfo.url : remoteUserInfo.uri) : `${this.config.url}/${acct}`);
			a.className = 'u-url mention';
			a.textContent = acct;
			return a;
		},

		quote: (node) => container('blockquote', node.children),

		text: (node) => {
			// Text without line breaks needs no wrapper element.
			if (!node.props.text.match(/[\r\n]/)) {
				return doc.createTextNode(node.props.text);
			}

			// Otherwise: a <span> holding the lines separated by <br>.
			const el = doc.createElement('span');
			const nodes = node.props.text.split(/\r\n|\r|\n/).map(x => doc.createTextNode(x));

			for (const x of intersperse<FIXME | 'br'>('br', nodes)) {
				el.appendChild(x === 'br' ? doc.createElement('br') : x);
			}

			return el;
		},

		url: (node) => {
			const a = doc.createElement('a');
			a.setAttribute('href', node.props.url);
			a.textContent = node.props.url;
			return a;
		},

		search: (node) => {
			const a = doc.createElement('a');
			// The query is user text: encode it so characters such as '&', '#' or '+'
			// cannot truncate or alter the query string.
			a.setAttribute('href', `https://www.google.com/search?q=${encodeURIComponent(node.props.query)}`);
			a.textContent = node.props.content;
			return a;
		},

		plain: (node) => container('span', node.children),
	};

	appendChildren(nodes, body);

	return new XMLSerializer().serializeToString(body);
}
// the toMastoApiHtml function was taken from Iceshrimp and written by zotan and modified by marie to work with the current MK version
/**
 * Renders an MFM node tree as HTML in the variant produced by this method:
 * bold, strike, italic and MFM functions are emitted as literal text markers
 * (`**`, `~~`, `*`) inside a `<span>`, links carry `rel`/`target` attributes,
 * and resolved mentions use an `h-card` wrapper.
 *
 * @param nodes Parsed MFM nodes, or null.
 * @param mentionedRemoteUsers Known remote users. A mention matching an entry by username
 *   and host becomes a link to that entry's `url` (or `uri`); an unmatched mention is
 *   emitted as plain text inside a `<span>`.
 * @param inline When true, a leading `<p>` and a trailing `</p>` are stripped from the result.
 * @param quoteUri When not null, a `RE: <link>` block (class `quote-inline`) pointing to this
 *   URI is appended after the rendered nodes.
 * @returns The serialized HTML, or null when `nodes` is null/undefined.
 */
@bindThis
public async toMastoApiHtml(nodes: mfm.MfmNode[] | null, mentionedRemoteUsers: IMentionedRemoteUsers = [], inline = false, quoteUri: string | null = null) {
	if (nodes == null) {
		return null;
	}

	const { window } = new Window();

	const doc = window.document;

	const body = doc.createElement('p');

	/** Renders all children (handlers may be async) and appends them in their original order. */
	async function appendChildren(children: mfm.MfmNode[], targetElement: any): Promise<void> {
		if (children) {
			for (const child of await Promise.all(children.map(async (x) => await (handlers as any)[x.type](x)))) targetElement.appendChild(child);
		}
	}

	/**
	 * Builds `<span>{marker}{children}{marker}</span>`.
	 * The markers are appended as text nodes. Assigning to `textContent` after the
	 * children were appended would replace them all with a single flattened text node,
	 * dropping nested links, mentions and line breaks.
	 */
	async function wrapWithMarker(marker: string, children: mfm.MfmNode[]) {
		const el = doc.createElement('span');
		el.appendChild(doc.createTextNode(marker));
		await appendChildren(children, el);
		el.appendChild(doc.createTextNode(marker));
		return el;
	}

	/** Builds `<tagName>` filled with the rendered children. */
	async function container(tagName: string, children: mfm.MfmNode[]) {
		const el = doc.createElement(tagName);
		await appendChildren(children, el);
		return el;
	}

	/** Fills `target` with the lines of `text`, separated by `<br>` elements. */
	function appendLines(text: string, target: any): void {
		const lines = text
			.split(/\r\n|\r|\n/)
			.map((x) => doc.createTextNode(x));

		for (const x of intersperse<FIXME | 'br'>('br', lines)) {
			target.appendChild(x === 'br' ? doc.createElement('br') : x);
		}
	}

	const handlers: {
		[K in mfm.MfmNode['type']]: (node: mfm.NodeType<K>) => any;
	} = {
		bold(node) {
			return wrapWithMarker('**', node.children);
		},

		small(node) {
			return container('small', node.children);
		},

		strike(node) {
			return wrapWithMarker('~~', node.children);
		},

		italic(node) {
			return wrapWithMarker('*', node.children);
		},

		// MFM functions have no equivalent here; they are rendered like italic text.
		fn(node) {
			return wrapWithMarker('*', node.children);
		},

		blockCode(node) {
			const pre = doc.createElement('pre');
			const inner = doc.createElement('code');
			appendLines(node.props.code, inner);
			pre.appendChild(inner);
			return pre;
		},

		center(node) {
			return container('div', node.children);
		},

		emojiCode(node) {
			// Zero-width spaces surround the shortcode on both sides.
			return doc.createTextNode(`\u200B:${node.props.name}:\u200B`);
		},

		unicodeEmoji(node) {
			return doc.createTextNode(node.props.emoji);
		},

		hashtag: (node) => {
			const a = doc.createElement('a');
			a.setAttribute('href', `${this.config.url}/tags/${node.props.hashtag}`);
			a.textContent = `#${node.props.hashtag}`;
			a.setAttribute('rel', 'tag');
			a.setAttribute('class', 'hashtag');
			return a;
		},

		inlineCode(node) {
			const el = doc.createElement('code');
			el.textContent = node.props.code;
			return el;
		},

		mathInline(node) {
			const el = doc.createElement('code');
			el.textContent = node.props.formula;
			return el;
		},

		mathBlock(node) {
			const el = doc.createElement('code');
			el.textContent = node.props.formula;
			return el;
		},

		async link(node) {
			const a = doc.createElement('a');
			a.setAttribute('rel', 'nofollow noopener noreferrer');
			a.setAttribute('target', '_blank');
			a.setAttribute('href', node.props.url);
			await appendChildren(node.children, a);
			return a;
		},

		async mention(node) {
			const { username, host, acct } = node.props;
			const resolved = mentionedRemoteUsers.find(remoteUser => remoteUser.username === username && remoteUser.host === host);

			const el = doc.createElement('span');
			if (!resolved) {
				// Unknown user: plain text, no link.
				el.textContent = acct;
			} else {
				// <span class="h-card"><a class="u-url mention">@<span>username</span></a></span>
				el.setAttribute('class', 'h-card');
				el.setAttribute('translate', 'no');
				const a = doc.createElement('a');
				a.setAttribute('href', resolved.url ? resolved.url : resolved.uri);
				a.className = 'u-url mention';
				const span = doc.createElement('span');
				span.textContent = resolved.username || username;
				a.textContent = '@';
				a.appendChild(span);
				el.appendChild(a);
			}

			return el;
		},

		quote(node) {
			return container('blockquote', node.children);
		},

		text(node) {
			const el = doc.createElement('span');
			appendLines(node.props.text, el);
			return el;
		},

		url(node) {
			const a = doc.createElement('a');
			a.setAttribute('rel', 'nofollow noopener noreferrer');
			a.setAttribute('target', '_blank');
			a.setAttribute('href', node.props.url);
			// The visible text omits the scheme.
			a.textContent = node.props.url.replace(/^https?:\/\//, '');
			return a;
		},

		search: (node) => {
			const a = doc.createElement('a');
			// The query is user text: encode it so characters such as '&', '#' or '+'
			// cannot truncate or alter the query string.
			a.setAttribute('href', `https://www.google.com/search?q=${encodeURIComponent(node.props.query)}`);
			a.textContent = node.props.content;
			return a;
		},

		plain(node) {
			return container('span', node.children);
		},
	};

	await appendChildren(nodes, body);

	if (quoteUri !== null) {
		const a = doc.createElement('a');
		a.setAttribute('href', quoteUri);
		a.textContent = quoteUri.replace(/^https?:\/\//, '');

		// <span class="quote-inline"><br><br>RE: <a href="...">...</a></span>
		const quote = doc.createElement('span');
		quote.setAttribute('class', 'quote-inline');
		quote.appendChild(doc.createElement('br'));
		quote.appendChild(doc.createElement('br'));
		// Append the label as a text node rather than round-tripping through innerHTML.
		quote.appendChild(doc.createTextNode('RE: '));
		quote.appendChild(a);

		body.appendChild(quote);
	}

	let result = new XMLSerializer().serializeToString(body);

	if (inline) {
		result = result.replace(/^<p>/, '').replace(/<\/p>$/, '');
	}

	return result;
}
2022-09-17 20:27:08 +02:00
}