misskey/packages/backend/src/core/MfmService.ts
ikasoba 20eb4bc296
Fix(backend): ActivityPubでのHTMLへのシリアライズを修正 (#13752)
* devモードでもActivityPub系エンドポイントへアクセスできるように

* ActivityPubでのHTMLのシリアライズを修正

* ハードコードしていたurlを`httpUrl`へ修正

* テストの追加
2024-04-27 20:26:55 +09:00

466 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* SPDX-FileCopyrightText: syuilo and misskey-project
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { URL } from 'node:url';
import { Inject, Injectable } from '@nestjs/common';
import * as parse5 from 'parse5';
import { Window, XMLSerializer } from 'happy-dom';
import { DI } from '@/di-symbols.js';
import type { Config } from '@/config.js';
import { intersperse } from '@/misc/prelude/array.js';
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import type { IMentionedRemoteUsers } from '@/models/Note.js';
import { bindThis } from '@/decorators.js';
import * as TreeAdapter from '../../node_modules/parse5/dist/tree-adapters/default.js';
import type * as mfm from 'mfm-js';
const treeAdapter = TreeAdapter.defaultTreeAdapter;
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
@Injectable()
export class MfmService {
constructor(
@Inject(DI.config)
private config: Config,
) {
}
@bindThis
public fromHtml(html: string, hashtagNames?: string[]): string {
// some AP servers like Pixelfed use br tags as well as newlines
html = html.replace(/<br\s?\/?>\r?\n/gi, '\n');
const normalizedHashtagNames = hashtagNames == null ? undefined : new Set<string>(hashtagNames.map(x => normalizeForSearch(x)));
const dom = parse5.parseFragment(html);
let text = '';
for (const n of dom.childNodes) {
analyze(n);
}
return text.trim();
function getText(node: TreeAdapter.Node): string {
if (treeAdapter.isTextNode(node)) return node.value;
if (!treeAdapter.isElementNode(node)) return '';
if (node.nodeName === 'br') return '\n';
if (node.childNodes) {
return node.childNodes.map(n => getText(n)).join('');
}
return '';
}
function appendChildren(childNodes: TreeAdapter.ChildNode[]): void {
if (childNodes) {
for (const n of childNodes) {
analyze(n);
}
}
}
function analyze(node: TreeAdapter.Node) {
if (treeAdapter.isTextNode(node)) {
text += node.value;
return;
}
// Skip comment or document type node
if (!treeAdapter.isElementNode(node)) return;
switch (node.nodeName) {
case 'br': {
text += '\n';
break;
}
case 'a':
{
const txt = getText(node);
const rel = node.attrs.find(x => x.name === 'rel');
const href = node.attrs.find(x => x.name === 'href');
// ハッシュタグ
if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) {
text += txt;
// メンション
} else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) {
const part = txt.split('@');
if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する
const acct = `${txt}@${(new URL(href.value)).hostname}`;
text += acct;
//#endregion
} else if (part.length === 3) {
text += txt;
}
// その他
} else {
const generateLink = () => {
if (!href && !txt) {
return '';
}
if (!href) {
return txt;
}
if (!txt || txt === href.value) { // #6383: Missing text node
if (href.value.match(urlRegexFull)) {
return href.value;
} else {
return `<${href.value}>`;
}
}
if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {
return `[${txt}](<${href.value}>)`; // #6846
} else {
return `[${txt}](${href.value})`;
}
};
text += generateLink();
}
break;
}
case 'h1':
{
text += '【';
appendChildren(node.childNodes);
text += '】\n';
break;
}
case 'b':
case 'strong':
{
text += '**';
appendChildren(node.childNodes);
text += '**';
break;
}
case 'small':
{
text += '<small>';
appendChildren(node.childNodes);
text += '</small>';
break;
}
case 's':
case 'del':
{
text += '~~';
appendChildren(node.childNodes);
text += '~~';
break;
}
case 'i':
case 'em':
{
text += '<i>';
appendChildren(node.childNodes);
text += '</i>';
break;
}
// block code (<pre><code>)
case 'pre': {
if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
text += '\n```\n';
text += getText(node.childNodes[0]);
text += '\n```\n';
} else {
appendChildren(node.childNodes);
}
break;
}
// inline code (<code>)
case 'code': {
text += '`';
appendChildren(node.childNodes);
text += '`';
break;
}
case 'blockquote': {
const t = getText(node);
if (t) {
text += '\n> ';
text += t.split('\n').join('\n> ');
}
break;
}
case 'p':
case 'h2':
case 'h3':
case 'h4':
case 'h5':
case 'h6':
{
text += '\n\n';
appendChildren(node.childNodes);
break;
}
// other block elements
case 'div':
case 'header':
case 'footer':
case 'article':
case 'li':
case 'dt':
case 'dd':
{
text += '\n';
appendChildren(node.childNodes);
break;
}
default: // includes inline elements
{
appendChildren(node.childNodes);
break;
}
}
}
}
@bindThis
public toHtml(nodes: mfm.MfmNode[] | null, mentionedRemoteUsers: IMentionedRemoteUsers = []) {
if (nodes == null) {
return null;
}
const { window } = new Window();
const doc = window.document;
const body = doc.createElement('p');
function appendChildren(children: mfm.MfmNode[], targetElement: any): void {
if (children) {
for (const child of children.map(x => (handlers as any)[x.type](x))) targetElement.appendChild(child);
}
}
function fnDefault(node: mfm.MfmFn) {
const el = doc.createElement('i');
appendChildren(node.children, el);
return el;
}
const handlers: { [K in mfm.MfmNode['type']]: (node: mfm.NodeType<K>) => any } = {
bold: (node) => {
const el = doc.createElement('b');
appendChildren(node.children, el);
return el;
},
small: (node) => {
const el = doc.createElement('small');
appendChildren(node.children, el);
return el;
},
strike: (node) => {
const el = doc.createElement('del');
appendChildren(node.children, el);
return el;
},
italic: (node) => {
const el = doc.createElement('i');
appendChildren(node.children, el);
return el;
},
fn: (node) => {
switch (node.props.name) {
case 'unixtime': {
const text = node.children[0].type === 'text' ? node.children[0].props.text : '';
try {
const date = new Date(parseInt(text, 10) * 1000);
const el = doc.createElement('time');
el.setAttribute('datetime', date.toISOString());
el.textContent = date.toISOString();
return el;
} catch (err) {
return fnDefault(node);
}
}
case 'ruby': {
if (node.children.length === 1) {
const child = node.children[0];
const text = child.type === 'text' ? child.props.text : '';
const rubyEl = doc.createElement('ruby');
const rtEl = doc.createElement('rt');
// ruby未対応のHTMLサニタイザーを通したときにルビが「劉備りゅうび」となるようにする
const rpStartEl = doc.createElement('rp');
rpStartEl.appendChild(doc.createTextNode('('));
const rpEndEl = doc.createElement('rp');
rpEndEl.appendChild(doc.createTextNode(')'));
rubyEl.appendChild(doc.createTextNode(text.split(' ')[0]));
rtEl.appendChild(doc.createTextNode(text.split(' ')[1]));
rubyEl.appendChild(rpStartEl);
rubyEl.appendChild(rtEl);
rubyEl.appendChild(rpEndEl);
return rubyEl;
} else {
const rt = node.children.at(-1);
if (!rt) {
return fnDefault(node);
}
const text = rt.type === 'text' ? rt.props.text : '';
const rubyEl = doc.createElement('ruby');
const rtEl = doc.createElement('rt');
// ruby未対応のHTMLサニタイザーを通したときにルビが「劉備りゅうび」となるようにする
const rpStartEl = doc.createElement('rp');
rpStartEl.appendChild(doc.createTextNode('('));
const rpEndEl = doc.createElement('rp');
rpEndEl.appendChild(doc.createTextNode(')'));
appendChildren(node.children.slice(0, node.children.length - 1), rubyEl);
rtEl.appendChild(doc.createTextNode(text.trim()));
rubyEl.appendChild(rpStartEl);
rubyEl.appendChild(rtEl);
rubyEl.appendChild(rpEndEl);
return rubyEl;
}
}
default: {
return fnDefault(node);
}
}
},
blockCode: (node) => {
const pre = doc.createElement('pre');
const inner = doc.createElement('code');
inner.textContent = node.props.code;
pre.appendChild(inner);
return pre;
},
center: (node) => {
const el = doc.createElement('div');
appendChildren(node.children, el);
return el;
},
emojiCode: (node) => {
return doc.createTextNode(`\u200B:${node.props.name}:\u200B`);
},
unicodeEmoji: (node) => {
return doc.createTextNode(node.props.emoji);
},
hashtag: (node) => {
const a = doc.createElement('a');
a.setAttribute('href', `${this.config.url}/tags/${node.props.hashtag}`);
a.textContent = `#${node.props.hashtag}`;
a.setAttribute('rel', 'tag');
return a;
},
inlineCode: (node) => {
const el = doc.createElement('code');
el.textContent = node.props.code;
return el;
},
mathInline: (node) => {
const el = doc.createElement('code');
el.textContent = node.props.formula;
return el;
},
mathBlock: (node) => {
const el = doc.createElement('code');
el.textContent = node.props.formula;
return el;
},
link: (node) => {
const a = doc.createElement('a');
a.setAttribute('href', node.props.url);
appendChildren(node.children, a);
return a;
},
mention: (node) => {
const a = doc.createElement('a');
const { username, host, acct } = node.props;
const remoteUserInfo = mentionedRemoteUsers.find(remoteUser => remoteUser.username === username && remoteUser.host === host);
a.setAttribute('href', remoteUserInfo ? (remoteUserInfo.url ? remoteUserInfo.url : remoteUserInfo.uri) : `${this.config.url}/${acct}`);
a.className = 'u-url mention';
a.textContent = acct;
return a;
},
quote: (node) => {
const el = doc.createElement('blockquote');
appendChildren(node.children, el);
return el;
},
text: (node) => {
if (!node.props.text.match(/[\r\n]/)) {
return doc.createTextNode(node.props.text);
}
const el = doc.createElement('span');
const nodes = node.props.text.split(/\r\n|\r|\n/).map(x => doc.createTextNode(x));
for (const x of intersperse<FIXME | 'br'>('br', nodes)) {
el.appendChild(x === 'br' ? doc.createElement('br') : x);
}
return el;
},
url: (node) => {
const a = doc.createElement('a');
a.setAttribute('href', node.props.url);
a.textContent = node.props.url;
return a;
},
search: (node) => {
const a = doc.createElement('a');
a.setAttribute('href', `https://www.google.com/search?q=${node.props.query}`);
a.textContent = node.props.content;
return a;
},
plain: (node) => {
const el = doc.createElement('span');
appendChildren(node.children, el);
return el;
},
};
appendChildren(nodes, body);
return new XMLSerializer().serializeToString(body);
}
}