Reimplement misleading link tagging in new HTML handling code (#3254)

This commit is contained in:
Claire
2025-10-28 21:59:53 +01:00
committed by GitHub
parent 70b8281730
commit 3bd56b92c1
4 changed files with 118 additions and 92 deletions

View File

@@ -1,4 +1,4 @@
import { useCallback } from 'react';
import { useCallback, useEffect, useRef } from 'react';
import type { ComponentProps, FC } from 'react';
import classNames from 'classnames';
@@ -7,6 +7,7 @@ import { Link } from 'react-router-dom';
import type { ApiMentionJSON } from '@/flavours/glitch/api_types/statuses';
import { useAppSelector } from '@/flavours/glitch/store';
import type { OnElementHandler } from '@/flavours/glitch/utils/html';
import { decode as decodeIDNA } from 'flavours/glitch/utils/idna';
export interface HandledLinkProps {
href: string;
@@ -16,6 +17,94 @@ export interface HandledLinkProps {
mention?: Pick<ApiMentionJSON, 'id' | 'acct' | 'username'>;
}
/**
 * Tells whether a link's visible text plausibly designates its target.
 *
 * The text is accepted when it equals `origin` or `host`, when it starts with
 * either of them followed by `/`, or when prefixing it with `www.` makes it
 * equal to `host` or start with `host` followed by `/`.
 * The comparison is case-sensitive; callers lowercase the text themselves.
 *
 * @param text - Visible link text.
 * @param origin - Origin of the target URL (scheme included).
 * @param host - Host of the target URL.
 * @returns `true` when the text matches the target.
 */
const textMatchesTarget = (text: string, origin: string, host: string) => {
  // Exact match, or a path located below the origin / host.
  for (const base of [origin, host]) {
    if (text === base || text.startsWith(`${base}/`)) return true;
  }

  // The text may simply leave out a leading `www.` present in the host.
  const withWww = `www.${text}`;
  return withWww === host || withWww.startsWith(`${host}/`);
};
/**
 * Decides whether the text a reader sees for a link fails to identify the
 * link's actual destination.
 *
 * @param link - Anchor element to inspect; its `href` is parsed with `new URL`.
 * @returns `true` when the visible text does not match the target, `false`
 *   when it does.
 * @throws TypeError when `link.href` cannot be parsed by `new URL`.
 */
export const isLinkMisleading = (link: HTMLAnchorElement) => {
  // Gather the visible text by hand: remote software controls the markup, and
  // `innerText` cannot be used because the `invisible` class does not set
  // `display` to `none`.
  const visibleParts: string[] = [];
  const collect = (node: Node): void => {
    if (node instanceof Text) {
      visibleParts.push(node.textContent);
      return;
    }
    if (!(node instanceof HTMLElement)) return;
    // Skip whole subtrees that are flagged as hidden.
    if (node.classList.contains('invisible')) return;
    node.childNodes.forEach((child) => {
      collect(child);
    });
  };
  collect(link);

  const linkText = visibleParts.join('');
  const targetURL = new URL(link.href);

  // Schemes without a meaningful host get their own rules.
  switch (targetURL.protocol) {
    case 'magnet:':
      return !linkText.startsWith('magnet:');
    case 'xmpp:': {
      const { href } = targetURL;
      return linkText !== href && `xmpp:${linkText}` !== href;
    }
  }

  // Tries the text as-is first, then lowercased.
  const matchesEitherCase = (text: string, origin: string, host: string) =>
    textMatchesTarget(text, origin, host) ||
    textMatchesTarget(text.toLowerCase(), origin, host);

  // First attempt against the URL as serialized; this may not work with
  // international domain names.
  if (matchesEitherCase(linkText, targetURL.origin, targetURL.host)) {
    return false;
  }

  // Not recognized yet: retry with the hostname decoded from its IDNA form,
  // comparing NFKC-normalized strings on both sides.
  const unicodeHostname = decodeIDNA(targetURL.hostname).normalize('NFKC');
  const unicodeHost = targetURL.host.replace(
    targetURL.hostname,
    unicodeHostname,
  );
  const unicodeOrigin = targetURL.origin.replace(targetURL.host, unicodeHost);

  return !matchesEitherCase(
    linkText.normalize('NFKC'),
    unicodeOrigin,
    unicodeHost,
  );
};
/**
 * Appends a `link-origin-tag` span to `link` when `isLinkMisleading` reports
 * that the link's visible text does not match its destination.
 *
 * The tag text depends on the URL scheme: the full href in brackets for
 * `xmpp:`, the literal `(magnet)` for `magnet:`, and the host in brackets for
 * everything else. A single space is inserted before the tag.
 *
 * The function is idempotent: a link that already contains a
 * `link-origin-tag` element is left untouched, so calling it repeatedly on
 * the same element (for example from an effect that runs more than once)
 * does not pile up tags.
 *
 * @param link - Anchor element to inspect and, if needed, mutate in place.
 */
export const tagMisleadingLink = (link: HTMLAnchorElement) => {
  // Already tagged by a previous call: adding another tag would duplicate it.
  // NOTE(review): assumes untrusted post markup cannot carry this class
  // itself — confirm against the HTML sanitizer.
  if (link.querySelector('.link-origin-tag') !== null) return;

  try {
    if (!isLinkMisleading(link)) return;

    // Add a tag besides the link text to display its origin.
    const url = new URL(link.href);
    const tag = document.createElement('span');
    tag.classList.add('link-origin-tag');

    switch (url.protocol) {
      case 'xmpp:':
        tag.textContent = `[${url.href}]`;
        break;
      case 'magnet:':
        tag.textContent = '(magnet)';
        break;
      default:
        tag.textContent = `[${url.host}]`;
    }

    link.insertAdjacentText('beforeend', ' ');
    link.insertAdjacentElement('beforeend', tag);
  } catch (e) {
    // The URL is invalid, remove the href just to be safe. Any other error is
    // deliberately ignored and leaves the link as it was.
    if (e instanceof TypeError) link.removeAttribute('href');
  }
};
export const HandledLink: FC<HandledLinkProps & ComponentProps<'a'>> = ({
href,
text,
@@ -30,6 +119,17 @@ export const HandledLink: FC<HandledLinkProps & ComponentProps<'a'>> = ({
// eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
(state) => state.local_settings.get('rewrite_mentions', 'no') as string,
);
const tagLinks = useAppSelector(
(state) =>
// eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
state.local_settings.get('tag_misleading_links', false) as string,
);
const linkRef = useRef<HTMLAnchorElement>(null);
useEffect(() => {
if (tagLinks && linkRef.current) tagMisleadingLink(linkRef.current);
}, [tagLinks]);
// Handle hashtags
if (text.startsWith('#') || prevText?.endsWith('#')) {
@@ -93,6 +193,7 @@ export const HandledLink: FC<HandledLinkProps & ComponentProps<'a'>> = ({
target='_blank'
rel='noreferrer noopener'
translate='no'
ref={linkRef}
>
{children}
</a>

View File

@@ -14,70 +14,14 @@ import { Icon } from 'flavours/glitch/components/icon';
import { Poll } from 'flavours/glitch/components/poll';
import { identityContextPropShape, withIdentity } from 'flavours/glitch/identity_context';
import { languages as preloadedLanguages } from 'flavours/glitch/initial_state';
import { decode as decodeIDNA } from 'flavours/glitch/utils/idna';
import { isModernEmojiEnabled } from '../utils/environment';
import { EmojiHTML } from './emoji/html';
import { HandledLink } from './status/handled_link';
import { HandledLink, isLinkMisleading, tagMisleadingLink } from './status/handled_link';
const MAX_HEIGHT = 706; // 22px * 32 (+ 2px padding at the top)
/**
 * Checks whether a link's visible text designates its target: the text must
 * equal `origin` or `host`, be a path below one of them, or do so for `host`
 * once `www.` is prepended. The comparison is case-sensitive.
 * @param {string} text visible link text
 * @param {string} origin origin of the target URL
 * @param {string} host host of the target URL
 * @returns {boolean} whether the text matches the target
 */
const textMatchesTarget = (text, origin, host) => {
  if (text === origin || text === host) return true;
  if (text.startsWith(`${origin}/`) || text.startsWith(`${host}/`)) return true;

  // Allow the text to omit a leading `www.` found in the host.
  const prefixed = `www.${text}`;
  return prefixed === host || prefixed.startsWith(`${host}/`);
};
/**
 * Decides whether the visible text of a link fails to identify the link's
 * actual destination. Throws a TypeError when `link.href` cannot be parsed by
 * `new URL`.
 * @param {HTMLAnchorElement} link anchor element to inspect
 * @returns {boolean} `true` when the text does not match the target
 */
const isLinkMisleading = (link) => {
  // Gather the visible text by hand: remote software controls the markup, and
  // `innerText` cannot be used because the `invisible` class does not set
  // `display` to `none`.
  const visibleParts = [];

  const collect = (node) => {
    if (node.nodeType === Node.TEXT_NODE) {
      visibleParts.push(node.textContent);
    } else if (node.nodeType === Node.ELEMENT_NODE) {
      // Hidden subtrees contribute nothing to what the reader sees.
      if (node.classList.contains('invisible')) return;

      for (const child of node.childNodes) {
        collect(child);
      }
    }
  };

  collect(link);

  const linkText = visibleParts.join('');
  const targetURL = new URL(link.href);
  const { protocol } = targetURL;

  if (protocol === 'magnet:') {
    return !linkText.startsWith('magnet:');
  }

  if (protocol === 'xmpp:') {
    return linkText !== targetURL.href && 'xmpp:' + linkText !== targetURL.href;
  }

  // Tries the text as-is first, then lowercased.
  const matchesEitherCase = (text, origin, host) =>
    textMatchesTarget(text, origin, host) || textMatchesTarget(text.toLowerCase(), origin, host);

  // First attempt against the URL as serialized; this may not work with
  // international domain names.
  if (matchesEitherCase(linkText, targetURL.origin, targetURL.host)) {
    return false;
  }

  // Not recognized yet: retry with the hostname decoded from its IDNA form,
  // comparing NFKC-normalized strings on both sides.
  const unicodeHostname = decodeIDNA(targetURL.hostname).normalize('NFKC');
  const unicodeHost = targetURL.host.replace(targetURL.hostname, unicodeHostname);
  const unicodeOrigin = targetURL.origin.replace(targetURL.host, unicodeHost);

  return !matchesEitherCase(linkText.normalize('NFKC'), unicodeOrigin, unicodeHost);
};
/**
*
* @param {any} status
@@ -214,30 +158,7 @@ class StatusContent extends PureComponent {
link.setAttribute('target', '_blank');
link.setAttribute('rel', 'noopener nofollow');
try {
if (tagLinks && isLinkMisleading(link)) {
// Add a tag besides the link to display its origin
const url = new URL(link.href);
const tag = document.createElement('span');
tag.classList.add('link-origin-tag');
switch (url.protocol) {
case 'xmpp:':
tag.textContent = `[${url.href}]`;
break;
case 'magnet:':
tag.textContent = '(magnet)';
break;
default:
tag.textContent = `[${url.host}]`;
}
link.insertAdjacentText('beforeend', ' ');
link.insertAdjacentElement('beforeend', tag);
}
} catch (e) {
// The URL is invalid, remove the href just to be safe
if (tagLinks && e instanceof TypeError) link.removeAttribute('href');
}
if (tagLinks) tagMisleadingLink(link);
}
}
}

View File

@@ -1,10 +0,0 @@
import punycode from 'punycode';
// Prefix marking a punycode-encoded (IDNA) domain label.
const IDNA_PREFIX = 'xn--';

/**
 * Decodes the punycode-encoded labels of a domain name. Labels are separated
 * by `.`; those starting with `xn--` are passed (without the prefix) to
 * `punycode.decode`, every other label is kept unchanged.
 * @param {string} domain dot-separated domain name
 * @returns {string} the domain with its IDNA labels decoded
 */
export const decode = domain => {
  const labels = domain.split('.');

  const decodedLabels = labels.map(label => {
    if (!label.startsWith(IDNA_PREFIX)) return label;
    return punycode.decode(label.slice(IDNA_PREFIX.length));
  });

  return decodedLabels.join('.');
};

View File

@@ -0,0 +1,14 @@
import punycode from 'punycode/';
// Prefix marking a punycode-encoded (IDNA) domain label.
const IDNA_PREFIX = 'xn--';

/**
 * Decodes the punycode-encoded labels of a domain name.
 *
 * The domain is split on `.`; each label starting with `xn--` is handed to
 * `punycode.decode` with the prefix removed, and every other label is kept
 * unchanged. The labels are then joined back with `.`.
 *
 * @param domain - Dot-separated domain name.
 * @returns The domain with its IDNA labels decoded.
 */
export const decode = (domain: string) => {
  const decodedLabels: string[] = [];

  for (const label of domain.split('.')) {
    decodedLabels.push(
      label.startsWith(IDNA_PREFIX)
        ? punycode.decode(label.slice(IDNA_PREFIX.length))
        : label,
    );
  }

  return decodedLabels.join('.');
};