/**
 * Parse a string as an absolute URL without throwing.
 *
 * @param {string} value - candidate URL string
 * @returns {URL|null} the parsed URL, or null when `new URL(value)` rejects it
 */
function parseUrlOrNull (value) {
  try {
    return new URL(value)
  } catch {
    return null
  }
}

/**
 * Decide whether a link's visible text looks like it points to a different
 * origin than the link's actual target.
 *
 * Two shapes of text are checked:
 *  - text that is itself an absolute URL with a host (`https://google.com`)
 *  - text that starts like a bare dotted domain (`www.google.com`), which is
 *    interpreted using the protocol of `href`
 *
 * Anything else (plain words, `Note: ...`, an unparsable `href`) is never
 * reported as misleading. This function does not throw.
 *
 * @param {string} text - the visible link text
 * @param {string} href - the link target
 * @returns {boolean} true when the text suggests another origin than `href`
 */
export function isMisleadingLink (text, href) {
  const hrefUrl = parseUrlOrNull(href)
  // without a parsable absolute target there is no origin to compare against
  if (hrefUrl === null) return false

  // Trim once so both checks see the same label. Leading whitespace used to
  // pass the domain regex but then made `protocol + text` unparsable, which
  // let ` google.com` slip through undetected.
  const label = String(text).trim()

  // Only compare origins when the label URL really has a host. Labels such as
  // `Note: see below` or `google.com:8080` parse as URLs with a non-special
  // scheme and an opaque path; their origin is always the string 'null', so
  // comparing it would flag harmless text.
  const labelUrl = parseUrlOrNull(label)
  if (labelUrl !== null && labelUrl.host !== '' && labelUrl.origin !== hrefUrl.origin) {
    return true
  }

  // bare domain: borrow the target's protocol so only the host (and port) can differ
  if (/^([\w-]+\.)+\w+/.test(label)) {
    const impliedUrl = parseUrlOrNull(hrefUrl.protocol + label)
    return impliedUrl !== null && impliedUrl.origin !== hrefUrl.origin
  }

  return false
}
/^((https?|ftp):\/\/)?(www.)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i diff --git a/lib/url.spec.js b/lib/url.spec.js index 8c3013644..34030ad98 100644 --- a/lib/url.spec.js +++ b/lib/url.spec.js @@ -1,8 +1,8 @@ /* eslint-env jest */ -import { parseInternalLinks } from './url.js' +import { parseInternalLinks, isMisleadingLink } from './url.js' -const cases = [ +const internalLinkCases = [ ['https://stacker.news/items/123', '#123'], ['https://stacker.news/items/123/related', '#123/related'], // invalid links should not be parsed so user can spot error @@ -20,7 +20,7 @@ const cases = [ ] describe('internal links', () => { - test.each(cases)( + test.each(internalLinkCases)( 'parses %p as %p', (href, expected) => { process.env.NEXT_PUBLIC_URL = 'https://stacker.news' @@ -29,3 +29,30 @@ describe('internal links', () => { } ) }) + +const misleadingLinkCases = [ + 
// if text is the same as the link, it's not misleading + ['https://stacker.news/items/1234', 'https://stacker.news/items/1234', false], + // same origin is not misleading + ['https://stacker.news/items/1235', 'https://stacker.news/items/1234', false], + ['www.google.com', 'https://www.google.com', false], + ['stacker.news', 'https://stacker.news', false], + // if text is obviously not a link, it's not misleading + ['innocent text', 'https://stacker.news/items/1234', false], + ['innocenttext', 'https://stacker.news/items/1234', false], + // if text might be a link to a different origin, it's misleading + ['innocent.text', 'https://stacker.news/items/1234', true], + ['https://google.com', 'https://bing.com', true], + ['www.google.com', 'https://bing.com', true], + ['s-tacker.news', 'https://snacker.news', true] +] + +describe('misleading links', () => { + test.each(misleadingLinkCases)( + 'identifies [%p](%p) as misleading: %p', + (text, href, expected) => { + const actual = isMisleadingLink(text, href) + expect(actual).toBe(expected) + } + ) +})