From a797414f059b05a23f36782d112c637305fc214e Mon Sep 17 00:00:00 2001 From: louis Date: Wed, 5 Feb 2025 11:40:32 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Include=20Fragment=20to=20Extrac?= =?UTF-8?q?tURL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/util/url.go | 2 +- internal/util/url_test.go | 66 ++++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/internal/util/url.go b/internal/util/url.go index fd7192a..7a58e38 100644 --- a/internal/util/url.go +++ b/internal/util/url.go @@ -4,6 +4,6 @@ import "regexp" // ExtractURLs extracts URLs from a text. func ExtractURLs(text string) []string { - urlRegex := regexp.MustCompile(`(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)`) + urlRegex := regexp.MustCompile(`(https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(),]|%[0-9a-fA-F][0-9a-fA-F]|#)+)`) return urlRegex.FindAllString(text, -1) } diff --git a/internal/util/url_test.go b/internal/util/url_test.go index 00a0007..8051b47 100644 --- a/internal/util/url_test.go +++ b/internal/util/url_test.go @@ -7,33 +7,41 @@ import ( ) func TestExtractURL(t *testing.T) { - text := "This is a text with a URL https://example.com" - urls := ExtractURLs(text) - - assert.Equal(t, 1, len(urls)) - assert.Equal(t, "https://example.com", urls[0]) - - text = "This is a text with a URL https://example.com and another URL http://example.org" - urls = ExtractURLs(text) - - assert.Equal(t, 2, len(urls)) - assert.Equal(t, "https://example.com", urls[0]) - assert.Equal(t, "http://example.org", urls[1]) - - text = "This is a text without a URL" - urls = ExtractURLs(text) - - assert.Equal(t, 0, len(urls)) - - text = "This is a text with a URL https://www.systemli.org/en/contact/" - urls = ExtractURLs(text) - - assert.Equal(t, 1, len(urls)) - assert.Equal(t, "https://www.systemli.org/en/contact/", urls[0]) - - text = "This is a text with a URL https://www.systemli.org/en/contact/?key=value" - urls = ExtractURLs(text) - - assert.Equal(t, 1, len(urls)) - assert.Equal(t, "https://www.systemli.org/en/contact/?key=value", urls[0]) + testCases := []struct { + text string + expected []string + }{ + { + "This is a text with a URL https://example.com", + []string{"https://example.com"}, + }, + { + "This is a text with a URL https://example.com and another URL http://example.org", + []string{"https://example.com", "http://example.org"}, + }, + { + "This is a text without a URL", + []string{}, + }, + { + "This is a text with a URL https://www.systemli.org/en/contact/", + []string{"https://www.systemli.org/en/contact/"}, + }, + { + "This is a text with a URL https://www.systemli.org/en/contact/?key=value", + []string{"https://www.systemli.org/en/contact/?key=value"}, + }, + { + "This is a text with a URL https://www.systemli.org/en/contact/?key=value#fragment", + []string{"https://www.systemli.org/en/contact/?key=value#fragment"}, + }, + } + + for _, tc := range testCases { + urls := ExtractURLs(tc.text) + assert.Equal(t, len(tc.expected), len(urls)) + for i, url := range tc.expected { + assert.Equal(t, url, urls[i]) + } + } }