Skip to content

Commit

Permalink
Add unfurling for Twitter links
Browse files Browse the repository at this point in the history
Fix lint issues

Wrap json.Unmarshal() errors in urls.go

Bump version 0.83.9 -> 0.83.10
  • Loading branch information
ismaeldm95 authored and flexsurfer committed Aug 16, 2021
1 parent 7dfeda1 commit 1efe023
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 4 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.83.9
0.83.10
61 changes: 58 additions & 3 deletions protocol/urls/urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package urls
import (
"encoding/json"
"fmt"
"html"
"io/ioutil"
"net/http"
"net/url"
Expand All @@ -18,6 +19,12 @@ type YoutubeOembedData struct {
ThumbnailURL string `json:"thumbnail_url"`
}

// TwitterOembedData holds the fields decoded from the JSON body returned by
// the Twitter oEmbed endpoint (see GetTwitterOembed).
type TwitterOembedData struct {
// ProviderName is decoded from the "provider_name" key; GetTwitterPreviewData
// copies it into the preview's Site field.
ProviderName string `json:"provider_name"`
// AuthorName is decoded from the "author_name" key.
AuthorName string `json:"author_name"`
// HTML is decoded from the "html" key; GetTwitterPreviewData converts it to
// plain text with GetReadableTextFromTweetHTML and uses it as the Title.
HTML string `json:"html"`
}

type GiphyOembedData struct {
ProviderName string `json:"provider_name"`
Title string `json:"title"`
Expand Down Expand Up @@ -50,6 +57,7 @@ type Site struct {
}

// oEmbed endpoint URL templates, one per provider. GetTwitterOembed fills the
// %s verb of TwitterOembedLink with the link to preview via fmt.Sprintf; the
// other templates presumably follow the same pattern (their callers are not
// fully visible here).
const YoutubeOembedLink = "https://www.youtube.com/oembed?format=json&url=%s"
const TwitterOembedLink = "https://publish.twitter.com/oembed?url=%s"
const GiphyOembedLink = "https://giphy.com/services/oembed?url=%s"
const TenorOembedLink = "https://tenor.com/oembed?url=%s"

Expand All @@ -74,6 +82,11 @@ func LinkPreviewWhitelist() []Site {
Address: "youtu.be",
ImageSite: false,
},
Site{
Title: "Twitter",
Address: "twitter.com",
ImageSite: false,
},
// Site{
// Title: "Tenor GIFs",
// Address: "tenor.com",
Expand Down Expand Up @@ -129,7 +142,7 @@ func GetYoutubeOembed(url string) (data YoutubeOembedData, err error) {

err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}

return data, nil
Expand All @@ -148,6 +161,46 @@ func GetYoutubePreviewData(link string) (previewData LinkPreviewData, err error)
return previewData, nil
}

// GetTwitterOembed fetches the oEmbed description of a Twitter link and decodes
// it into a TwitterOembedData.
//
// url is substituted into TwitterOembedLink and the resulting address is
// fetched with GetURLContent. Both the fetch error and the JSON decoding error
// are wrapped with %w so callers can inspect the underlying cause with
// errors.Is / errors.As.
//
// NOTE(review): url is interpolated into the query string without
// query-escaping; links containing characters such as '&' or '#' may not reach
// the endpoint intact.
func GetTwitterOembed(url string) (data TwitterOembedData, err error) {
	oembedLink := fmt.Sprintf(TwitterOembedLink, url)

	jsonBytes, err := GetURLContent(oembedLink)
	if err != nil {
		// Keep the cause in the chain instead of discarding it.
		return data, fmt.Errorf("can't get bytes from twitter oembed response on %s link: %w", oembedLink, err)
	}

	if err = json.Unmarshal(jsonBytes, &data); err != nil {
		return data, fmt.Errorf("can't unmarshall json %w", err)
	}

	return data, nil
}

// GetTwitterPreviewData builds the link preview for a Twitter link.
//
// It requests the oEmbed data for link and, on success, fills in the preview's
// Site from the provider name and its Title from the readable text extracted
// out of the embedded tweet HTML. If the oEmbed lookup fails, the zero-value
// preview is returned together with that error.
func GetTwitterPreviewData(link string) (previewData LinkPreviewData, err error) {
	tweet, err := GetTwitterOembed(link)
	if err == nil {
		previewData.Site = tweet.ProviderName
		previewData.Title = GetReadableTextFromTweetHTML(tweet.HTML)
	}

	return previewData, err
}

// GetReadableTextFromTweetHTML turns the HTML snippet of an embedded tweet
// into plain, human-readable text.
//
// Steps, in order:
//  1. every <br> becomes a line break;
//  2. every "https://" is moved to the start of a new line;
//  3. HTML tags are removed with stripHTMLTags;
//  4. HTML entities (e.g. &#225;, &mdash;) are decoded;
//  5. leading and trailing whitespace, including line breaks, is trimmed.
//
// Entities are decoded only after the tags are stripped: decoding first would
// turn an escaped "&lt;...&gt;" that belongs to the tweet's text into real
// angle brackets, which the tag stripper would then delete as if it were
// markup.
func GetReadableTextFromTweetHTML(s string) string {
	s = strings.ReplaceAll(s, "<br>", "\n")             // Adds line break for all <br>
	s = strings.ReplaceAll(s, "https://", "\nhttps://") // Displays links in next line
	s = stripHTMLTags(s)
	s = html.UnescapeString(s) // Parses html special characters like &#225;

	// TrimSpace already removes leading/trailing "\n", so no extra trimming
	// of line breaks is needed.
	return strings.TrimSpace(s)
}

func GetGenericLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
// nolint: gosec
res, err := httpClient.Get(link)
Expand Down Expand Up @@ -175,7 +228,7 @@ func GetGiphyOembed(url string) (data GiphyOembedData, err error) {

err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}

return data, nil
Expand Down Expand Up @@ -236,7 +289,7 @@ func GetTenorOembed(url string) (data TenorOembedData, err error) {

err = json.Unmarshal(jsonBytes, &data)
if err != nil {
return data, fmt.Errorf("can't unmarshall json")
return data, fmt.Errorf("can't unmarshall json %w", err)
}

return data, nil
Expand Down Expand Up @@ -276,6 +329,8 @@ func GetLinkPreviewData(link string) (previewData LinkPreviewData, err error) {
return GetGiphyShortURLPreviewData(link)
case "tenor.com":
return GetTenorPreviewData(link)
case "twitter.com":
return GetTwitterPreviewData(link)
default:
return previewData, fmt.Errorf("link %s isn't whitelisted. Hostname - %s", link, url.Hostname())
}
Expand Down
42 changes: 42 additions & 0 deletions protocol/urls/urls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,45 @@ func TestStatusLinkPreviewData(t *testing.T) {
// require.Equal(t, statusSecurityAudit.Title, previewData.Title)
// require.Equal(t, statusSecurityAudit.ThumbnailURL, previewData.ThumbnailURL)
// }

// TestTwitterLinkPreviewData checks the previews produced for two tweets and a
// profile page, and that a non-whitelisted host is rejected.
//
// It calls GetLinkPreviewData with real twitter.com links.
func TestTwitterLinkPreviewData(t *testing.T) {
	testCases := []struct {
		link     string
		expected LinkPreviewData
	}{
		{
			link: "https://twitter.com/ethstatus/status/1419674733885407236",
			expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Crypto isn't going anywhere.— Status (@ethstatus) July 26, 2021",
			},
		},
		{
			link: "https://twitter.com/ethstatus/status/1420035091997278214",
			expected: LinkPreviewData{
				Site: "Twitter",
				Title: "🎉 Status v1.15 is a go! 🎉\n\n📌 Pin important messages in chats and groups" +
					"\n✏️ Edit messages after sending\n🔬 Scan QR codes with the browser\n⚡️ FASTER app navigation!" +
					"\nhttps://t.co/qKrhDArVKb— Status (@ethstatus) July 27, 2021",
			},
		},
		{
			link: "https://twitter.com/ethstatus",
			expected: LinkPreviewData{
				Site:  "Twitter",
				Title: "Tweets by ethstatus",
			},
		},
	}

	for _, tc := range testCases {
		previewData, err := GetLinkPreviewData(tc.link)
		require.NoError(t, err)
		require.Equal(t, tc.expected.Site, previewData.Site)
		require.Equal(t, tc.expected.Title, previewData.Title)
		// Compare against the fetched preview; the fixture leaves
		// ThumbnailURL unset, so this asserts that no thumbnail is returned.
		require.Equal(t, tc.expected.ThumbnailURL, previewData.ThumbnailURL)
	}

	_, err := GetLinkPreviewData("https://www.test.com/unknown")
	require.Error(t, err)
}
54 changes: 54 additions & 0 deletions protocol/urls/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package urls

import (
"strings"
"unicode/utf8"
)

const (
	htmlTagStart = 60 // Unicode `<`
	htmlTagEnd   = 62 // Unicode `>`
)

// stripHTMLTags aggressively strips HTML tags from a string.
// Based on https://stackoverflow.com/a/64701836.
//
// Everything from a `<` up to and including the next `>` is removed, so a
// nested opener such as `<<br>` is dropped as a whole. Text before the first
// `<`, between a `>` and the next `<`, and after the last `>` is kept. An
// unterminated tag at the end of the input is discarded, and text that
// precedes a stray `>` (one with no matching `<`) is discarded as well.
//
// The scan tracks byte offsets only, so it is independent of how many bytes
// the surrounding characters occupy: trailing text is kept even when the
// input ends in a multi-byte rune.
func stripHTMLTags(s string) string {
	// The result is never longer than the input; utf8.UTFMax is kept as
	// headroom so the builder does not need to reallocate.
	var builder strings.Builder
	builder.Grow(len(s) + utf8.UTFMax)

	inTag := false // True while we are between a `<` and its closing `>`.
	textStart := 0 // Byte offset where the current run of plain text begins.

	for i, c := range s {
		switch c {
		case htmlTagStart:
			// Flush the text collected so far, but only on the first `<`
			// of a tag so a repeated `<` does not write it twice.
			if !inTag {
				builder.WriteString(s[textStart:i])
				inTag = true
			}
		case htmlTagEnd:
			// `>` is a single byte, so the next text run starts at i+1.
			inTag = false
			textStart = i + 1
		}
	}

	// Keep whatever follows the last closed tag.
	if !inTag {
		builder.WriteString(s[textStart:])
	}

	return builder.String()
}

0 comments on commit 1efe023

Please sign in to comment.