-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
76 lines (65 loc) · 2.06 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// A Worker that uses async HTMLRewriter.
//
// In this example, the `src` of each `img` tag in the HTML body is
// fetched (HEAD) on the edge to check that the image exists. If the
// response is not successful (non-2xx, other than 405), the `src`
// attribute is rewritten to use the closest snapshot reported by the
// Internet Archive (https://archive.org).
// Register the Worker's fetch handler: every incoming request is answered
// with whatever fetchWithImageFix() resolves to.
addEventListener('fetch', (event) => {
  const fixedResponse = fetchWithImageFix(event.request)
  event.respondWith(fixedResponse)
})
// Cloudflare-specific options sent as `cf` on every subrequest this Worker
// makes (the image probe, the archive lookup and the `/_archive` proxy).
const cf = {
  // Cache the response regardless of content type.
  cacheEverything: true,
  // Cache lifetime: 60 s * 60 min * 24 h = 86400 seconds (1 day).
  cacheTtl: 60 * 60 * 24,
  scrapeShield: false,
  // Image optimisation features.
  mirage: true,
  polish: 'lossy',
}
// Entry point for every request handled by this Worker.
//
// Requests whose path starts with `/_archive` are image-proxy requests: the
// `url` query parameter is fetched with the shared `cf` options and that
// response is returned directly. Every other request is fetched as-is and
// passed through an HTMLRewriter that runs ImageFixer on each `img` element.
//
// The proxy path is reachable by anyone, so its input is validated:
//   - a missing or unparsable `url` parameter yields 400 instead of letting
//     fetch() throw on `null`;
//   - only http(s) URLs on archive.org hosts are fetched (403 otherwise), so
//     the Worker cannot be used as a general-purpose open proxy.
//
// request: the incoming Request.
// Returns a Promise resolving to the Response to send to the client.
async function fetchWithImageFix(request) {
  const url = new URL(request.url)

  if (url.pathname.startsWith('/_archive')) {
    let target
    try {
      // `new URL(null)` and `new URL('')` both throw, covering the
      // "parameter missing" case as well as malformed values.
      target = new URL(url.searchParams.get('url'))
    } catch (err) {
      return new Response('Missing or invalid `url` query parameter', {
        status: 400,
      })
    }

    const isHttp = target.protocol === 'http:' || target.protocol === 'https:'
    const isArchiveHost =
      target.hostname === 'archive.org' ||
      target.hostname.endsWith('.archive.org')
    if (!isHttp || !isArchiveHost) {
      return new Response('Only Internet Archive URLs can be proxied', {
        status: 403,
      })
    }

    return fetch(target.href, { cf })
  }

  return new HTMLRewriter()
    .on('img', new ImageFixer())
    .transform(await fetch(request))
}
// HTMLRewriter element handler that swaps each image's `src` for the URL
// returned by fixImageUrl().
class ImageFixer {
  // Declared `async` so the handler can await the URL lookup.
  async element(element) {
    let source = element.getAttribute('src')

    // No usable `src`: fall back to `data-cfsrc` and drop that attribute.
    if (!source) {
      source = element.getAttribute('data-cfsrc')
      element.removeAttribute('data-cfsrc')
    }

    // Nothing to rewrite when neither attribute carried a URL.
    if (!source) {
      return
    }

    const fixedSource = await fixImageUrl(source)
    element.setAttribute('src', fixedSource)
  }
}
// Resolve an image URL to one that should load.
//
// url: the image's `src` value (string).
// Returns a Promise resolving to:
//   - `url` unchanged when it is not an absolute http(s) URL (root-relative
//     or page-relative paths, `data:` URIs, ...), since those cannot be
//     probed with fetch() from here;
//   - the final URL of the HEAD probe when it succeeds (2xx) or answers 405
//     (HEAD not allowed, so the image is assumed to exist);
//   - `/_archive?url=<encoded snapshot URL>` when the probe fails and the
//     Internet Archive availability API reports a snapshot;
//   - otherwise the probe's URL (or `url` itself if the probe never
//     produced a response).
//
// Never rejects: network or parsing failures are logged and fall back to the
// un-fixed URL so one bad image cannot abort the whole page rewrite.
async function fixImageUrl(url) {
  if (!/^https?:\/\//i.test(url)) {
    return url
  }

  let response = null
  try {
    response = await fetch(url, { method: 'HEAD', cf })
  } catch (err) {
    // Unreachable origin (DNS failure, refused connection, ...): treat it
    // like a failed probe and still try the archive.
    console.log('Image check failed: ' + url)
  }
  if (response && (response.ok || response.status === 405)) {
    return response.url
  }
  const fallbackUrl = response ? response.url : url

  try {
    // The target URL is a query-parameter value, so it must be encoded or
    // its own `?`/`&` would be parsed as part of the lookup URL.
    const lookupUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}`
    const archive = await fetch(lookupUrl, { cf })
    const json = await archive.json()
    // Throws when no snapshot is reported; handled by the catch below.
    const archiveUrl = new URL(json.archived_snapshots.closest.url)

    const index = archiveUrl.pathname.indexOf('http')
    if (index < 1) {
      throw new Error('Unexpected snapshot URL: ' + archiveUrl)
    }
    // Insert `im_` to archived URL so it renders as an image.
    // NOTE(review): `index - 1` drops the `/` that preceded the original
    // URL, giving `<timestamp>im_http...`; confirm the Wayback Machine
    // accepts this form.
    archiveUrl.pathname =
      archiveUrl.pathname.substring(0, index - 1) +
      'im_' +
      archiveUrl.pathname.substring(index)
    console.log('Fixed image: ' + archiveUrl)

    // Proxy the image url so we can heavily cache it. Encoded so that a
    // query string on the snapshot URL survives `searchParams.get('url')`.
    return `/_archive?url=${encodeURIComponent(archiveUrl.href)}`
  } catch (err) {
    console.log('Missing image: ' + url)
    return fallbackUrl
  }
}