Skip to content

Commit e23eda5

Browse files
authored
feat: fetch HTTP retrievals directly (#82)
When retrieving content using the Trustless HTTP Gateway protocol ("http"), fetch the content directly from the provider instead of going through Lassie. This should give us better visibility into the various error statuses returned by providers, e.g. 429 Too Many Requests, which Lassie converts to a generic 502 Bad Gateway error.

List of synthetic status codes corresponding to the different errors we may encounter along the new codepath:
- 600 - unknown error (fallback)
- 701 - provider's multiaddr has an unsupported hostname type (e.g. `ipv4` instead of `ip4`)
- 702 - provider's multiaddr is not "tcp"
- 703 - provider's multiaddr scheme is neither "http" nor "https"
- 704 - provider's multiaddr has too many parts
- 801 - provider's hostname cannot be resolved via DNS
- 802 - TCP connection error
- 901 - CID uses an unsupported hash algorithm
- 902 - payload's hash does not match the CID
- 903 - provider returned unexpected blocks in the CAR response
- 904 - the CAR response cannot be parsed

Signed-off-by: Miroslav Bajtoš <oss@bajtos.net>
1 parent 410ce4c commit e23eda5

File tree

7 files changed

+8519
-70
lines changed

7 files changed

+8519
-70
lines changed

deps.ts

+8
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,11 @@
77
export { encodeHex } from 'https://deno.land/std@0.203.0/encoding/hex.ts'
88
export { decodeBase64 } from 'https://deno.land/std@0.203.0/encoding/base64.ts'
99
export { decode as decodeVarint } from 'https://deno.land/x/varint@v2.0.0/varint.ts'
10+
11+
// Deno Bundle does not support npm dependencies, we have to load them via CDN
12+
export { CarBlockIterator } from 'https://cdn.skypack.dev/@ipld/car@5.3.2/?dts'
13+
export {
14+
UnsupportedHashError,
15+
HashMismatchError,
16+
validateBlock
17+
} from 'https://cdn.skypack.dev/@web3-storage/car-block-validator@1.2.0/?dts'

lib/multiaddr.js

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
 * Convert a multiaddr describing an HTTP(S) endpoint into a URL.
 *
 * The accepted shape is `/<host-type>/<host>/tcp/<port>/<http|https>`.
 * The port is left out of the result when it is the default port of the
 * scheme (see `getUriPort`), so `/ip4/127.0.0.1/tcp/80/http` becomes
 * `http://127.0.0.1`, not `http://127.0.0.1:80`.
 *
 * @param {string} addr Multiaddr, e.g. `/ip4/127.0.0.1/tcp/8080/http`
 * @returns {string} URL without a trailing slash, e.g. `http://127.0.0.1:8080`
 * @throws {Error} An error whose `code` property is one of:
 *   - `UNSUPPORTED_MULTIADDR_PROTO` - the third component is not `tcp`
 *   - `UNSUPPORTED_MULTIADDR_SCHEME` - the fifth component is neither `http` nor `https`
 *   - `MULTIADDR_HAS_TOO_MANY_PARTS` - there are components after the scheme
 *   - `UNSUPPORTED_MULTIADDR_HOST_TYPE` - raised by `getUriHost` for an unknown host type
 */
export function multiaddrToHttpUrl (addr) {
  // For an address starting with `/`, split() yields an empty first item; it is skipped.
  const [, hostType, hostValue, ipProtocol, port, scheme, ...rest] = addr.split('/')

  // All parse errors share the same message prefix and carry a machine-readable code.
  const parseError = (reason, code) => Object.assign(
    new Error(`Cannot parse "${addr}": ${reason}`),
    { code }
  )

  if (ipProtocol !== 'tcp') {
    throw parseError(`unsupported protocol "${ipProtocol}"`, 'UNSUPPORTED_MULTIADDR_PROTO')
  }

  const isHttpScheme = scheme === 'http' || scheme === 'https'
  if (!isHttpScheme) {
    throw parseError(`unsupported scheme "${scheme}"`, 'UNSUPPORTED_MULTIADDR_SCHEME')
  }

  if (rest.length > 0) {
    throw parseError('too many parts', 'MULTIADDR_HAS_TOO_MANY_PARTS')
  }

  // The host type is validated last, by getUriHost().
  const host = getUriHost(hostType, hostValue)
  const portSuffix = getUriPort(scheme, port)
  return `${scheme}://${host}${portSuffix}`
}
31+
32+
/**
 * Format the host component of a multiaddr for use inside a URL.
 *
 * @param {string} hostType Multiaddr host protocol: `ip4`, `ip6`, `dns`, `dns4` or `dns6`
 * @param {string} hostValue IP address or hostname taken from the multiaddr
 * @returns {string} `hostValue` as-is, or wrapped in square brackets for `ip6`
 * @throws {Error} with `code` set to `UNSUPPORTED_MULTIADDR_HOST_TYPE` for any other host type
 */
function getUriHost (hostType, hostValue) {
  const usedVerbatim = ['ip4', 'dns', 'dns4', 'dns6'].includes(hostType)
  if (usedVerbatim) {
    return hostValue
  }

  if (hostType === 'ip6') {
    // See https://superuser.com/a/367788/135774:
    // According to RFC2732, literal IPv6 addresses should be put inside square brackets in URLs
    return `[${hostValue}]`
  }

  const unsupported = new Error(`Unsupported multiaddr host type "${hostType}"`)
  throw Object.assign(unsupported, { code: 'UNSUPPORTED_MULTIADDR_HOST_TYPE' })
}
50+
51+
/**
 * Build the port suffix of a URL.
 *
 * @param {string} scheme URL scheme, e.g. `http` or `https`
 * @param {string} port Port number as a string, as found in the multiaddr
 * @returns {string} An empty string when `port` is the default for `scheme`
 *   (80 for http, 443 for https), otherwise `:` followed by `port`
 */
function getUriPort (scheme, port) {
  const isDefaultPort =
    (scheme === 'http' && port === '80') ||
    (scheme === 'https' && port === '443')

  return isDefaultPort ? '' : `:${port}`
}

lib/spark.js

+111-30
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@ import { ActivityState } from './activity-state.js'
44
import { SPARK_VERSION, MAX_CAR_SIZE, APPROX_ROUND_LENGTH_IN_MS } from './constants.js'
55
import { queryTheIndex } from './ipni-client.js'
66
import { getMinerPeerId as defaultGetMinerPeerId } from './miner-info.js'
7+
import { multiaddrToHttpUrl } from './multiaddr.js'
8+
79
import {
8-
encodeHex
10+
CarBlockIterator,
11+
encodeHex,
12+
HashMismatchError,
13+
UnsupportedHashError,
14+
validateBlock
915
} from '../vendor/deno-deps.js'
1016

1117
const sleep = dt => new Promise(resolve => setTimeout(resolve, dt))
@@ -77,25 +83,10 @@ export default class Spark {
7783
stats.protocol = provider.protocol
7884
stats.providerAddress = provider.address
7985

80-
const searchParams = new URLSearchParams({
81-
// See https://github.com/filecoin-project/lassie/blob/main/docs/HTTP_SPEC.md#dag-scope-request-query-parameter
82-
// Only the root block at the end of the path is returned after blocks required to verify the specified path segments.
83-
'dag-scope': 'block',
84-
protocols: provider.protocol,
85-
providers: provider.address
86-
})
87-
const url = `ipfs://${retrieval.cid}?${searchParams.toString()}`
88-
try {
89-
await this.fetchCAR(url, stats)
90-
} catch (err) {
91-
console.error(`Failed to fetch ${url}`)
92-
console.error(err)
93-
}
86+
await this.fetchCAR(provider.protocol, provider.address, retrieval.cid, stats)
9487
}
9588

96-
async fetchCAR (url, stats) {
97-
console.log(`Fetching: ${url}`)
98-
89+
async fetchCAR (protocol, address, cid, stats) {
9990
// Abort if no progress was made for 60 seconds
10091
const controller = new AbortController()
10192
const { signal } = controller
@@ -116,6 +107,9 @@ export default class Spark {
116107
const carBytes = new Uint8Array(carBuffer)
117108

118109
try {
110+
const url = getRetrievalUrl(protocol, address, cid)
111+
console.log(`Fetching: ${url}`)
112+
119113
resetTimeout()
120114
const res = await this.#fetch(url, { signal })
121115
stats.statusCode = res.status
@@ -146,6 +140,8 @@ export default class Spark {
146140
}
147141

148142
if (!stats.carTooLarge) {
143+
await verifyContent(cid, carBytes)
144+
149145
const digest = await crypto.subtle.digest('sha-256', carBytes)
150146
// 12 is the code for sha2-256
151147
// 20 is the digest length (32 bytes = 256 bits)
@@ -155,6 +151,12 @@ export default class Spark {
155151
console.error('Retrieval failed with status code %s: %s',
156152
res.status, (await res.text()).trimEnd())
157153
}
154+
} catch (err) {
155+
console.error(`Failed to fetch ${cid} from ${address} using ${protocol}`)
156+
console.error(err)
157+
if (!stats.statusCode || stats.statusCode === 200) {
158+
stats.statusCode = mapErrorToStatusCode(err)
159+
}
158160
} finally {
159161
clearTimeout(timeout)
160162
}
@@ -190,18 +192,7 @@ export default class Spark {
190192
async nextRetrieval () {
191193
const { id: retrievalId, ...retrieval } = await this.getRetrieval()
192194

193-
const stats = {
194-
timeout: false,
195-
startAt: new Date(),
196-
firstByteAt: null,
197-
endAt: null,
198-
carTooLarge: false,
199-
byteLength: 0,
200-
carChecksum: null,
201-
statusCode: null,
202-
providerId: null,
203-
indexerResult: null
204-
}
195+
const stats = newStats()
205196

206197
await this.executeRetrievalCheck(retrieval, stats)
207198

@@ -240,6 +231,96 @@ export default class Spark {
240231
}
241232
}
242233

234+
/**
 * Create a fresh stats record for a single retrieval check.
 *
 * `startAt` is set to the time of the call; all other fields start with
 * their "nothing happened yet" values and are expected to be filled in by
 * the caller as the retrieval progresses.
 *
 * @returns {{
 *   timeout: boolean,
 *   startAt: Date,
 *   firstByteAt: Date | null,
 *   endAt: Date | null,
 *   carTooLarge: boolean,
 *   byteLength: number,
 *   carChecksum: string | null,
 *   statusCode: number | null
 * }}
 */
export function newStats () {
  const startAt = new Date()

  const stats = {
    timeout: false,
    startAt,
    firstByteAt: null,
    endAt: null,
    carTooLarge: false,
    byteLength: 0,
    carChecksum: null,
    statusCode: null
  }
  return stats
}
246+
247+
/**
 * Build the URL used to retrieve the block identified by `cid`.
 *
 * - For the `http` protocol the URL points directly at the provider: the
 *   provider's multiaddr is converted via `multiaddrToHttpUrl` and
 *   `/ipfs/<cid>?dag-scope=block` is appended.
 * - For any other protocol an `ipfs://` URL is produced, with the protocol and
 *   the provider address passed as the `protocols` and `providers` query parameters.
 *
 * @param {string} protocol Retrieval protocol, e.g. `http`
 * @param {string} address Provider's multiaddr
 * @param {string} cid CID of the content to retrieve
 * @returns {string}
 */
function getRetrievalUrl (protocol, address, cid) {
  if (protocol !== 'http') {
    const query = new URLSearchParams()
    // See https://github.com/filecoin-project/lassie/blob/main/docs/HTTP_SPEC.md#dag-scope-request-query-parameter
    // Only the root block at the end of the path is returned after blocks required to verify the specified path segments.
    query.append('dag-scope', 'block')
    query.append('protocols', protocol)
    query.append('providers', address)
    return `ipfs://${cid}?${query.toString()}`
  }

  const providerOrigin = multiaddrToHttpUrl(address)
  return `${providerOrigin}/ipfs/${cid}?dag-scope=block`
}
262+
263+
/**
 * Check that the CAR payload contains only the requested block and that
 * every block passes `validateBlock`.
 *
 * NOTE(review): a CAR file containing no blocks at all passes this check,
 * because the loop body never runs - confirm this is intended.
 *
 * @param {string} cid CID of the block that was requested
 * @param {Uint8Array} carBytes Raw bytes of the CAR response
 * @returns {Promise<void>}
 * @throws An error with `code` `CANNOT_PARSE_CAR_BYTES` when the CAR reader
 *   cannot be created, an error with `code` `UNEXPECTED_CAR_BLOCK` when a block
 *   has a different CID, or whatever `validateBlock` rejects with
 *   (presumably the hash errors imported alongside it - verify against the library).
 */
async function verifyContent (cid, carBytes) {
  let blocks
  try {
    blocks = await CarBlockIterator.fromBytes(carBytes)
  } catch (parseError) {
    // Tag the original error so that it can be mapped to a dedicated status code
    throw Object.assign(parseError, { code: 'CANNOT_PARSE_CAR_BYTES' })
  }

  for await (const block of blocks) {
    const isRequestedBlock = block.cid.toString() === cid.toString()
    if (!isRequestedBlock) {
      const unexpected = new Error(`Unexpected block CID ${block.cid}. Expected: ${cid}`)
      throw Object.assign(unexpected, { code: 'UNEXPECTED_CAR_BLOCK' })
    }

    await validateBlock(block)
  }
}
286+
287+
/**
 * Map an error caught while retrieving or verifying content to a synthetic
 * status code reported in the retrieval stats.
 *
 * - 701-704: the provider's multiaddr could not be converted to a URL
 * - 801-802: network connection errors (detected from the error message)
 * - 901-904: content verification errors
 * - 600: fallback for everything else
 *
 * This function runs inside a `catch` handler, so it must never throw itself:
 * values that are not Error-like (`null`, `undefined`, strings, objects
 * without a string `message`) are mapped to the fallback code.
 *
 * @param {unknown} err The value that was thrown
 * @returns {number} Synthetic status code
 */
function mapErrorToStatusCode (err) {
  // `err` can be any thrown value; guard the property access
  const code = err?.code

  // 7xx codes for multiaddr parsing errors
  switch (code) {
    case 'UNSUPPORTED_MULTIADDR_HOST_TYPE':
      return 701
    case 'UNSUPPORTED_MULTIADDR_PROTO':
      return 702
    case 'UNSUPPORTED_MULTIADDR_SCHEME':
      return 703
    case 'MULTIADDR_HAS_TOO_MANY_PARTS':
      return 704
  }

  // 9xx for content verification errors
  if (err instanceof UnsupportedHashError) {
    return 901
  } else if (err instanceof HashMismatchError) {
    return 902
  } else if (code === 'UNEXPECTED_CAR_BLOCK') {
    return 903
  } else if (code === 'CANNOT_PARSE_CAR_BYTES') {
    return 904
  }

  // 8xx errors for network connection errors
  // Unfortunately, the Fetch API does not support programmatic detection of various error
  // conditions. We have to check the error message text.
  const message = typeof err?.message === 'string' ? err.message : ''
  if (message.includes('dns error')) {
    return 801
  } else if (message.includes('tcp connect error')) {
    return 802
  }

  // Fallback code for unknown errors
  return 600
}
323+
243324
async function assertOkResponse (res, errorMsg) {
244325
if (res.ok) return
245326

test.js

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import './test/ipni-client.test.js'
22
import './test/miner-info.test.js'
3+
import './test/multiaddr.test.js'
4+
35
import './test/integration.js'
46
import './test/spark.js'

test/multiaddr.test.js

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { test } from 'zinnia:test'
2+
import { assertEquals, assertThrows } from 'zinnia:assert'
3+
import { multiaddrToHttpUrl } from '../lib/multiaddr.js'
4+
5+
// Pairs of [multiaddr, expected URL] that must be converted successfully.
const HAPPY_CASES = [
  ['/ip4/127.0.0.1/tcp/80/http', 'http://127.0.0.1'],
  ['/ip4/127.0.0.1/tcp/8080/http', 'http://127.0.0.1:8080'],
  ['/ip4/127.0.0.1/tcp/443/https', 'https://127.0.0.1'],
  ['/ip4/127.0.0.1/tcp/8080/https', 'https://127.0.0.1:8080'],
  // IPv6 literals must be wrapped in square brackets
  ['/ip6/::1/tcp/8080/http', 'http://[::1]:8080'],
  ['/dns/meridian.space/tcp/8080/http', 'http://meridian.space:8080'],
  ['/dns4/meridian.space/tcp/8080/http', 'http://meridian.space:8080'],
  ['/dns6/meridian.space/tcp/8080/http', 'http://meridian.space:8080']
]

for (const [multiaddr, expectedUri] of HAPPY_CASES) {
  test(`parse ${multiaddr}`, () => {
    const uri = multiaddrToHttpUrl(multiaddr)
    assertEquals(uri, expectedUri)
  })
}

// Triples of [multiaddr, expected error message, expected error code].
// The code is checked too because it is what gets mapped to a synthetic status code.
const ERROR_CASES = [
  [
    '/ip4/127.0.0.1/tcp/80',
    'Cannot parse "/ip4/127.0.0.1/tcp/80": unsupported scheme "undefined"',
    'UNSUPPORTED_MULTIADDR_SCHEME'
  ],
  [
    '/ip4/127.0.0.1/udp/90',
    'Cannot parse "/ip4/127.0.0.1/udp/90": unsupported protocol "udp"',
    'UNSUPPORTED_MULTIADDR_PROTO'
  ],
  [
    '/ip4/127.0.0.1/tcp/8080/http/p2p/pubkey',
    'Cannot parse "/ip4/127.0.0.1/tcp/8080/http/p2p/pubkey": too many parts',
    'MULTIADDR_HAS_TOO_MANY_PARTS'
  ],
  [
    '/ipv4/127.0.0.1/tcp/80/http',
    'Unsupported multiaddr host type "ipv4"',
    'UNSUPPORTED_MULTIADDR_HOST_TYPE'
  ]
]

for (const [multiaddr, expectedError, expectedCode] of ERROR_CASES) {
  test(`parse ${multiaddr}`, () => {
    const err = assertThrows(() => multiaddrToHttpUrl(multiaddr))
    assertEquals(err.message, expectedError)
    assertEquals(err.code, expectedCode)
  })
}

0 commit comments

Comments
 (0)