Skip to content

Commit 8d0fe85

Browse files
committed
feat: direct fetch for HTTP retrievals
When retrieving the content using the Trustless HTTP Gateway protocol ("http"), fetch the content directly from the provider instead of using Lassie. This should give us better visibility into various error statuses returned by providers, e.g. 429 Too Many Requests, which Lassie converts to a generic 502 Bad Gateway error. List of synthetic status codes corresponding to different errors we may encounter along the new codepath: - 900 - unknown error (fallback) - 901 - provider's multiaddr is not "tcp" - 902 - provider's multiaddr is not "http" or "https" - 903 - provider's multiaddr has too many parts - 911 - provider's hostname cannot be resolved via DNS - 912 - TCP connection error - 921 - CID uses an unsupported hash algorithm - 922 - payload's hash does not match the CID - 923 - provider returned unexpected blocks in the CAR response Signed-off-by: Miroslav Bajtoš <oss@bajtos.net>
1 parent 94b95e4 commit 8d0fe85

File tree

7 files changed

+8346
-16
lines changed

7 files changed

+8346
-16
lines changed

deps.ts

+8
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,11 @@
77
export { encodeHex } from 'https://deno.land/std@0.203.0/encoding/hex.ts'
88
export { decodeBase64 } from 'https://deno.land/std@0.203.0/encoding/base64.ts'
99
export { decode as decodeVarint } from 'https://deno.land/x/varint@v2.0.0/varint.ts'
10+
11+
// Deno Bundle does not support npm dependencies, we have to load them via CDN
12+
export { CarBlockIterator } from 'https://cdn.skypack.dev/@ipld/car@5.3.2/?dts'
13+
export {
14+
UnsupportedHashError,
15+
HashMismatchError,
16+
validateBlock
17+
} from 'https://cdn.skypack.dev/@web3-storage/car-block-validator@1.2.0/?dts'

lib/multiaddr.js

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
 * Convert a multiaddr describing an HTTP(S) endpoint into the equivalent URI.
 *
 * The address is split on `/` and read positionally as
 * `/<hostType>/<hostValue>/<ipProtocol>/<port>/<scheme>`.
 *
 * @param {string} addr Multiaddr, e.g. `/ip4/127.0.0.1/tcp/8080/http`
 * @returns {string} Parsed URI, e.g. `http://127.0.0.1:8080`. The port is
 *   omitted when it is the default one for the scheme.
 * @throws {Error} with `code` set to `UNSUPPORTED_MULTIADDR_PROTO` when the
 *   transport is not `tcp`, `UNSUPPORTED_MULTIADDR_SCHEME` when the scheme is
 *   neither `http` nor `https`, or `MULTIADDR_HAS_TOO_MANY_PARTS` when
 *   anything follows the scheme.
 */
export function multiaddrToHttpUri (addr) {
  // All rejections share the same message prefix and carry a machine-readable code.
  const parseError = (reason, code) => Object.assign(
    new Error(`Cannot parse "${addr}": ${reason}`),
    { code }
  )

  // segments[0] is whatever precedes the first slash; it is not inspected.
  const segments = addr.split('/')
  const hostType = segments[1]
  const hostValue = segments[2]
  const ipProtocol = segments[3]
  const port = segments[4]
  const scheme = segments[5]

  if (ipProtocol !== 'tcp') {
    throw parseError(`unsupported protocol "${ipProtocol}"`, 'UNSUPPORTED_MULTIADDR_PROTO')
  }

  const isHttpScheme = scheme === 'http' || scheme === 'https'
  if (!isHttpScheme) {
    throw parseError(`unsupported scheme "${scheme}"`, 'UNSUPPORTED_MULTIADDR_SCHEME')
  }

  // Anything after the scheme (e.g. `/p2p/<peer id>`) is not supported.
  if (segments.length > 6) {
    throw parseError('too many parts', 'MULTIADDR_HAS_TOO_MANY_PARTS')
  }

  const host = getUriHost(hostType, hostValue)
  const portSuffix = buildUriPort(scheme, port)
  return `${scheme}://${host}${portSuffix}`
}
31+
32+
/**
 * Convert the host component of a multiaddr into the host part of a URI.
 *
 * @param {string} hostType Multiaddr host protocol: `ip4`, `ip6`, `dns`, `dns4` or `dns6`
 * @param {string} hostValue The address or hostname that follows `hostType`
 * @returns {string} Host suitable for embedding into a URI
 * @throws {Error} with `code: 'UNSUPPORTED_MULTIADDR_HOST_TYPE'` when
 *   `hostType` is not one of the supported protocols
 */
function getUriHost (hostType, hostValue) {
  switch (hostType) {
    case 'ip4':
    case 'dns':
    case 'dns4':
    case 'dns6':
      return hostValue
    case 'ip6':
      // IPv6 literals are wrapped in square brackets inside a URI
      return `[${hostValue}]`
    default:
      // Fail loudly instead of returning `undefined`, which the caller would
      // interpolate into a bogus URI like `http://undefined:80`.
      throw Object.assign(
        new Error(`Unsupported multiaddr host type "${hostType}"`),
        { code: 'UNSUPPORTED_MULTIADDR_HOST_TYPE' }
      )
  }
}
43+
44+
/**
 * Build the `:port` suffix of a URI, leaving it out when the port is the
 * default one for the given scheme (80 for `http`, 443 for `https`).
 *
 * @param {string} scheme URI scheme
 * @param {string} port Port number as it appears in the multiaddr
 * @returns {string} Empty string for a default port, `:<port>` otherwise
 */
function buildUriPort (scheme, port) {
  const isDefaultPort =
    (scheme === 'http' && port === '80') ||
    (scheme === 'https' && port === '443')

  return isDefaultPort ? '' : `:${port}`
}

lib/spark.js

+93-14
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@ import { ActivityState } from './activity-state.js'
44
import { SPARK_VERSION, MAX_CAR_SIZE, APPROX_ROUND_LENGTH_IN_MS } from './constants.js'
55
import { queryTheIndex } from './ipni-client.js'
66
import { getMinerPeerId as defaultGetMinerPeerId } from './miner-info.js'
7+
import { multiaddrToHttpUri } from './multiaddr.js'
8+
79
import {
8-
encodeHex
10+
CarBlockIterator,
11+
encodeHex,
12+
HashMismatchError,
13+
UnsupportedHashError,
14+
validateBlock
915
} from '../vendor/deno-deps.js'
1016

1117
const sleep = dt => new Promise(resolve => setTimeout(resolve, dt))
@@ -77,25 +83,15 @@ export default class Spark {
7783
stats.protocol = provider.protocol
7884
stats.providerAddress = provider.address
7985

80-
const searchParams = new URLSearchParams({
81-
// See https://github.com/filecoin-project/lassie/blob/main/docs/HTTP_SPEC.md#dag-scope-request-query-parameter
82-
// Only the root block at the end of the path is returned after blocks required to verify the specified path segments.
83-
'dag-scope': 'block',
84-
protocols: provider.protocol,
85-
providers: provider.address
86-
})
87-
const url = `ipfs://${retrieval.cid}?${searchParams.toString()}`
8886
try {
89-
await this.fetchCAR(url, stats)
87+
await this.fetchCAR(provider.protocol, provider.address, retrieval.cid, stats)
9088
} catch (err) {
91-
console.error(`Failed to fetch ${url}`)
89+
console.error(`Failed to fetch ${retrieval.cid} from ${provider.address} using ${provider.protocol}`)
9290
console.error(err)
9391
}
9492
}
9593

96-
async fetchCAR (url, stats) {
97-
console.log(`Fetching: ${url}`)
98-
94+
async fetchCAR (protocol, address, cid, stats) {
9995
// Abort if no progress was made for 60 seconds
10096
const controller = new AbortController()
10197
const { signal } = controller
@@ -116,6 +112,9 @@ export default class Spark {
116112
const carBytes = new Uint8Array(carBuffer)
117113

118114
try {
115+
const url = getRetrievalUrl(protocol, address, cid)
116+
console.log(`Fetching: ${url}`)
117+
119118
resetTimeout()
120119
const res = await this.#fetch(url, { signal })
121120
stats.statusCode = res.status
@@ -146,6 +145,14 @@ export default class Spark {
146145
}
147146

148147
if (!stats.carTooLarge) {
148+
try {
149+
await verifyContent(cid, carBytes)
150+
stats.contentVerification = 'OK'
151+
} catch (err) {
152+
console.error('Content verification failed.', err)
153+
stats.contentVerification = 'ERROR_' + (err.code ?? 'UNKNOWN')
154+
}
155+
149156
const digest = await crypto.subtle.digest('sha-256', carBytes)
150157
// 12 is the code for sha2-256
151158
// 20 is the digest length (32 bytes = 256 bits)
@@ -155,6 +162,11 @@ export default class Spark {
155162
console.error('Retrieval failed with status code %s: %s',
156163
res.status, (await res.text()).trimEnd())
157164
}
165+
} catch (err) {
166+
if (!stats.statusCode) {
167+
stats.statusCode = mapErrorToStatusCode(err)
168+
}
169+
throw err
158170
} finally {
159171
clearTimeout(timeout)
160172
}
@@ -240,6 +252,73 @@ export default class Spark {
240252
}
241253
}
242254

255+
/**
 * Build the URL used to retrieve a single block identified by `cid`.
 *
 * For the `http` protocol the provider is contacted directly: its multiaddr
 * is converted to an HTTP(S) base URL. Any other protocol produces an
 * `ipfs://` URL carrying the protocol and provider address as query
 * parameters.
 *
 * @param {string} protocol Retrieval protocol advertised by the provider
 * @param {string} address Provider's multiaddr
 * @param {string} cid CID of the content to retrieve
 * @returns {string} URL to fetch
 */
function getRetrievalUrl (protocol, address, cid) {
  if (protocol !== 'http') {
    const query = new URLSearchParams()
    // See https://github.com/filecoin-project/lassie/blob/main/docs/HTTP_SPEC.md#dag-scope-request-query-parameter
    // Only the root block at the end of the path is returned after blocks required to verify the specified path segments.
    query.append('dag-scope', 'block')
    query.append('protocols', protocol)
    query.append('providers', address)
    return `ipfs://${cid}?${query.toString()}`
  }

  const providerUrl = multiaddrToHttpUri(address)
  return `${providerUrl}/ipfs/${cid}?dag-scope=block`
}
270+
271+
/**
 * Verify that the CAR payload contains the requested block and nothing else.
 *
 * Every block in the CAR must have the requested CID and must pass
 * `validateBlock`. A CAR with no blocks at all is rejected too: otherwise the
 * loop below would never run and the check would succeed without having
 * verified anything.
 *
 * @param {string} cid CID the payload is expected to contain
 * @param {Uint8Array} carBytes Raw bytes of the CAR response
 * @returns {Promise<void>} Resolves when the payload is valid
 * @throws {Error} with `code: 'UNEXPECTED_CAR_BLOCK'` when the CAR contains a
 *   block with a different CID, or `code: 'CAR_HAS_NO_BLOCKS'` when the CAR
 *   contains no blocks. Errors raised by `CarBlockIterator.fromBytes` and
 *   `validateBlock` are propagated unchanged.
 */
async function verifyContent (cid, carBytes) {
  const expectedCid = cid.toString()
  const reader = await CarBlockIterator.fromBytes(carBytes)

  let verifiedBlocks = 0
  for await (const block of reader) {
    if (block.cid.toString() !== expectedCid) {
      throw Object.assign(
        new Error(`Unexpected block CID ${block.cid}. Expected: ${cid}`),
        { code: 'UNEXPECTED_CAR_BLOCK' }
      )
    }

    await validateBlock(block)
    verifiedBlocks++
  }

  if (verifiedBlocks === 0) {
    throw Object.assign(
      new Error(`The CAR file does not contain any blocks. Expected: ${cid}`),
      { code: 'CAR_HAS_NO_BLOCKS' }
    )
  }
}
288+
289+
/**
 * Map an error thrown while retrieving content to a synthetic status code.
 *
 * - 901-903: the provider's multiaddr could not be converted to a URL
 * - 911-912: network errors, detected from the error message text
 * - 921-923: content verification errors
 * - 900: fallback for everything else
 *
 * This function is called from an error handler, so it must never throw
 * itself: a value without a `code` or without a string `message` (including
 * `null` and `undefined`) maps to the fallback code instead of raising a
 * `TypeError` that would mask the original failure.
 *
 * NOTE(review): in the visible part of `fetchCAR`, errors from `verifyContent`
 * are caught locally and recorded in `stats.contentVerification`, so the 92x
 * branch may not be reachable from that call site - confirm.
 *
 * @param {unknown} err The value that was thrown
 * @returns {number} Synthetic status code in the 9xx range
 */
function mapErrorToStatusCode (err) {
  // 90x codes for multiaddr parsing errors
  switch (err?.code) {
    case 'UNSUPPORTED_MULTIADDR_PROTO':
      return 901
    case 'UNSUPPORTED_MULTIADDR_SCHEME':
      return 902
    case 'MULTIADDR_HAS_TOO_MANY_PARTS':
      return 903
  }

  // 92x for content verification errors
  if (err instanceof UnsupportedHashError) {
    return 921
  } else if (err instanceof HashMismatchError) {
    return 922
  } else if (err?.code === 'UNEXPECTED_CAR_BLOCK') {
    return 923
  }

  // 91x errors for network connection errors
  // Unfortunately, the Fetch API does not support programmatic detection of various error
  // conditions. We have to check the error message text.
  const message = typeof err?.message === 'string' ? err.message : ''
  if (message.includes('dns error')) {
    return 911
  } else if (message.includes('tcp connect error')) {
    return 912
  }

  // Fallback code for unknown errors
  return 900
}
321+
243322
async function assertOkResponse (res, errorMsg) {
244323
if (res.ok) return
245324

test.js

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import './test/ipni-client.test.js'
22
import './test/miner-info.test.js'
3+
import './test/multiaddr.test.js'
4+
35
import './test/integration.js'
46
import './test/spark.js'

test/multiaddr.test.js

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { test } from 'zinnia:test'
2+
import { assertEquals, assertThrows } from 'zinnia:assert'
3+
import { multiaddrToHttpUri } from '../lib/multiaddr.js'
4+
5+
// Each entry is [multiaddr, expected URI]
const HAPPY_CASES = [
  ['/ip4/127.0.0.1/tcp/80/http', 'http://127.0.0.1'],
  ['/ip4/127.0.0.1/tcp/8080/http', 'http://127.0.0.1:8080'],
  ['/ip4/127.0.0.1/tcp/443/https', 'https://127.0.0.1'],
  ['/ip4/127.0.0.1/tcp/8080/https', 'https://127.0.0.1:8080'],
  ['/dns/meridian.space/tcp/8080/http', 'http://meridian.space:8080'],
  ['/dns4/meridian.space/tcp/8080/http', 'http://meridian.space:8080'],
  ['/dns6/meridian.space/tcp/8080/http', 'http://meridian.space:8080'],
  // IPv6 hosts are wrapped in square brackets
  ['/ip6/::1/tcp/8080/http', 'http://[::1]:8080'],
  ['/ip6/::1/tcp/443/https', 'https://[::1]']
]

for (const [multiaddr, expectedUri] of HAPPY_CASES) {
  test(`parse ${multiaddr}`, () => {
    const uri = multiaddrToHttpUri(multiaddr)
    assertEquals(uri, expectedUri)
  })
}

// Each entry is [multiaddr, expected error message, expected error code]
const ERROR_CASES = [
  [
    '/ip4/127.0.0.1/tcp/80',
    'Cannot parse "/ip4/127.0.0.1/tcp/80": unsupported scheme "undefined"',
    'UNSUPPORTED_MULTIADDR_SCHEME'
  ],
  [
    '/ip4/127.0.0.1/udp/90',
    'Cannot parse "/ip4/127.0.0.1/udp/90": unsupported protocol "udp"',
    'UNSUPPORTED_MULTIADDR_PROTO'
  ],
  [
    '/ip4/127.0.0.1/tcp/8080/http/p2p/pubkey',
    'Cannot parse "/ip4/127.0.0.1/tcp/8080/http/p2p/pubkey": too many parts',
    'MULTIADDR_HAS_TOO_MANY_PARTS'
  ]
]

for (const [multiaddr, expectedError, expectedCode] of ERROR_CASES) {
  test(`parse ${multiaddr}`, () => {
    const err = assertThrows(() => multiaddrToHttpUri(multiaddr))
    assertEquals(err.message, expectedError)
    // The error code is what callers map to synthetic status codes
    assertEquals(err.code, expectedCode)
  })
}

test/spark.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ test('fetchCAR', async () => {
7070
carChecksum: null,
7171
statusCode: null
7272
}
73-
await spark.fetchCAR(URL, stats)
73+
await spark.fetchCAR('http', '127.0.0.1', 'bafy', stats)
7474
assertEquals(stats.timeout, false)
7575
assertInstanceOf(stats.startAt, Date)
7676
assertInstanceOf(stats.firstByteAt, Date)
@@ -104,7 +104,7 @@ test('fetchCAR exceeding MAX_CAR_SIZE', async () => {
104104
carChecksum: null,
105105
statusCode: null
106106
}
107-
await spark.fetchCAR(URL, stats)
107+
await spark.fetchCAR('http', '127.0.0.1', 'bafy', stats)
108108
assertEquals(stats.timeout, false)
109109
assertEquals(stats.carTooLarge, true)
110110
assertEquals(stats.byteLength, MAX_CAR_SIZE + 1)

0 commit comments

Comments
 (0)