Skip to content

Commit 314931d

Browse files
committed
Use RFC5952 canonical form for IPv6 addresses in WARC-IP-Address
Per suggestion in iipc/warc-specifications#100
1 parent f207143 commit 314931d

File tree

3 files changed

+80
-1
lines changed

3 files changed

+80
-1
lines changed

src/org/netpreserve/jwarc/InetAddresses.java

+38
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
/*
77
* Copyright (C) 2008 The Guava Authors
8+
* Copyright (C) 2024 National Library of Australia and the jwarc contributors
89
*
910
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
1011
* in compliance with the License. You may obtain a copy of the License at
@@ -19,6 +20,7 @@
1920

2021
package org.netpreserve.jwarc;
2122

23+
import java.net.Inet6Address;
2224
import java.net.InetAddress;
2325
import java.net.UnknownHostException;
2426
import java.nio.ByteBuffer;
@@ -224,4 +226,40 @@ private static IllegalArgumentException formatIllegalArgumentException(
224226
String format, Object... args) {
225227
return new IllegalArgumentException(String.format(Locale.ROOT, format, args));
226228
}
229+
230+
/**
231+
* Formats an IPv6 address as the RFC5952 canonical textual representation.
232+
*/
233+
static String canonicalInet6(Inet6Address address) {
234+
byte[] bytes = address.getAddress();
235+
StringBuilder full = new StringBuilder();
236+
for (int i = 0; i < bytes.length; i += 2) {
237+
if (i > 0) full.append(':');
238+
int group = ((bytes[i] & 0xFF) << 8) | (bytes[i + 1] & 0xFF);
239+
full.append(Integer.toHexString(group));
240+
}
241+
242+
// Compress longest zero sequence
243+
int lengthOfLongestZeroSequence = 2;
244+
int startOfLongestZeroSequence = 0;
245+
for (int i = 0; i < full.length(); i++) {
246+
if (i > 0 && full.charAt(i) != ':') continue;
247+
248+
// Find the end of the zero sequence
249+
int j;
250+
for (j = i; j < full.length(); j++) {
251+
char c = full.charAt(j);
252+
if (c != ':' && c != '0') break;
253+
}
254+
255+
int length = j - i;
256+
if (length > lengthOfLongestZeroSequence) {
257+
startOfLongestZeroSequence = i;
258+
lengthOfLongestZeroSequence = length;
259+
}
260+
}
261+
if (lengthOfLongestZeroSequence <= 2) return full.toString();
262+
return full.substring(0, startOfLongestZeroSequence) + "::" +
263+
full.substring(startOfLongestZeroSequence + lengthOfLongestZeroSequence);
264+
}
227265
}

src/org/netpreserve/jwarc/WarcCaptureRecord.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.netpreserve.jwarc;
77

88
import java.io.IOException;
9+
import java.net.Inet6Address;
910
import java.net.InetAddress;
1011
import java.net.URI;
1112
import java.nio.ByteBuffer;
@@ -60,7 +61,13 @@ public B concurrentTo(URI recordId) {
6061
}
6162

6263
/**
 * Adds a WARC-IP-Address header for the given address.
 *
 * This span is a rendered diff: the line under the "63-" marker is the removed
 * implementation, the lines under the "64+".."70+" markers are its replacement.
 * In the replacement, an Inet6Address is formatted with
 * InetAddresses.canonicalInet6 and any other address with getHostAddress().
 *
 * @param ipAddress the address to record
 * @return the value returned by addHeader
 */
public B ipAddress(InetAddress ipAddress) {
63-
// Removed side of the diff: used getHostAddress() for every address type.
return addHeader("WARC-IP-Address", ipAddress.getHostAddress());
64+
String formatted;
65+
// IPv6 addresses are routed through the canonical formatter
if (ipAddress instanceof Inet6Address) {
66+
formatted = InetAddresses.canonicalInet6((Inet6Address) ipAddress);
67+
} else {
68+
// Every other address type keeps the previous getHostAddress() text
formatted = ipAddress.getHostAddress();
69+
}
70+
return addHeader("WARC-IP-Address", formatted);
6471
}
6572
}
6673
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.netpreserve.jwarc;
2+
3+
import org.junit.Test;
4+
5+
import java.net.Inet6Address;
6+
import java.net.InetAddress;
7+
8+
import static org.junit.Assert.*;
9+
import static org.netpreserve.jwarc.InetAddresses.canonicalInet6;
10+
11+
/**
 * Tests for InetAddresses.canonicalInet6, comparing its output for literal IPv6
 * inputs against the expected compressed text.
 *
 * NOTE(review): no case here has a single isolated zero group, and none has two
 * equal-length zero runs where the first run sits at the very start of the address.
 */
public class InetAddressesTest {
12+
@Test
13+
public void testCanonicalInet6() throws Exception {
14+
// A run of five zero groups in the middle collapses to "::"
assertEquals("2001:db8::1",
15+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:db8:0:0:0:0:0:1")));
16+
// All eight groups zero
assertEquals("::",
17+
canonicalInet6((Inet6Address) InetAddress.getByName("0:0:0:0:0:0:0:0")));
18+
// Zero run at the start of the address
assertEquals("::1",
19+
canonicalInet6((Inet6Address) InetAddress.getByName("0:0:0:0:0:0:0:1")));
20+
// No zero groups: output equals input
assertEquals("2001:db8:1:1:1:1:1:1",
21+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:db8:1:1:1:1:1:1")));
22+
// Two runs (two groups, then three groups): the longer, later run is compressed
assertEquals("2001:0:0:1::1",
23+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:0:0:1:0:0:0:1")));
24+
// Leading zeros inside a group ("000f") are dropped
assertEquals("2001:db8:f::1",
25+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:db8:000f:0:0:0:0:1")));
26+
// Two runs of equal length (two groups each): the first is compressed
assertEquals("2001:db8::1:0:0:1",
27+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:0db8:0000:0000:0001:0000:0000:0001")));
28+
// All bits set: nothing to compress
assertEquals("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
29+
canonicalInet6((Inet6Address) InetAddress.getByName("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")));
30+
// A non-zero group containing '0' digits ("200f") directly before the zero run
assertEquals("2001:200f::1",
31+
canonicalInet6((Inet6Address) InetAddress.getByName("2001:200f:0:0:0:0:0:1")));
32+
}
33+
34+
}

0 commit comments

Comments
 (0)