-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathformat-lua
executable file
·120 lines (103 loc) · 4.86 KB
/
format-lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
from ipaddress import IPv4Network, IPv4Address, IPv6Network, IPv6Address
import json
import os
import struct
import sys
import tempfile
# Lua has a limit of 2^18 constants per function. See the following pages for details:
# http://boston.conman.org/2009/11/09.2 & http://lua-users.org/lists/lua-l/2008-02/msg00257.html
# Splitting the generated tables into chunks of LUA_CHUNK entries is a workaround for the
# "lua: constant table overflow" error seen with 170k v4 + 97k v6 entries.
# The value must not be too low either, or another limit might be hit. E.g. 2**10 _may_ give
# the following error if every chunk function is a new local variable:
# lua: dat.lua:199591: main function has more than 200 local variables
LUA_CHUNK = 2 ** 16
def make_32bit_trie(net, IPxxNetwork):
    """Group networks into a one-level trie keyed by their masked leading bits.

    Args:
        net: iterable of network specifications accepted by ``IPxxNetwork``.
        IPxxNetwork: either ``IPv4Network`` or ``IPv6Network``; any other
            class raises ``KeyError``.

    Returns:
        A ``(trie, trie_mask)`` tuple.  ``trie_mask`` is a 32-bit mask that
        keeps as many leading bits as the shortest prefix found in ``net``
        (capped at 32; 32 when ``net`` is empty).  ``trie`` maps each masked
        key to the list of networks sharing it, in sorted order.  For IPv6
        the key is taken from the top 32 bits of the 128-bit address.
    """
    fallback, discard = {
        IPv4Network: ('0.0.0.0/32', 0),
        # The minimum [RIPE] allocation size for IPv6 address space is /32.
        IPv6Network: ('::/32', 96),
    }[IPxxNetwork]

    networks = sorted(map(IPxxNetwork, net))
    shortest = min(
        (candidate.prefixlen for candidate in networks),
        default=IPxxNetwork(fallback).prefixlen,
    )
    keep_bits = min(shortest, 32)
    trie_mask = (0xffffFFFF << (32 - keep_bits)) & 0xffffFFFF

    buckets = {}  # one-level trie
    for network in networks:
        key = (int(network.network_address) >> discard) & trie_mask
        buckets.setdefault(key, []).append(network)
    return buckets, trie_mask
# Lua's `bit` module returns i32 as a result of operation, but it tolerates ui32 input.
# See http://bitop.luajit.org/semantics.html
Int32 = struct.Struct('i')
Uint32 = struct.Struct('I')


def ui32_to_i32(i):
    """Reinterpret an unsigned 32-bit integer as a signed 32-bit integer.

    The value is packed as an unsigned int and the same bytes are unpacked
    as a signed int.  ``struct.error`` is raised when ``i`` does not fit an
    unsigned int.
    """
    raw = Uint32.pack(i)
    (signed,) = Int32.unpack(raw)
    return signed


# Import-time sanity check: both formats must be 4 bytes wide and the
# conversion must wrap values above 2**31 - 1 into the negative range.
assert Int32.size == Uint32.size == 4
assert ui32_to_i32(0xc169c000) == -0x3e964000
def uint128_to_lua_string(i):
    """Render a 128-bit unsigned integer as a double-quoted Lua string literal.

    The integer is serialised to 16 big-endian bytes and each byte is written
    as a backslash followed by its decimal value.  ``OverflowError`` is raised
    by ``int.to_bytes`` when ``i`` is negative or needs more than 16 bytes.
    """
    escaped = ''.join(map('\\{:d}'.format, i.to_bytes(16, 'big')))
    return '"' + escaped + '"'
def write_lua_table_with_f(out, name, table, *, chunk_size=None):
    """Write Lua code that fills a local table used as a set of keys.

    The emitted code declares ``local <name> = {}`` and then, for every chunk
    of ``table``, assigns a function to the Lua variable ``f`` that sets
    ``t[token] = true`` for each token of the chunk and calls it on the
    table.  The variable ``f`` is not declared here; the caller is expected
    to have emitted its declaration beforehand.

    Args:
        out: text stream with a ``write`` method.
        name: Lua identifier of the table to create.
        table: sequence of ready-to-embed Lua key expressions (strings).
        chunk_size: maximum number of entries per generated Lua function.
            Defaults to the module-level ``LUA_CHUNK``, which keeps each
            function below Lua's per-function constant limit.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    if chunk_size is None:
        chunk_size = LUA_CHUNK
    if chunk_size < 1:
        # A negative step would silently emit an empty table.
        raise ValueError('chunk_size must be a positive integer', chunk_size)

    # Allocate the first chunk in-place to avoid realloc() of small table? May it save some RAM?
    # Seems, it actually makes things worse. Maybe something is not GCed...
    out.write('local {name:s} = {{}}\n'.format(name=name))
    call_line = 'f({name:s})\n'.format(name=name)  # loop-invariant
    for start in range(0, len(table), chunk_size):
        out.write('f = function (t)\n')
        out.write('local x = true;\n')  # to make lua file ~24% smaller.
        for token in table[start:start + chunk_size]:
            out.write('t[{}]=x\n'.format(token))
        out.write('end\n')
        out.write(call_line)
def main():
    """Convert the JSON document read from stdin into a Lua data file.

    The single command-line argument is the destination path, which must not
    exist yet.  The JSON document must provide the keys ``ip``, ``ipSubnet``,
    ``ipv6`` and ``ipv6Subnet``.  The Lua source is written to a temporary
    file in the destination's directory, flushed, and then hard-linked to the
    destination path.

    Raises:
        ValueError: if the number of command-line arguments is not exactly one.
        RuntimeError: if the destination path already exists.
    """
    (dst,) = sys.argv[1:]
    if os.path.exists(dst):
        raise RuntimeError('Destination should not exist', dst)

    doc = json.load(sys.stdin)
    raw_ip4, raw_net4, raw_ip6, raw_net6 = (
        doc['ip'], doc['ipSubnet'], doc['ipv6'], doc['ipv6Subnet'])
    # TODO: parse doc['cdnSubnet'] and doc['cdnv6Subnet']

    # int32 instead of IPv4 address string saves ~34 bytes of RAM per IP.
    # int32 hex encoding instead of base-10 int shrinks lua.gz by ~4%.
    ip4_tokens = list(map('0x{:x}'.format, sorted(int(IPv4Address(a)) for a in raw_ip4)))
    net4_trie, net4_mask = make_32bit_trie(raw_net4, IPv4Network)
    ip6_tokens = list(map(uint128_to_lua_string, sorted(int(IPv6Address(a)) for a in raw_ip6)))
    net6_trie, net6_mask = make_32bit_trie(raw_net6, IPv6Network)

    exported = (
        'ip', 'ipSubnet', 'ipSubnetTrieMask', 'ipv6', 'ipv6Subnet', 'ipv6SubnetTrieMask',
    )
    # The temporary file lives next to the destination so that os.link() below
    # stays on one filesystem; it is removed when the `with` block exits.
    with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(dst)) as out:
        emit = out.write
        emit('-- This is a generated file. DO NOT EDIT! It is data for kabysdoh DoH policy.\n')
        emit('local f = nil;\n')
        write_lua_table_with_f(out, 'ip', ip4_tokens)
        write_lua_table_with_f(out, 'ipv6', ip6_tokens)
        emit('f = nil\n')

        # It's 6 lists with /19 mask as of 20201227 ⇒ only ~0.001% of IPs hit the list test.
        emit('local ipSubnetTrieMask = 0x{:x};\n'.format(net4_mask))
        emit('local ipSubnet = {\n')
        # Trie keys are unique, so sorting the items orders them by key alone.
        for key, members in sorted(net4_trie.items()):
            emit('[{:#x}] = {{\n'.format(ui32_to_i32(key)))
            for subnet in members:
                emit(' {{ 0x{:x}, 0x{:x} }},\n'.format(
                    int(subnet.network_address), int(subnet.netmask)))
            emit('},\n')
        emit('};\n')

        emit('local ipv6SubnetTrieMask = 0x{:x};\n'.format(net6_mask))
        emit('local ipv6Subnet = {\n')
        for key, members in sorted(net6_trie.items()):
            emit('[{:#x}] = {{\n'.format(ui32_to_i32(key)))
            for subnet in members:
                emit(' {{ {:s}, {:d} }},\n'.format(
                    uint128_to_lua_string(int(subnet.network_address)), subnet.prefixlen))
            emit('},\n')
        emit('};\n')

        # Debugging aid for measuring the memory used by the loaded data:
        # out.write('collectgarbage("collect")\n')
        # out.write('print(collectgarbage("count"))\n')
        emit('return {{ {} }};\n'.format(
            ', '.join('{0} = {0}'.format(field) for field in exported)))
        emit('-- EOF')
        out.flush()
        # Publish the finished file; os.link() fails if dst appeared meanwhile.
        os.link(out.name, dst)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()