-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgzip.sh
executable file
·1952 lines (1508 loc) · 48.8 KB
/
gzip.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
declare -a GZIP_input=()
declare -a GZIP_output=()
GZIP_buffer_size=65535
declare -A GZIP_dummy=()
GZIP_byte_ptr=-1
GZIP_bit_ptr=-1
declare -a GZIP_window=() # indexed modulo 32768
GZIP_window_size=32768
GZIP_total_bytes_read=0
GZIP_total_bytes_written=0
GZIP_byte=0
# window start, end
GZIP_wstart=0
GZIP_wend=-1
declare -A GZIP_keys=()
declare -A GZIP_prevs=()
declare -a GZIP_parsed_litlen=()
declare -a GZIP_parsed_dist=()
declare -A GZIP_min_heap=()
declare -a GZIP_code_lengths=()
# to compute dynamic huffman codes
declare -A GZIP_litlen_freqs=()
declare -A GZIP_dist_freqs=()
declare -A GZIP_fixed_llcodes_inverse=()
declare -A GZIP_fixed_dcodes_inverse=()
declare -A GZIP_fixed_llcodes=()
declare -A GZIP_fixed_dcodes=()
declare -A GZIP_dynamic_llcodes_inverse=()
declare -A GZIP_dynamic_dcodes_inverse=()
declare -A GZIP_dynamic_llcodes=()
declare -A GZIP_dynamic_dcodes=()
# for huffman output
declare -a GZIP_dist_codes=()
declare -a GZIP_dist_extra_bits=()
declare -a GZIP_dist_extra_vals=()
declare -a GZIP_len_codes=()
declare -a GZIP_len_extra_bits=()
declare -a GZIP_len_extra_vals=()
declare -a GZIP_bytes=()
declare -a GZIP_bits=()
# the choice of symbols is arbitrary, the important thing is
# that we encode things in just one char to do RLE more easily
declare -A GZIP_rle_symbols=(
[0]=0 [1]=1 [2]=2 [3]=3 [4]=4 [5]=5 [6]=6 [7]=7 [8]=8 [9]=9 [10]="a"
[11]="b" [12]="c" [13]="d" [14]="e" [15]="f" [16]="g" [17]="h" [18]="i"
)
declare -A GZIP_rle_symbols_inv=(
[0]=0 [1]=1 [2]=2 [3]=3 [4]=4 [5]=5 [6]=6 [7]=7 [8]=8 [9]=9 ["a"]=10
["b"]=11 ["c"]=12 ["d"]=13 ["e"]=14 ["f"]=15 ["g"]=16 ["h"]=17 ["i"]=18
)
declare -a GZIP_crc_lookup=(
0 1996959894 3993919788 2567524794 124634137 1886057615 3915621685 2657392035
249268274 2044508324 3772115230 2547177864 162941995 2125561021 3887607047 2428444049
498536548 1789927666 4089016648 2227061214 450548861 1843258603 4107580753 2211677639
325883990 1684777152 4251122042 2321926636 335633487 1661365465 4195302755 2366115317
997073096 1281953886 3579855332 2724688242 1006888145 1258607687 3524101629 2768942443
901097722 1119000684 3686517206 2898065728 853044451 1172266101 3705015759 2882616665
651767980 1373503546 3369554304 3218104598 565507253 1454621731 3485111705 3099436303
671266974 1594198024 3322730930 2970347812 795835527 1483230225 3244367275 3060149565
1994146192 31158534 2563907772 4023717930 1907459465 112637215 2680153253 3904427059
2013776290 251722036 2517215374 3775830040 2137656763 141376813 2439277719 3865271297
1802195444 476864866 2238001368 4066508878 1812370925 453092731 2181625025 4111451223
1706088902 314042704 2344532202 4240017532 1658658271 366619977 2362670323 4224994405
1303535960 984961486 2747007092 3569037538 1256170817 1037604311 2765210733 3554079995
1131014506 879679996 2909243462 3663771856 1141124467 855842277 2852801631 3708648649
1342533948 654459306 3188396048 3373015174 1466479909 544179635 3110523913 3462522015
1591671054 702138776 2966460450 3352799412 1504918807 783551873 3082640443 3233442989
3988292384 2596254646 62317068 1957810842 3939845945 2647816111 81470997 1943803523
3814918930 2489596804 225274430 2053790376 3826175755 2466906013 167816743 2097651377
4027552580 2265490386 503444072 1762050814 4150417245 2154129355 426522225 1852507879
4275313526 2312317920 282753626 1742555852 4189708143 2394877945 397917763 1622183637
3604390888 2714866558 953729732 1340076626 3518719985 2797360999 1068828381 1219638859
3624741850 2936675148 906185462 1090812512 3747672003 2825379669 829329135 1181335161
3412177804 3160834842 628085408 1382605366 3423369109 3138078467 570562233 1426400815
3317316542 2998733608 733239954 1555261956 3268935591 3050360625 752459403 1541320221
2607071920 3965973030 1969922972 40735498 2617837225 3943577151 1913087877 83908371
2512341634 3803740692 2075208622 213261112 2463272603 3855990285 2094854071 198958881
2262029012 4057260610 1759359992 534414190 2176718541 4139329115 1873836001 414664567
2282248934 4279200368 1711684554 285281116 2405801727 4167216745 1634467795 376229701
2685067896 3608007406 1308918612 956543938 2808555105 3495958263 1231636301 1047427035
2932959818 3654703836 1088359270 936918000 2847714899 3736837829 1202900863 817233897
3183342108 3401237130 1404277552 615818150 3134207493 3453421203 1423857449 601450431
3009837614 3294710456 1567103746 711928724 3020668471 3272380065 1510334235 755167117
)
declare -a GZIP_length_factors=( 11 13 15 17 19 23 27
31 35 43 51 59 67 83
99 115 131 163 195 227 )
declare -a GZIP_distance_factors=( 4 6 8 12 16 24 32 48
64 96 128 192 256 384
512 768 1024 1536 2048
3072 4096 6144 8192
12288 16384 24576 )
GZIP_log(){
printf '%s\n' "$1" >&2
}
GZIP_die(){
GZIP_log "$1"
exit 1
}
GZIP_init_crc32(){
# Initialize CRC-32 to starting value
GZIP_crc32=4294967295 # all ones
}
GZIP_update_crc32(){
local index char=$1
index=$(( (GZIP_crc32 ^ char) & 255 ))
GZIP_crc32=$(( (GZIP_crc32 >> 8) ^ GZIP_crc_lookup[index] ))
}
GZIP_finalize_crc32(){
# invert all bits
GZIP_crc32=$(( GZIP_crc32 ^ 4294967295 ))
}
GZIP_add_to_heap(){
local symbol=$1
local -n __freqs=$2
local parent parent_index parent_elem tmp cur_pos
# add at the end then bubble up if needed
((GZIP_heap_end++))
GZIP_min_heap[$GZIP_heap_end]=$symbol
if [ $GZIP_heap_end -gt 0 ]; then
cur_pos=$GZIP_heap_end
while true; do
parent_index=$(( (cur_pos - 1) / 2 ))
parent_elem=${GZIP_min_heap[$parent_index]}
if [ ${__freqs[$parent_elem]} -gt ${__freqs[$symbol]} ]; then
# swap
tmp=${GZIP_min_heap[$parent_index]}
GZIP_min_heap[$parent_index]=${GZIP_min_heap[$cur_pos]}
GZIP_min_heap[$cur_pos]=$tmp
cur_pos=$parent_index
else
break
fi
done
fi
}
# extract minimum element
GZIP_get_from_heap(){
local -n __freqs=$1
local parent_index parent_elem tmp cur_pos
local c1_index c2_index c1_elem c2_elem min_index
GZIP_min_heap_value=${GZIP_min_heap[0]}
# put last element at top
GZIP_min_heap[0]=${GZIP_min_heap[$GZIP_heap_end]}
((GZIP_heap_end--))
# heapify
if [ $GZIP_heap_end -gt 0 ]; then
cur_pos=0
while true; do
c1_index=$(( 2 * cur_pos + 1 ))
c2_index=$(( c1_index + 1 ))
c1_elem=${GZIP_min_heap[$c1_index]}
c2_elem=${GZIP_min_heap[$c2_index]}
if { [ $c1_index -le $GZIP_heap_end ] && [ ${__freqs[${GZIP_min_heap[$cur_pos]}]} -gt ${__freqs[$c1_elem]} ]; } || \
{ [ $c2_index -le $GZIP_heap_end ] && [ ${__freqs[${GZIP_min_heap[$cur_pos]}]} -gt ${__freqs[$c2_elem]} ]; }; then
# swap with the smallest child
if [ $c2_index -gt $GZIP_heap_end ]; then
min_index=$c1_index
elif [ $c1_index -gt $GZIP_heap_end ]; then
min_index=$c2_index
else
min_index=$(( __freqs[$c1_elem] < __freqs[$c2_elem] ? c1_index : c2_index ))
fi
tmp=${GZIP_min_heap[$cur_pos]}
GZIP_min_heap[$cur_pos]=${GZIP_min_heap[$min_index]}
GZIP_min_heap[$min_index]=$tmp
cur_pos=$min_index
else
break
fi
done
fi
}
# Textbook algorithm
GZIP_compute_dynamic_tree(){
local -n _freqs=$1
local nsymbols=$2
local length_limit=$3
local -n dst_fwd=$4
local -n dst_inv=$5
local i max_sum sum symbol internal_node v1 v2
# create min-heap
GZIP_min_heap=()
GZIP_heap_end=-1
for symbol in "${!_freqs[@]}"; do
GZIP_add_to_heap "$symbol" _freqs
done
local -A parents=()
# internal nodes have values that cannot occur in the data (>= 1000)
internal_node=999
if [ $GZIP_heap_end -gt -1 ]; then
while true; do
GZIP_get_from_heap _freqs
v1=$GZIP_min_heap_value
if [ $GZIP_heap_end -eq -1 ]; then
parents[$v1]=-1
break
fi
GZIP_get_from_heap _freqs
v2=$GZIP_min_heap_value
# create internal node with the two extracted minimum values as children
((internal_node++))
parents[$v1]=$internal_node
parents[$v2]=$internal_node
# this is needed so add_to_heap can properly heapify
_freqs[$internal_node]=$(( _freqs[$v1] + _freqs[$v2] ))
GZIP_add_to_heap $internal_node _freqs
done
fi
local len ptr
GZIP_code_lengths=()
# now walk up parents and get length for each code
for symbol in "${!_freqs[@]}"; do
# ignore internal nodes
if [ $symbol -ge 1000 ]; then
continue
fi
# walk up the chain
len=0
ptr=$symbol
while true; do
ptr=${parents[$ptr]}
if [ $ptr -eq -1 ]; then
break
fi
((len++))
done
# special case
if [ $len -eq 0 ]; then
len=1
fi
GZIP_code_lengths[$symbol]=$len
done
# there might be corner cases where the length of some codeword
# exceeds the maximum permitted for the tree, so we should check
# and fix the lengths if necessary before proceeding.
# The fix exploits what's known as the "Kraft-McMillan inequality"
# https://en.wikipedia.org/wiki/Kraft%E2%80%93McMillan_inequality
# but without actually doing the fractionary calculations for
# obvious reasons
# All credit goes to this page:
# https://create.stephan-brumme.com/length-limited-prefix-codes/
# for explaining all the fine details.
max_sum=$(( 2 ** length_limit ))
sum=0
for ((i = 0; i < nsymbols; i++)); do
if [ "${GZIP_code_lengths[$i]}" != "" ]; then
if [ ${GZIP_code_lengths[$i]} -gt $length_limit ]; then
GZIP_code_lengths[$i]=$length_limit
fi
# normalize each element and sum
sum=$(( sum + (max_sum / (2 ** GZIP_code_lengths[i])) ))
fi
done
if [ $sum -gt $max_sum ]; then
# we need to fix lengths. This uses MiniZ's method from the above page.
i=0
while true; do
# choose a maximum length code and another (shorter) code
maxlen_code_index=-1
shorter_code_index=-1
shorter_len=-1
for ((i=0; i < nsymbols; i++)); do
if [ $maxlen_code_index -ne -1 ] && [ $shorter_code_index -ne -1 ] && [ ${GZIP_code_lengths[$shorter_code_index]} -eq $(( length_limit - 1 )) ]; then
break
fi
if [ "${GZIP_code_lengths[$i]}" = "" ]; then
continue
fi
if [ $maxlen_code_index -eq -1 ] && [ ${GZIP_code_lengths[$i]} -eq $length_limit ]; then
maxlen_code_index=$i
continue
fi
if [ ${GZIP_code_lengths[$i]} -lt $length_limit ] && [ ${GZIP_code_lengths[$i]} -gt $shorter_len ]; then
shorter_len=${GZIP_code_lengths[$i]}
shorter_code_index=$i
continue
fi
done
# we now have two elements, lengthen the shorter one by one and give the first one the same length
# lengthen the second one and update sum
sum=$(( sum - (max_sum / (2 ** GZIP_code_lengths[$shorter_code_index])) + (max_sum / (2 ** (GZIP_code_lengths[$shorter_code_index]+1) )) ))
((GZIP_code_lengths[$shorter_code_index]++))
# give the first element the same length and update sum
sum=$(( sum - (max_sum / (2 ** GZIP_code_lengths[$maxlen_code_index])) + (max_sum / (2 ** GZIP_code_lengths[$shorter_code_index])) ))
GZIP_code_lengths[$maxlen_code_index]=${GZIP_code_lengths[$shorter_code_index]}
# stop as soon as we get within the limit
if [ $sum -le $max_sum ]; then
break
fi
done
fi
# with lengths, we can finally build the huffman tree
GZIP_build_huffman_tree GZIP_code_lengths $nsymbols 0 dst_fwd dst_inv
}
# GENERAL INPUT FROM STDIN; USED BY DEFLATE AND INFLATE
# Uses GZIP_buffer_size to know how many bytes to read
# Sets: GZIP_input[] with actual bytes, GZIP_bytes_read
# with number of read bytes, GZIP_total_bytes_read for the
# grand total (used for the trailer)
GZIP_read_input(){
local data length bytes status i
local fd=$1
bytes=0
local to_read=$GZIP_buffer_size
GZIP_eof=0
while true; do
IFS= read -u $fd -d '' -r -n $to_read data
status=$?
length=${#data}
for ((i=0; i < length; i++)); do
printf -v "GZIP_input[bytes+i]" "%d" "'${data:i:1}"
done
# if we read less than we wanted, and it's not EOF, it means we also have
# a delimiter (NUL)
if [ $length -lt $to_read ] && [ $status -eq 0 ]; then
GZIP_input[bytes+length]=0
((length++))
#echo "Read NUL"
fi
((bytes+=length))
if [ $bytes -ge $GZIP_buffer_size ]; then
break
fi
if [ $status -ne 0 ]; then
GZIP_eof=1
break
fi
((to_read-=length))
done
GZIP_bytes_read=$bytes
((GZIP_total_bytes_read+=bytes))
}
# Get some bytes from GZIP_input[]
# USED ONLY FOR DECOMPRESSION to read header/trailer
# and when decompressing literal bytes (block type 00)
# If you call this function directly, it's the
# caller's responsibility to ensure there are
# no pending bits to read before
# Arguments: number of bytes to read (default 1)
# Puts bytes read into GZIP_bytes[]
# Also, as a convenience for the caller, puts decoded byte values
# into GZIP_decoded_byteval
# This works because we read at most 4 bytes at a time
# Updates: GZIP_byte_ptr (index into GZIP_input[])
GZIP_get_bytes(){
local count=$1 i
if [ "$count" = "" ]; then
count=1
fi
GZIP_decoded_byteval=0
for ((i = 0; i < count; i++)); do
if [ $GZIP_byte_ptr -eq -1 ]; then
local fd=0
GZIP_read_input $fd # input into GZIP_input
fi
((GZIP_byte_ptr++))
GZIP_bytes[i]=${GZIP_input[$GZIP_byte_ptr]}
GZIP_decoded_byteval=$(( GZIP_decoded_byteval + (GZIP_bytes[i] << 8*i) ))
if [ $GZIP_byte_ptr -ge $(( GZIP_bytes_read - 1)) ]; then
GZIP_byte_ptr=-1
fi
done
}
# get some input bits
# Read bits from GZIP_bits, when empty read another
# byte with GZIP_get_bytes
# Store read bits into GZIP_bits (indexed by GZIP_bit_ptr)
# Updates: GZIP_bit_ptr
# Sets GZIP_decoded_bitval as a convenience
GZIP_get_bits(){
local count=$1 i
if [ "$count" = "" ]; then
count=1
fi
GZIP_decoded_bitval=0
for ((i = 0; i < count; i++)); do
if [ $GZIP_bit_ptr -eq -1 ]; then
GZIP_get_bytes
fi
((GZIP_bit_ptr++))
GZIP_bits[i]=$(( ( GZIP_bytes[0] & (1 << GZIP_bit_ptr) ) >> GZIP_bit_ptr ))
GZIP_decoded_bitval=$(( GZIP_decoded_bitval + (GZIP_bits[i] << i) ))
if [ $GZIP_bit_ptr -ge 7 ]; then
GZIP_bit_ptr=-1
fi
done
}
# read bit-by-bit until a valid code according to the
# Huffman codes in $arr is found
GZIP_read_one_code(){
local -n _codes=$1
local b=
local count=0
while true; do
GZIP_get_bits
b="${b}${GZIP_bits[0]}"
if [ "${_codes[$b]}" != "" ]; then
break
fi
((count++))
# if we read too much without finding a code, something is wrong
if [ $count -gt 16 ]; then
GZIP_die "Read $count bits without finding a valid code"
fi
done
GZIP_decoded_value=${_codes[$b]}
}
# converts a value to a bitstring
# Sets: GZIP_bitstring
# Arguments: value, length
GZIP_to_bitstring(){
local v=$1 len=$2 i
GZIP_bitstring=
for (( i = len-1; i >= 0; i--)); do
GZIP_bitstring="${GZIP_bitstring}$(( (v & (1 << i)) >> i ))"
done
}
GZIP_to_bitstring_rev(){
local v=$1 len=$2 i
GZIP_bitstring=
for (( i = 0; i < len; i++)); do
GZIP_bitstring="${GZIP_bitstring}$(( (v & (1 << i)) >> i ))"
done
}
# header is byte-oriented
# Used for decompression
GZIP_parse_header(){
local b1 b2 compression_method flgs ftext
local fhcrc fextra timestamp
local fname fcomment xlen len si1 si2
local extra_flags os i val vx
# get magic number
GZIP_get_bytes 2
b1=${GZIP_bytes[0]}
b2=${GZIP_bytes[1]}
if [ $b1 -ne 31 ] || [ $b2 -ne 139 ]; then
GZIP_die "No gzip signature, terminating"
fi
GZIP_get_bytes
compression_method=${GZIP_bytes[0]} # always 8
if [ $compression_method -ne 8 ]; then
GZIP_die "No deflate compression method byte (8), terminating"
fi
GZIP_get_bytes
flgs=${GZIP_bytes[0]}
ftext=$(( flgs & 1 ))
fhcrc=$(( flgs & 2 ))
fextra=$(( flgs & 4 ))
fname=$(( flgs & 8 ))
fcomment=$(( flgs & 16 ))
GZIP_get_bytes 4
timestamp=$GZIP_decoded_byteval
GZIP_get_bytes
extra_flags=${GZIP_bytes[0]}
GZIP_get_bytes
os=${GZIP_bytes[0]}
# 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
# 1 - Amiga
# 2 - VMS (or OpenVMS)
# 3 - Unix
# 4 - VM/CMS
# 5 - Atari TOS
# 6 - HPFS filesystem (OS/2, NT)
# 7 - Macintosh
# 8 - Z-System
# 9 - CP/M
# 10 - TOPS-20
# 11 - NTFS filesystem (NT)
# 12 - QDOS
# 13 - Acorn RISCOS
# 255 - unknown
if [ $fextra -ne 0 ]; then
GZIP_get_bytes 2
xlen=$GZIP_decoded_byteval
GZIP_get_bytes $xlen
count=$xlen
while true; do
si1=${GZIP_bytes[0]}
si2=${GZIP_bytes[1]}
len=$(( GZIP_bytes[2] + (GZIP_bytes[3] << 8) ))
val=
vx=
for ((i = 0; i < len; i++)); do
printf -v vx '%s%x' '\x' "${GZIP_bytes[4+i]}"
printf -v val "%s${vx}" "$val"
done
count=$(( count + len + 4 ))
if [ $count -ge $xlen ]; then
break
fi
done
fi
if [ $fname -ne 0 ]; then
# read until null
val=
while true; do
GZIP_get_bytes
if [ ${GZIP_bytes[0]} -eq 0 ]; then
break
fi
# unused anyway
printf -v vx '%s%x' '\x' "${GZIP_bytes[0]}"
printf -v val "%s${vx}" "$val"
done
fi
if [ $fcomment -ne 0 ]; then
# read until null
val=
while true; do
GZIP_get_bytes
if [ ${GZIP_bytes[0]} -eq 0 ]; then
break
fi
# unused
printf -v vx '%s%x' '\x' "${GZIP_bytes[4+1]}"
printf -v val "%s${vx}" "$val"
done
fi
if [ $fhcrc -ne 0 ]; then
# read 2 bytes
GZIP_get_bytes 2
fi
}
GZIP_parse_trailer(){
local crc length computed_length
GZIP_get_bytes 4
crc=$GZIP_decoded_byteval
if [ $crc -ne $GZIP_crc32 ]; then
GZIP_log "WARNING: different CRC (got: $crc, computed: $GZIP_crc32)"
fi
# length
GZIP_get_bytes 4
length=$GZIP_decoded_byteval
computed_length=$(( GZIP_total_bytes_written % (2 ** 32) ))
if [ $length -ne $computed_length ]; then
GZIP_log "WARNING: different size (got: $length, computed: $computed_length)"
fi
}
# Given a source array containing the encoding lengths of symbols,
# builds a Huffman tree in canonical form inside dst_fwd and dst_inv
# (forward ie symbol->bitstring and inverse ie bitstring->symbol)
GZIP_build_huffman_tree(){
local -n __lengths=$1
local ncodes=$2
local offset=$3
local -n _dst_fwd=$4
local -n _dst_inv=$5
local i l maxl code v
local -A length_counts=()
local -A firsts=()
for ((i = 0; i <= ncodes; i++)); do
length_counts[$i]=0
done
# count how many codes there are for each length
# and calculate max code length
maxl=0
for ((i = 0; i <= ncodes; i++)); do
l=${__lengths[$((i+offset))]:-0}
if [ $l -gt $maxl ]; then
maxl=$l
fi
if [ $l -gt 0 ]; then
((length_counts[$l]++))
fi
done
firsts[0]=0
code=0
# calculate first value for each length
for (( l = 1; l <= maxl; l++ )); do
code=$(( (code + length_counts[$((l-1))]) << 1 ))
if [ ${length_counts[$l]} -gt 0 ]; then
firsts[$l]=$code
fi
done
# starting with src, calculate codes
for ((i = 0; i <= ncodes; i++)); do
v=${__lengths[$((i+offset))]}
if [ "$v" != "" ] && [ "$v" != "0" ]; then
# code length: $v
# value: ${firsts[$v]} (then incremented)
GZIP_to_bitstring ${firsts[$v]} $v
_dst_inv["${GZIP_bitstring}"]=$i
_dst_fwd[$i]=${GZIP_bitstring}
((firsts[$v]++))
fi
done
}
### Real decompress routine
GZIP_inflate(){
local compression block last_block
GZIP_parse_header
GZIP_init_crc32
block=-1
while true; do
((block++))
GZIP_get_bits
last_block=${GZIP_bits[0]}
GZIP_get_bits 2
compression=$GZIP_decoded_bitval
# 0: no compression
# 1: fixed huffman codes
# 2: dynamic huffman code
# 3: error
if [ $compression -eq 0 ]; then
GZIP_inflate_uncompressed
elif [ $compression -eq 1 ]; then
GZIP_inflate_huffman 0
elif [ $compression -eq 2 ]; then
GZIP_inflate_huffman 1
else
GZIP_die "Invalid compression value for block $block: $compression"
fi
if [ $last_block -eq 1 ]; then
break
fi
done
GZIP_finalize_crc32
GZIP_parse_trailer
}
# decompress a block stored with no compression (type 0)
GZIP_inflate_uncompressed(){
local length clength i v
# skip irrelevant bits
GZIP_get_bits 5
# get length
GZIP_get_bytes 2
length=$GZIP_decoded_byteval
# get one's complement of length
GZIP_get_bytes 2
clength=$GZIP_decoded_byteval # unused
for ((i = 0; i < length; i++)); do
GZIP_get_bytes
GZIP_output_byte ${GZIP_bytes[0]}
done
}
# decompress a block compressed with dynamic or fixed huffman codes
GZIP_inflate_huffman(){
local value i v
local end index start_index
local hlit hdist
local dynamic=$1 # 1 if dynamic
local -n llcodes
local -n dcodes
if [ $dynamic -eq 1 ]; then
# read and rebuild the dynamic Huffman codes
GZIP_dynamic_llcodes=()
GZIP_dynamic_llcodes_inverse=()
GZIP_dynamic_dcodes=()
GZIP_dynamic_dcodes_inverse=()
local -A lengths=()
GZIP_get_s1codes lengths hlit hdist
GZIP_get_s2codes lengths GZIP_dynamic_llcodes_inverse GZIP_dynamic_dcodes_inverse $hlit $hdist
llcodes=GZIP_dynamic_llcodes_inverse
dcodes=GZIP_dynamic_dcodes_inverse
else
llcodes=GZIP_fixed_llcodes_inverse
dcodes=GZIP_fixed_dcodes_inverse
fi
# now we can read the actual LZSS-compressed/Huffman-encoded data
while true; do
end=0
GZIP_read_one_code llcodes
value=${GZIP_decoded_value}
if [ ${value} -lt 256 ]; then
# literal, output as is
GZIP_output_byte $value
GZIP_add_to_window $value
elif [ ${value} -eq 256 ]; then
# stop
end=1
else
# backpointer
local length distance
GZIP_decode_backpointer $value length distance dcodes
# we have length and distance, read and output data from window
start_index=$(( GZIP_wend - distance ))
for ((i = 0; i < length; i++)); do
index=$(( (start_index + i) % GZIP_window_size ))
GZIP_output_byte ${GZIP_window[$index]}
GZIP_add_to_window ${GZIP_window[$index]}
done
fi
if [ $end -eq 1 ]; then
break
fi
done
}
# Output one byte of stdout
# Used when decompressing
GZIP_output_byte(){
local byte=$1
local no_update=$2
local v
if [ "$no_update" = "" ]; then
no_update=0
fi
printf -v v "%o" ${byte}
printf "\\$v"
if [ $no_update -eq 0 ]; then
GZIP_update_crc32 ${byte}
((GZIP_total_bytes_written++))
fi
}
GZIP_get_s1codes(){
local -n _lengths=$1
local -n _hlit=$2
local -n _hdist=$3
local -A s1codes=()
local hclen
local i value count nzeros repeats prev
# HLIT, number of literal/length codes
GZIP_get_bits 5
_hlit=$GZIP_decoded_bitval
# HDIST, number of distance codes
GZIP_get_bits 5
_hdist=$GZIP_decoded_bitval
# HCLEN, number of code length codes
GZIP_get_bits 4
hclen=$GZIP_decoded_bitval
#GZIP_log "DECOMPRESS: HLIT: $_hlit, HDIST: $_hdist, HCLEN: $hclen"
local -a s=(16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15)
# we now need $hclen + 4 3-bit numbers
for (( i = 0; i < hclen + 4; i++ )); do
GZIP_get_bits 3
_lengths[${s[$i]}]=$GZIP_decoded_bitval
done
GZIP_build_huffman_tree _lengths 18 0 GZIP_dummy s1codes
# now with s1codes we can read the actual huffman tree for the compressed data
count=0
_lengths=()
while true; do
GZIP_read_one_code s1codes
value=${GZIP_decoded_value}
# special cases
if [ ${value} -eq 17 ]; then
# a run of zeros; how many is in next 3 bits + 3
GZIP_get_bits 3
nzeros=$(( GZIP_decoded_bitval + 3 ))
for ((i = 0; i < nzeros; i++)); do
_lengths[$count]=0
((count++))
done
elif [ ${value} -eq 18 ]; then
# a run of zeros; how many is in next 7 bits + 11
GZIP_get_bits 7
nzeros=$(( GZIP_decoded_bitval + 11 ))
for ((i = 0; i < nzeros; i++)); do
_lengths[$count]=0
((count++))
done
elif [ ${value} -eq 16 ]; then
# repeat previous character n times; how many is in next 2 bits + 3
GZIP_get_bits 2
repeats=$(( GZIP_decoded_bitval + 3))
prev=${_lengths[$((count-1))]}
for ((i = 0; i < repeats; i++)); do
_lengths[$count]=$prev
((count++))
done
else
# normal case
_lengths[$count]=${value}
((count++))
fi