-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge-www-logs.sh
executable file
·114 lines (98 loc) · 2.94 KB
/
merge-www-logs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/sh
#
# Merges Apache httpd log files from different directories (e.g. backups).
#
# usage: sh merge-www-logs.sh source-directory destination-directory
#
# This script assumes that the sets of log files are stored in sub-directories
# of the given source-directory and that the sub-directories' names are in
# chronological order when sorted lexicographically (according to LC_COLLATE as
# specified by POSIX).
# All log files named access.log and [vhost].access.log including the common
# logrotate(8) forms of it (i.e. .log.n and .log.n.gz) are processed.
#
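# This script will store the merged log files in the given
# destination-directory, which must already exist.  Uncompressed source log
# files are moved (not copied) out of the source-directory; gzip-compressed
# ones are decompressed into the destination and left in place.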
#
# Abort on the first failing command (-e) and on any expansion of an unset
# variable (-u).
set -eu
# Directory below which tools/logresolvemerge.pl is looked up.
awstats_installdir='/usr/share/awstats'
# incr NAME
#   Adds 1 to the shell variable called NAME.  An unset or empty variable
#   counts as 0, so a counter does not have to be initialised first.
#   Returns 2 (and changes nothing) if NAME is not a valid variable name.
incr() {
	# The operand ends up inside eval, so accept plain identifiers only.
	case ${1-}
	in
		(''|[!A-Za-z_]*|*[!A-Za-z0-9_]*)
			printf 'incr: invalid variable name: %s\n' "${1-}" >&2
			return 2
			;;
	esac
	# POSIX sh has no other form of indirect assignment than eval.  The :-0
	# default is expanded before the arithmetic is evaluated; this keeps the
	# function usable under `set -u` in shells (e.g. bash) whose arithmetic
	# expansion refuses to read an unset variable.
	eval "$1=\$(( \${$1:-0} + 1 ))"
}
# zmv SOURCE.gz DESTINATION
#   Decompresses the gzip file SOURCE.gz into DESTINATION and reports the
#   operation on stdout.  The compressed source file is left in place.
#   Returns 2 when not called with exactly two operands, 1 when the
#   decompression failed (no DESTINATION is left behind in that case).
zmv() {
	# NOTE: this function only works with two operands!
	if test $# -ne 2
	then
		printf 'usage: zmv source.gz destination\n' >&2
		return 2
	fi
	printf 'gunzip %s -> %s\n' "$1" "$2"
	# gzip -dc is what GNU zcat runs; unlike zcat it does not look for a .Z
	# suffix on systems where zcat belongs to compress(1).
	if gzip -dc -- "$1" >"$2"
	then
		return 0
	fi
	# The redirection has already created/truncated the destination; do not
	# leave a partial log behind that later steps would take for a real one.
	rm -f -- "$2"
	return 1
}
SRC_DIR=${1:?}
DST_DIR=${2:?}
# Every mv/zmv below writes into DST_DIR; fail early with a clear message
# instead of part-way through the first source directory.
test -d "${DST_DIR}" || {
	printf '%s: not a directory\n' "${DST_DIR}" >&2
	exit 1
}
# merge directories
# Each sub-directory of SRC_DIR gets a running number i (in glob order, i.e.
# sorted by name).  Its logs are placed in DST_DIR as <name>_<i>_<r>.log where
# <name> is "access" or "<vhost>.access" and <r> is 0 for the current log or
# the text after the last dot for rotated logs (<name>.log.<r>[.gz]).
# The counter is initialised explicitly: under `set -u`, shells such as bash
# refuse to read an unset variable inside $(( )).
i=0
for d in "${SRC_DIR:?}"/*
do
	test -d "${d}" || continue
	incr i
	printf 'merging in log directory "%s" (%u)\n' "${d}" $((i))
	# current logs: access.log, <vhost>.access.log
	for f in "${d:?}"/access.log "${d:?}"/*.access.log
	do
		# an unmatched glob stays literal and is skipped here
		test -f "${f}" || continue
		mv -v "${f}" "${DST_DIR:?}/$(basename "${f%.log}")_$((i))_0.log"
	done
	# rotated, uncompressed logs: *.log.<n>
	for f in "${d:?}"/access.log.*[0-9] "${d:?}"/*.access.log.*[0-9]
	do
		test -f "${f}" || continue
		mv -v "${f}" "${DST_DIR:?}/$(basename "${f%.log*}")_$((i))_${f##*.}.log"
	done
	# rotated, gzip-compressed logs: *.log.<n>.gz
	for f in "${d:?}"/access.log.*[0-9].gz "${d:?}"/*.access.log.*[0-9].gz
	do
		test -f "${f}" || continue
		# strip .gz so that ${f##*.} yields the rotation suffix
		f=${f%.gz}
		zmv "${f}.gz" "${DST_DIR:?}/$(basename "${f%.log*}")_$((i))_${f##*.}.log"
	done
	unset -v f
done
unset -v d
# dedup
# The same rotated log may have been collected from several source
# directories.  Files are ordered by cksum(1) CRC, then size, then name; a file
# is removed when CRC and size equal those of the previously kept file AND
# cmp(1) confirms that the contents are byte-identical.  CRC and size are
# compared as separate fields so that e.g. (crc 100, size 20051) can never be
# confused with (crc 100200, size 5).
printf '\ndeduplicating log files\n'
while read -r crc sz file
do
	# An empty record is read when cksum printed nothing (no *.log files).
	test -n "${file}" || continue

	if test "${crc}" = "${last_crc-}" \
		&& test "${sz}" = "${last_sz-}" \
		&& cmp -s "${last_file-}" "${file}"
	then
		printf 'duplicate of %s: ' "${last_file}"
		rm -v "${file}"
		continue
	fi

	last_crc=${crc}
	last_sz=${sz}
	last_file=${file}
	# update mtime to first log timestamp
	# (sed rewrites the first line's "[dd/Mon/yyyy:hh:mm:ss +zzzz]" into
	# "dd Mon yyyy hh:mm:ss +zzzz", prints it and quits)
	btime=$(sed -n \
		-e 's/^.*\[\([^:]*:[^]]* [+-][0-9]\{4\}\)\].*$/\1/' \
		-e 's|/| |g' \
		-e 's/:/ /' \
		-e 'p' \
		-e 'q' \
		"${file}")
	# An empty log file has no first timestamp; leave its mtime alone.
	test -z "${btime}" || touch -c --date="${btime}" "${file}"
done <<EOF
$(cksum "${DST_DIR:?}"/*.log | sort -t ' ' -k 1,1n -k 2,2n -k 3)
EOF
unset -v crc sz file last_crc last_sz last_file btime
# report number of requests per day
# QA aid: sed reduces every log line to the text between "[" and the first
# ":" of its timestamp (dd/Mon/yyyy), awk counts the lines per such date, and
# sort orders the result by year, month name and day.
printf '\ncounting logged requests per day (QA)…\n'
cat "${DST_DIR:?}"/*.log |
	sed -n -e 's/^.*\[\([^:]*\):[^]]* [+-][0-9]\{4\}\].*$/\1/p' |
	awk '
		{ hits[$0]++ }
		END {
			for (day in hits)
				printf "%s %u" RS, day, hits[day]
		}
	' |
	sort -k 1.8,1.12n -k 1.4,1.7M -k 1.1,1.2n
# resolve log files
# For every log stem ("" for access.log, "<vhost>" for <vhost>.access.log) the
# collected pieces <stem>access_*.log are passed to logresolvemerge.pl and its
# output becomes <stem>access.log.  The pieces are removed only after the tool
# exited successfully: the script runs without pipefail, so in a pipeline a
# missing or failing logresolvemerge.pl would go unnoticed and the collected
# logs would be deleted anyway.
printf '\nmerging together log files…\n'
while read -r log_stem
do
	# Expand the pieces once; "$@" is reused for merging and for cleanup.
	# (The script's own operands were saved in SRC_DIR/DST_DIR earlier.)
	set -- "${DST_DIR:?}/${log_stem}${log_stem:+.}"access_*.log
	# an unmatched glob stays literal: nothing to merge for this stem
	test -f "$1" || continue

	printf 'logresolve: %s\n' "${log_stem}"
	merged="${DST_DIR:?}/${log_stem}${log_stem:+.}access.log"
	# Write to a side file first so that a failed run neither truncates an
	# existing merged log nor leaves a partial one behind.
	if "${awstats_installdir:?}"/tools/logresolvemerge.pl "$@" >"${merged}.part"
	then
		mv -f "${merged}.part" "${merged}"
		awk 'END { printf "%u records" RS, NR }' <"${merged}"
		rm -f "$@"
	else
		status=$?
		rm -f "${merged}.part"
		printf 'logresolvemerge.pl failed (status %u) for "%s"; log files kept\n' \
			"${status}" "${log_stem}" >&2
		exit "${status}"
	fi
done <<EOF
$(for log in "${DST_DIR:?}"/*.log
do
	test -f "${log}" && printf '%s\n' "${log##*/}"
done | sed -e 's/^access.*$//' -e 's/\.access.*$//' | sort -u)
EOF
unset -v log_stem merged