-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfulltext.sh
executable file
·47 lines (37 loc) · 995 Bytes
/
fulltext.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /bin/bash
# Collect the patent full-text tables of one country into a gzipped tarball.
#
# Usage: ./fulltext.sh [COUNTRY [YEAR_START [YEAR_END]]]
#   COUNTRY     country code used in the directory names (default: HR)
#   YEAR_START  first year to include (default: 1800)
#   YEAR_END    last year to include (default: YEAR_START when it is given,
#               otherwise 2021)
#
# Examples:
#   ./fulltext.sh HR &
#   ./fulltext.sh ES 2010 &
#
# Environment:
#   INFODIR  root of the per-year directories
#            (default: /data/patents/pdfpatents)
#   TMPDIR   directory in which the uncompressed tarball is staged
#            (default: /tmp)
#
# Reads:  ${INFODIR}/${COUNTRY}-${YEAR}/${COUNTRY}-${YEAR}-text.tab
# Writes: ${INFODIR}/fulltext/${COUNTRY}_text.tgz

# exit on the first error, on unset variables and on pipeline failures
set -euo pipefail

COUNTRY=${1:-HR}
YEAR_START=${2:-1800}
YEAR_END=${3:-${2:-2021}}

# Drop the (up to three) arguments consumed above; any further arguments are
# currently ignored.
if [[ "$#" -gt 3 ]]; then
  shift 3
else
  shift "$#"
fi

# The years are used in an arithmetic context below, which would evaluate
# arbitrary expressions; accept plain decimal digits only.
for year_arg in "${YEAR_START}" "${YEAR_END}"; do
  if ! [[ "${year_arg}" =~ ^[0-9]+$ ]]; then
    printf '%s: year must be a non-negative integer, got "%s"\n' \
      "${0##*/}" "${year_arg}" >&2
    exit 2
  fi
done

INFODIR="${INFODIR:-/data/patents/pdfpatents}"
TMPDIR="${TMPDIR:-/tmp}"
DSTDIR="${INFODIR}/fulltext"
DSTFILE="${DSTDIR}/${COUNTRY}_text.tgz"

mkdir -p -- "${DSTDIR}"

# Stage the tarball in a uniquely named file (mktemp creates it empty), so
# concurrent runs cannot clobber each other, and remove it on every exit path.
TMPFILE="$(mktemp "${TMPDIR}/${COUNTRY}.tar.XXXXXX")"
trap 'rm -f -- "${TMPFILE}"' EXIT

# 10# forces base 10 so that leading zeros are not read as octal.
for (( YEAR = 10#${YEAR_START}; YEAR <= 10#${YEAR_END}; YEAR++ )); do
  YEARDIR="${COUNTRY}-${YEAR}"
  SRCDIR="${INFODIR}/${YEARDIR}"
  SRCNAME="${YEARDIR}-text.tab"
  # years without a readable text table are silently skipped
  if [[ -r "${SRCDIR}/${SRCNAME}" ]]; then
    # Append the file to the tarball under its bare file name: -C enters the
    # source directory first, so no path has to be stripped afterwards.
    tar -rf "${TMPFILE}" -C "${SRCDIR}" -- "${SRCNAME}"
  fi
done

# compress the tarball; the staging file is removed by the EXIT trap
gzip -9 < "${TMPFILE}" > "${DSTFILE}"