From e7e34825bee8dc6da864f996837695f1bbf37af1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 10 Sep 2024 09:12:45 +0900 Subject: [PATCH] GH-16: Add Apache Rat CI Fix GH-16 Apache Rat: https://creadur.apache.org/rat/ It's a release audit tool. Related files: * https://github.com/apache/arrow/blob/main/dev/release/run-rat.sh * https://github.com/apache/arrow/blob/main/dev/release/rat_exclude_files.txt --- .gitignore | 21 +++++++++++ .gitmodules | 17 +++++++++ .pre-commit-config.yaml | 13 +++++++ dev/release/check_rat_report.py | 59 +++++++++++++++++++++++++++++++ dev/release/rat_exclude_files.txt | 34 ++++++++++++++++++ dev/release/run_rat.sh | 54 ++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+) create mode 100644 .gitignore create mode 100755 dev/release/check_rat_report.py create mode 100644 dev/release/rat_exclude_files.txt create mode 100755 dev/release/run_rat.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..06f40703 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +/apache-arrow-go.tar.gz +/dev/release/apache-rat-*.jar +/dev/release/filtered_rat.txt +/dev/release/rat.xml diff --git a/.gitmodules b/.gitmodules index f298cdf8..e03b48fa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ [submodule "arrow-testing"] path = arrow-testing url = https://github.com/apache/arrow-testing.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0e6666d..72aeb1e7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,6 +30,19 @@ repos: - id: golangci-lint-full name: golangci-lint-full-internal entry: bash -c 'cd internal && golangci-lint run' + - repo: local + hooks: + - id: rat + name: Release Audit Tool + language: system + entry: | + bash -c " \ + git archive HEAD \ + --prefix=apache-arrow-go/ \ + --output=apache-arrow-go.tar.gz && \ + dev/release/run_rat.sh apache-arrow-go.tar.gz" + always_run: true + pass_filenames: false - repo: https://github.com/koalaman/shellcheck-precommit rev: v0.10.0 hooks: diff --git a/dev/release/check_rat_report.py b/dev/release/check_rat_report.py new file mode 100755 index 00000000..c45baa06 --- /dev/null +++ b/dev/release/check_rat_report.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+"""Check an Apache Rat XML report against a list of exclude globs.
+
+Usage: check_rat_report.py exclude_globs.lst rat_report.xml
+
+Writes one "NOT APPROVED" line to stdout for every resource in the report
+whose license approval is not 'true' and whose path matches none of the
+exclude globs.  Exits with status 1 if any such resource exists (or if the
+arguments are wrong); otherwise prints 'OK' and exits with status 0.
+"""
+
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+
+def load_exclude_globs(filename):
+    """Return the glob patterns listed in *filename*, one per line.
+
+    Blank lines and lines starting with '#' (for example the license
+    header of the exclude list) are not patterns and are skipped.
+    """
+    # The context manager closes the file as soon as the list is built.
+    with open(filename, "r") as exclude_file:
+        stripped_lines = (line.strip() for line in exclude_file)
+        return [line for line in stripped_lines
+                if line and not line.startswith("#")]
+
+
+def find_unapproved(xml_filename, globs):
+    """Yield (clean_name, name, approval) for each unapproved resource.
+
+    *xml_filename* is the Rat XML report and *globs* the exclude patterns.
+    A resource is reported when its first 'license-approval' element has a
+    'name' attribute other than 'true' and its path, with the first path
+    component removed, matches none of *globs*.  Resources without a
+    'license-approval' element are ignored.
+    """
+    root = ET.parse(xml_filename).getroot()
+    for resource in root.findall('resource'):
+        approvals = resource.findall('license-approval')
+        if not approvals or approvals[0].attrib['name'] == 'true':
+            continue
+        name = resource.attrib['name']
+        # Strip the first path component (such as an archive prefix
+        # directory) so that the globs are matched against the rest.
+        clean_name = re.sub(r'^[^/]+/', '', name)
+        if any(fnmatch.fnmatch(clean_name, glob) for glob in globs):
+            continue
+        yield clean_name, name, approvals[0].attrib['name']
+
+
+def main(argv):
+    """Run the check for the command line *argv*; return the exit status."""
+    if len(argv) != 3:
+        sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+                         argv[0])
+        return 1
+
+    globs = load_exclude_globs(argv[1])
+
+    all_ok = True
+    for clean_name, name, approval in find_unapproved(argv[2], globs):
+        sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+            clean_name, name, approval))
+        all_ok = False
+
+    if not all_ok:
+        return 1
+
+    print('OK')
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
new file mode 100644
index 00000000..6365be90
--- /dev/null
+++ b/dev/release/rat_exclude_files.txt
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+*.s
+*.tmpldata
+go.sum
+
+arrow/flight/gen/flight/*.pb.go
+arrow/type_string.go
+arrow/unionmode_string.go
+arrow/compute/datumkind_string.go
+arrow/compute/funckind_string.go
+arrow/compute/internal/kernels/compareoperator_string.go
+arrow/compute/internal/kernels/roundmode_string.go
+arrow/compute/internal/kernels/_lib/vendored/*
+
+parquet/internal/gen-go/parquet/GoUnusedProtection__.go
+parquet/internal/gen-go/parquet/parquet-consts.go
+parquet/internal/gen-go/parquet/parquet.go
+parquet/version_string.go
diff --git a/dev/release/run_rat.sh b/dev/release/run_rat.sh
new file mode 100755
index 00000000..9b9ce346
--- /dev/null
+++ b/dev/release/run_rat.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Usage: run_rat.sh <source archive>
+#
+# Runs Apache Rat on the given archive and fails if the report contains
+# unapproved files that rat_exclude_files.txt does not exclude.
+
+set -eu
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <source archive>" >&2
+  exit 1
+fi
+
+RELEASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+RAT_VERSION=0.16.1
+
+# Download the Rat jar only when it is not already next to this script.
+RAT_JAR="${RELEASE_DIR}/apache-rat-${RAT_VERSION}.jar"
+if [ ! -f "${RAT_JAR}" ]; then
+  curl \
+    --fail \
+    --output "${RAT_JAR}" \
+    --show-error \
+    --silent \
+    "https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
+fi
+
+# Audit the archive and write the result as an XML report.
+RAT_XML="${RELEASE_DIR}/rat.xml"
+java \
+  -jar "${RAT_JAR}" \
+  --out "${RAT_XML}" \
+  --xml \
+  "$1"
+
+# Filter the report through the exclude list. The filtered output is kept
+# in a file so that it can be shown and counted on failure.
+FILTERED_RAT_TXT="${RELEASE_DIR}/filtered_rat.txt"
+if ${PYTHON:-python3} \
+  "${RELEASE_DIR}/check_rat_report.py" \
+  "${RELEASE_DIR}/rat_exclude_files.txt" \
+  "${RAT_XML}" > \
+  "${FILTERED_RAT_TXT}"; then
+  echo "No unapproved licenses"
+else
+  cat "${FILTERED_RAT_TXT}"
+  # grep -c exits non-zero when nothing matches (for example when the
+  # checker itself failed); "|| true" keeps "set -e" from aborting here.
+  N_UNAPPROVED=$(grep -c "NOT APPROVED" "${FILTERED_RAT_TXT}" || true)
+  echo "${N_UNAPPROVED} unapproved licenses. Check Rat report: ${RAT_XML}"
+  exit 1
+fi