Skip to content

Commit

Permalink
add submission
Browse files Browse the repository at this point in the history
  • Loading branch information
lschlessinger1 committed Feb 5, 2024
1 parent 6f6e20e commit 5f77f9c
Show file tree
Hide file tree
Showing 183 changed files with 27,561 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
USER=
PASS=
DATA_DIR=
37 changes: 37 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CI workflow: style checks, unit tests and `poetry check` on every push and pull request.
name: build
on: [ push, pull_request ]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
# Each Python version listed here gets its own job run.
python-version: [ "3.10", "3.11" ]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
uses: snok/install-poetry@v1

# Cache the project-local virtualenv. The key changes whenever the Python
# version, pyproject.toml or poetry.lock changes.
- name: Set up cache
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
# virtualenvs.in-project makes Poetry create the environment at ./.venv,
# which is the path cached above.
- name: Install dependencies
run: |
poetry config virtualenvs.in-project true
poetry install
# The remaining steps delegate to Makefile targets.
- name: Run style checks
run: |
make check-codestyle
- name: Run unit tests
run: |
make test-unit
- name: Run safety checks
run: |
make check-safety
104 changes: 104 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,107 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Created by https://www.toptal.com/developers/gitignore/api/intellij+iml
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+iml

### Intellij+iml ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Intellij+iml Patch ###
# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023

*.iml
modules.xml
.idea/misc.xml
*.ipr

.idea/modules.xml

# End of https://www.toptal.com/developers/gitignore/api/intellij+iml

# Data
data/**

*.env

# External
# NOTE(review): the first two patterns use a hyphenated directory name
# (youssef-nader-first-letters) while the last one uses underscores
# (youssef_nader_first_letters). Confirm which directory actually exists;
# patterns written for the other spelling never match.
external/youssef-nader-first-letters/*.ckpt
external/youssef-nader-first-letters/labels/*.png
external/youssef_nader_first_letters/labels.zip
41 changes: 41 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# pre-commit configuration: generic file hygiene hooks plus project formatters.
default_language_version:
python: python3.10

# Hooks run on both `git commit` and `git push` unless a hook overrides this.
default_stages: [commit, push]

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-ast
- id: check-yaml
- id: check-toml
- id: check-merge-conflict
- id: check-added-large-files
# Reject newly added files larger than 8000 kB.
args: ["--maxkb=8000"]
- id: end-of-file-fixer
exclude: LICENSE

# The local hooks below use `language: system` and call the tools through
# `poetry run`, so they execute whatever versions the Poetry environment provides.
- repo: local
hooks:
- id: pyupgrade
name: pyupgrade
# --py310-plus: rewrite syntax assuming Python 3.10 or newer.
entry: poetry run pyupgrade --py310-plus
types: [ python ]
language: system

- repo: local
hooks:
- id: isort
name: isort
entry: poetry run isort --settings-path pyproject.toml
types: [python]
language: system

- repo: local
hooks:
- id: black
name: black
entry: poetry run black --config pyproject.toml
types: [ python ]
language: system
102 changes: 102 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#* Variables
# Recipes are executed by bash, located through `env`.
SHELL := /usr/bin/env bash
# NOTE(review): PYTHON is not referenced by any rule in this Makefile
# (poetry-download hard-codes python3) — confirm whether it is still needed.
PYTHON := python

# Determine OS.
# Normalises OS to `windows`, `linux` or `macos`. When OS is not Windows_NT the
# output of `uname -s` decides; if that is neither Linux nor Darwin, OS keeps
# whatever value it already had.
ifeq ($(OS),Windows_NT)
OS := windows
else
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
OS := linux
endif
ifeq ($(UNAME_S),Darwin)
OS := macos
endif
endif

#* Poetry
# Downloads the Poetry installer script and pipes it straight into python3.
.PHONY: poetry-download
poetry-download:
curl -sSL https://install.python-poetry.org | python3 -

#* Installation
# Re-locks dependencies, exports them to requirements.txt, installs the
# project, then asks mypy to install missing type stubs.
.PHONY: install
install:
poetry lock -n && poetry export --without-hashes > requirements.txt
poetry install -n
# The leading `-` tells make to ignore a non-zero exit status from this step,
# so a failed stub installation does not fail `make install`.
-poetry run mypy --install-types --non-interactive ./

# Registers the git hooks described in .pre-commit-config.yaml.
.PHONY: pre-commit-install
pre-commit-install:
poetry run pre-commit install

# Installs rclone via its install script. The ifeq is evaluated when the
# Makefile is parsed, so on Windows the recipe consists only of the hint message.
.PHONY: rclone-install
rclone-install:
ifeq ($(OS),windows)
@echo "This command is not supported on Windows. Please download rclone from https://rclone.org/downloads/"
else
# `sudo -v` refreshes sudo credentials first; the downloaded script is then run as root.
sudo -v ; curl https://rclone.org/install.sh | sudo bash
endif

# Data download helpers. Each target only forwards a fixed argument list to a
# script under ./scripts.
.PHONY: download-all-fragments download-all-scrolls download-monster-segment

download-all-fragments:
	./scripts/download-fragments.sh 1 2 3

download-all-scrolls:
	./scripts/download-scroll-surface-vols.sh 1 2 PHerc1667 PHerc0332

download-monster-segment:
	./scripts/download-monster-segment-surface-vols.sh recto verso

#* Formatters
.PHONY: codestyle formatting

# Rewrite the sources in place: isort first, then black, both configured
# from pyproject.toml.
codestyle:
	poetry run isort --settings-path pyproject.toml ./
	poetry run black --config pyproject.toml ./

# Alias for `codestyle`.
formatting: codestyle

#* Linting
# Run the whole test suite and write an HTML coverage report for
# vesuvius_challenge_rnd.
.PHONY: test
test:
	poetry run pytest -c pyproject.toml tests/ --cov-report=html --cov=vesuvius_challenge_rnd

# Same as `test`, but deselects tests marked `fragment_data` or `scroll_data`.
# Declared phony like every other command target, so a file or directory named
# `test-unit` can never make make consider the target up to date.
.PHONY: test-unit
test-unit:
	poetry run pytest -m "not fragment_data and not scroll_data" -c pyproject.toml tests/ --cov-report=html --cov=vesuvius_challenge_rnd

# Read-only style check: isort and black print a diff and exit non-zero when a
# file would be changed, but do not modify anything.
.PHONY: check-codestyle
check-codestyle:
poetry run isort --diff --check-only --settings-path pyproject.toml ./
poetry run black --diff --check --config pyproject.toml ./

# Static type check using the mypy settings in pyproject.toml.
.PHONY: mypy
mypy:
poetry run mypy --config-file pyproject.toml ./

# NOTE(review): despite the name, this target only runs `poetry check`
# (validation of the Poetry project files); no vulnerability scanner is invoked.
.PHONY: check-safety
check-safety:
poetry check

# Aggregate target: unit tests, style check and `poetry check`.
.PHONY: lint
lint: test-unit check-codestyle check-safety

#* Docker
# The image tag of every target below is the target name without its
# `-build` / `-run` suffix; it is derived from $@ with a substitution reference.
.PHONY: frag-ink-det-gpu-build scroll-ink-det-gpu-build
.PHONY: frag-ink-det-gpu-run scroll-ink-det-gpu-run

# Directory under docker/ that holds each image's Dockerfile.
frag-ink-det-gpu-build: docker_dir := fragment-ink-detection-gpu
scroll-ink-det-gpu-build: docker_dir := scroll-ink-detection-gpu

# Build the image from the repository root as build context.
frag-ink-det-gpu-build scroll-ink-det-gpu-build:
	docker build -t $(@:-build=) -f docker/$(docker_dir)/Dockerfile .

# Start an interactive, auto-removed container with all GPUs attached and
# WANDB_DOCKER set to the image name.
frag-ink-det-gpu-run scroll-ink-det-gpu-run:
	docker run -it --rm --gpus all -e WANDB_DOCKER=$(@:-run=) $(@:-run=)
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,32 @@
# vesuvius-grand-prize-submission
Vesuvius challenge grand prize submission

## About

We approached the ink detection task as a 3D-to-2D binary semantic segmentation problem using surface volumes from
scroll 1 (PHerc Paris 3). We followed a human-assisted, pseudo-label-based self-training approach, using the crackle signal as a surrogate
for the ink signal.

For a summary of the methods used, please see [docs/methods.md](docs/methods.md).

## Getting started

For instructions on how to train and run inference, please see [docs/submission_reproduction_instructions.md](docs/submission_reproduction_instructions.md).

A pretrained checkpoint is available [here](https://drive.google.com/file/d/1bY14CjSfY8VbqlKmjv1MW-bzhScLZOoV/view?usp=sharing)
(associated with [val_3336_C3.yaml](vesuvius_challenge_rnd/scroll_ink_detection/experiment_runner/configs/unet3d_segformer/submission/val_3336_C3.yaml)).

## Authors
Louis Schlessinger, Arefeh Sherafati

## License

[MIT](https://choosealicense.com/licenses/mit/)

## Credits
- [EduceLab-Scrolls: Verifiable Recovery of Text from Herculaneum Papyri using X-ray CT](https://arxiv.org/abs/2304.02084)
- [Introducing Hann windows for reducing edge-effects in patch-based image segmentation](https://arxiv.org/abs/1910.07831)
- [1st place Kaggle Vesuvius Challenge - Ink Detection](https://www.kaggle.com/competitions/vesuvius-challenge-ink-detection/discussion/417496)
- [4th place Kaggle Vesuvius Challenge - Ink Detection](https://www.kaggle.com/competitions/vesuvius-challenge-ink-detection/discussion/417779)
- [First Ink Vesuvius Challenge](https://caseyhandmer.wordpress.com/2023/08/05/reading-ancient-scrolls/)
- [2nd place Vesuvius Challenge First Letters](https://github.com/younader/Vesuvius-First-Letters)
40 changes: 40 additions & 0 deletions docker/fragment-ink-detection-gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Image for fragment ink detection on GPU: CUDA base, Python 3, Poetry-exported
# dependencies, plus rclone and htop.

# Base CUDA devel image.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /workspace

# System packages (Python, ImageMagick headers, rclone, htop).
# `apt-get update` and `apt-get install` share one RUN instruction so a cached
# `update` layer can never be paired with a newer package list, and the package
# lists are removed in the same layer to keep the image small. `apt-get` is
# used instead of `apt`, whose command-line interface is not meant for scripts.
RUN apt-get update \
    && apt-get install -y \
        htop \
        libmagickwand-dev \
        python3 \
        python3-pip \
        rclone \
    && rm -rf /var/lib/apt/lists/*
RUN python3 -m pip install --no-cache-dir --upgrade pip

# Sanity check: fail the build early if rclone is not usable.
RUN rclone version

# Install poetry.
ENV \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONFAULTHANDLER=1
ENV \
    POETRY_VERSION='1.5.1' \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_NO_INTERACTION=1

# Poetry is installed with pip, which puts the `poetry` executable on the
# default PATH. No PATH change is made: POETRY_HOME is never defined in this
# image, so prefixing "$POETRY_HOME/bin" would only add "/bin".
RUN pip3 install --no-cache-dir "poetry==$POETRY_VERSION"
RUN poetry --version

# Install requirements.
# Only the dependency manifests are copied first, so the dependency layers stay
# cached until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock ./
RUN poetry export --without-hashes --with fragment-ink-det,torch_gpu -o requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

COPY vesuvius_challenge_rnd ./vesuvius_challenge_rnd
COPY scripts ./scripts
Loading

0 comments on commit 5f77f9c

Please sign in to comment.