Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add try except around datasets to train on broken datasets #2

Draft
wants to merge 52 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
8222dc0
add try except around datasets to train on broken datasets
odulcy-mindee Jan 5, 2024
e6faaf6
fix in collate_fn
odulcy-mindee Jan 6, 2024
b5a41bd
Problems with augmentations involving _gaussian_filter
odulcy-mindee Jan 6, 2024
fbef2cf
from https://github.com/mindee/doctr/pull/1444
odulcy-mindee Feb 1, 2024
c350bd7
send message on slack (pytorch script)
odulcy-mindee Feb 6, 2024
f16b192
exclude l1_loss in db model
odulcy-mindee Feb 6, 2024
9b0ae92
send_on_slack tf
odulcy-mindee Feb 13, 2024
2036599
Revert "fix test"
odulcy-mindee Feb 13, 2024
c02a477
fix
odulcy-mindee Feb 14, 2024
2602a5d
Display pbar before starting training
odulcy-mindee Feb 19, 2024
578d9b8
temp eval with cord funsd from felix
odulcy-mindee Mar 6, 2024
ccf19d9
try_except on sec_evaluate
odulcy-mindee Mar 8, 2024
a5a6101
pbar on evaluate fn
odulcy-mindee Mar 8, 2024
cc795f6
pbar on sec_evaluate
odulcy-mindee Mar 8, 2024
34e32eb
apply patch from https://github.com/felixdittrich92/doctr/commit/27bc…
odulcy-mindee Mar 11, 2024
2e90794
stop using custom ds for val
odulcy-mindee Mar 20, 2024
e6d393b
train_pytorch_orientation send_on_slack
odulcy-mindee Apr 15, 2024
05a6779
feat: :sparkles: orientation dataset walk
odulcy-mindee Apr 15, 2024
bd18864
(32, 32) -> (128, 128)
odulcy-mindee Apr 16, 2024
ea58710
(256, 256) -> (512, 512)
odulcy-mindee Apr 16, 2024
1049fab
train_tensorflow_orientation.py: size for crop
odulcy-mindee Apr 16, 2024
9c68b21
Merge branch 'main' into try_except
odulcy-mindee Apr 19, 2024
53aa99b
slack display args on train_pytorch_orientation
odulcy-mindee Apr 22, 2024
6688c6f
Merge branch 'main' into try_except
odulcy-mindee Apr 26, 2024
4d751f1
pbar `train_tensorflow_orientation`
odulcy-mindee Apr 30, 2024
9f034cb
Frankenstein script to train TF model with Torch DataLoader
odulcy-mindee May 22, 2024
483eed3
Merge branch 'main' into try_except
odulcy-mindee May 22, 2024
e56b7c0
fix send_on_slack
odulcy-mindee Jun 5, 2024
983e815
upd
odulcy-mindee Jun 5, 2024
5fd3355
Merge branch 'main' into try_except
odulcy-mindee Jun 6, 2024
32d699f
Merge branch 'main' into try_except
odulcy-mindee Jun 12, 2024
163db75
Merge branch 'main' into try_except
odulcy-mindee Dec 17, 2024
1efc8ee
revert few changes
odulcy-mindee Dec 17, 2024
536b93e
Merge branch 'main' into try_except
odulcy-mindee Dec 17, 2024
3fda58e
Merge branch 'main' into try_except
odulcy-mindee Dec 18, 2024
46c41b7
Merge branch 'main' into try_except
odulcy-mindee Dec 20, 2024
7300c1c
Merge branch 'main' into try_except
odulcy-mindee Jan 13, 2025
432f63f
Merge branch 'main' into try_except
odulcy-mindee Jan 14, 2025
60a63bb
clean `detection/train_pytorch.py`
odulcy-mindee Jan 14, 2025
4dcd972
add clearml logging
odulcy-mindee Jan 14, 2025
e182ae7
add boto3
odulcy-mindee Jan 14, 2025
6494f87
`config`
odulcy-mindee Jan 14, 2025
2bcf01c
Merge branch 'main' into try_except
odulcy-mindee Jan 16, 2025
2a12cfc
Grad accumulation - testing
odulcy-mindee Jan 16, 2025
8f5ccf9
Revert "Grad accumulation - testing"
odulcy-mindee Jan 16, 2025
934730e
Grad accumulation - testing
odulcy-mindee Jan 16, 2025
d3bcd09
`power=0.5` for polynomialLR
odulcy-mindee Jan 21, 2025
814fb13
Merge branch 'main' into try_except
odulcy-mindee Jan 23, 2025
b912d8f
clean branch
odulcy-mindee Jan 23, 2025
5548afa
remove grad accumu
odulcy-mindee Jan 27, 2025
1162d8b
tqdm disable
odulcy-mindee Jan 27, 2025
185ce11
enable pbar.write
odulcy-mindee Jan 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 32 additions & 22 deletions doctr/datasets/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import os
import shutil
import traceback
from collections.abc import Callable
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -47,28 +48,37 @@ def _read_sample(self, index: int) -> tuple[Any, Any]:

def __getitem__(self, index: int) -> tuple[Any, Any]:
    """Return the ``(img, target)`` pair at ``index``.

    Any exception raised while reading or transforming the sample is caught,
    reported (filename + traceback) and the first sample of the dataset is
    returned instead, so a single broken sample does not abort training.

    Args:
        index: position of the sample in ``self.data``

    Returns:
        tuple of (image, target) after all configured transforms
    """
    try:
        # Read image
        img, target = self._read_sample(index)
        # Pre-transforms (format conversion at run-time etc.)
        if self._pre_transforms is not None:
            img, target = self._pre_transforms(img, target)

        if self.img_transforms is not None:
            # typing issue cf. https://github.com/python/mypy/issues/5485
            img = self.img_transforms(img)

        if self.sample_transforms is not None:
            # Conditions to assess it is detection model with multiple classes and avoid confusion with other tasks.
            if (
                isinstance(target, dict)
                and all(isinstance(item, np.ndarray) for item in target.values())
                and set(target.keys()) != {"boxes", "labels"}  # avoid confusion with obj detection target
            ):
                # Apply the sample transform per class so each class' boxes are
                # transformed against the same (untouched) input image.
                img_transformed = _copy_tensor(img)
                for class_name, bboxes in target.items():
                    img_transformed, target[class_name] = self.sample_transforms(img, bboxes)
                img = img_transformed
            else:
                img, target = self.sample_transforms(img, target)
    except Exception:
        img_name = self.data[index][0]
        # Report the broken sample before falling back
        print()
        print(f"!!!ERROR in Dataset on filename {img_name}")
        traceback.print_exc()
        print()
        # Guard against unbounded recursion: if the fallback sample (index 0)
        # is itself broken, re-raise instead of recursing forever.
        if index == 0:
            raise
        return self.__getitem__(0)  # should exist ^^

    return img, target

Expand Down
16 changes: 11 additions & 5 deletions doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,20 @@ def __init__(

self.data: list[tuple[str, tuple[np.ndarray, list[str]]]] = []
np_dtype = np.float32
missing_files = []
for img_name, label in labels.items():
# File existence check
if not os.path.exists(os.path.join(self.root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

geoms, polygons_classes = self.format_polygons(label["polygons"], use_polygons, np_dtype)

self.data.append((img_name, (np.asarray(geoms, dtype=np_dtype), polygons_classes)))
missing_files.append(img_name)
# raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")
else:
geoms, polygons_classes = self.format_polygons(label["polygons"], use_polygons, np_dtype)
self.data.append((img_name, (np.asarray(geoms, dtype=np_dtype), polygons_classes)))
print("List of missing files:")
print(f"MISSING FILES: {len(missing_files)}")
from pprint import pprint

pprint(missing_files)

def format_polygons(
self, polygons: list | dict, use_polygons: bool, np_dtype: type
Expand Down
13 changes: 10 additions & 3 deletions doctr/datasets/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,18 @@ def __init__(
with open(labels_path, encoding="utf-8") as f:
labels = json.load(f)

missing_files = []
for img_name, label in labels.items():
if not os.path.exists(os.path.join(self.root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

self.data.append((img_name, label))
missing_files.append(img_name)
# raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")
else:
self.data.append((img_name, label))
print("List of missing files:")
print(f"MISSING FILES: {len(missing_files)}")
from pprint import pprint

pprint(missing_files)

def merge_dataset(self, ds: AbstractDataset) -> None:
# Update data with new root for self
Expand Down
Loading
Loading