Merge pull request #1 from CherifiImene/developement
uploading full code
AdiChat authored Jan 5, 2023
2 parents 60343b9 + 762866a commit 33175ba
Showing 11 changed files with 4,922 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
*.h5 filter=lfs diff=lfs merge=lfs -text
32 changes: 31 additions & 1 deletion README.md
@@ -1 +1,31 @@
# Fault-Detection-System
A repository for a CNN-based binary classification model for detecting defective solar module cells.

## Execution
All the code was run on Google Colab and targets Python 3.x.

Additional libraries:
- Run `!pip install tf-keras-vis` in a cell to use Score-CAM.
## Dataset
The dataset used in this project can be downloaded from [this repo](https://github.com/zae-bayern/elpv-dataset).
It consists of 2,624 8-bit grayscale images (300x300 pixels) of functional and defective solar cells, with varying degrees of degradation, extracted from 44 different solar modules.

## Code
The notebook "fault_detection.ipynb" builds a binary classification model using Transfer Learning from a pre-trained EfficientNetv2B2 on ImageNet Dataset.
The notebook also shows how to use ScoreCam to explain the predictions of the model.
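For context, below is a minimal Score-CAM sketch using tf-keras-vis; the call pattern follows the library's documented API but is an assumption for illustration, not code from this commit.

```python
import numpy as np
from tf_keras_vis.scorecam import Scorecam
from tf_keras_vis.utils.scores import BinaryScore

def explain(model, images):
    """Return Score-CAM heatmaps (one per image) for a single-logit binary model."""
    scorecam = Scorecam(model)
    # BinaryScore(1.0) targets the "defective" side of the single logit.
    cam = scorecam(BinaryScore(1.0), images, penultimate_layer=-1)
    # Heatmaps are normalized to [0, 1]; scale to 8-bit for visualization.
    return [np.uint8(255 * c) for c in cam]
```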

The utils folder contains utility functions for loading the dataset.

The models folder is where the best trained model is saved.
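For orientation, here is a minimal sketch of how these utilities fit together; the class and argument names come from utils/data_loader.py in this commit, but this exact call pattern is illustrative rather than taken from the notebook.

```python
from utils.data_loader import DataLoader

# Build 224x224 RGB batches from labels.csv with an 80/20 stratified split.
loader = DataLoader(image_size=224, batch_size=32, val_size=0.2)
train_batches, val_batches = loader.load_dataset()
```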
## Notes
* To reproduce this work:
- Download the dataset with `git clone https://github.com/zae-bayern/elpv-dataset.git`.
- Move the dataset images into the images folder of this repository (a scripted version of this step is sketched below).
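One possible way to script that move; this sketch assumes the clone sits next to this repository and that the dataset images live in its images/ subfolder, which is an assumption rather than something stated in this commit:

```python
import shutil
from pathlib import Path

src = Path("elpv-dataset/images")  # assumed location of the cloned dataset images
dst = Path("images")
dst.mkdir(exist_ok=True)

for img in src.glob("*.png"):
    shutil.move(str(img), str(dst / img.name))
```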


## References

[1] Buerhop-Lutz, C.; Deitsch, S.; Maier, A.; Gallwitz, F.; Berger, S.; Doll, B.; Hauch, J.; Camus, C. & Brabec, C. J. A Benchmark for Visual Identification of Defective Solar Cells in Electroluminescence Imagery. European PV Solar Energy Conference and Exhibition (EU PVSEC), 2018. DOI: 10.4229/35thEUPVSEC20182018-5CV.3.15

[2] Deitsch, S., Buerhop-Lutz, C., Sovetkin, E., Steland, A., Maier, A., Gallwitz, F., & Riess, C. (2021). Segmentation of photovoltaic module cells in uncalibrated electroluminescence images. Machine Vision and Applications, 32(4). DOI: 10.1007/s00138-021-01191-9
1,933 changes: 1,933 additions & 0 deletions fault_detection.ipynb


2,624 changes: 2,624 additions & 0 deletions labels.csv


Binary file added utils/__pycache__/data_loader.cpython-310.pyc
Binary file added utils/__pycache__/data_loader.cpython-38.pyc
Binary file added utils/__pycache__/elpv_reader.cpython-310.pyc
Binary file added utils/__pycache__/elpv_reader.cpython-38.pyc
242 changes: 242 additions & 0 deletions utils/data_loader.py
@@ -0,0 +1,242 @@
import numpy as np
import tensorflow as tf
import os
from utils.elpv_reader import load_dataset
from sklearn.model_selection import train_test_split
from tensorflow.data import Dataset
from tensorflow.keras import layers

HOME_DIR = os.path.dirname(os.path.dirname(__file__))
CONF_PATH = os.path.join(HOME_DIR,"labels.csv")

class DataLoader:
def __init__(self,path=CONF_PATH,
image_size=224,
shuffle=True,
augment=True,
batch_size=32,
val_size = 0.2,
random_state=42,
include_cell_type=False) -> None:

        # The model only supports
        # input images of size 224 or 192
        if image_size != 224 and image_size != 192:
            raise ValueError("Image size can only be 224 or 192")

        # Whether to use the cell type
# as a feature for the model
self.include_cell_type = include_cell_type

self.augment = augment
self.batch_size = batch_size
self.val_size = val_size
self.random_state = random_state
self.shuffle = shuffle
self.path = path
self.image_size = image_size


def load_dataset(self):
# Load dataset
images, defect_probas, cell_types = load_dataset(fname=self.path)

# preprocess dataset
self.X1, self.X2, self.Y = self._preprocess_data(images=images,
defect_proba=defect_probas,
cell_type=cell_types)

        # Stratified split into train and validation sets.
        # Both calls share the same random_state and stratify target,
        # so the X1 and X2 partitions stay aligned row for row.

X1_train,X1_val,Y_train,Y_val = train_test_split(self.X1,self.Y,
test_size=self.val_size,
random_state=self.random_state,
stratify=self.Y)

X2_train,X2_val,Y_train,Y_val = train_test_split(self.X2,self.Y,
test_size=self.val_size,
random_state=self.random_state,
stratify=self.Y)

# create tensorflow datasets
if not self.include_cell_type:
train_dataset, val_dataset = self._create_tf_dataset(x_train=X1_train,
x_val=X1_val,
y_train=Y_train,
y_val=Y_val)
train_batches = self.batch_data(train_dataset,
augment=self.augment,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)
val_batches = self.batch_data(val_dataset,
augment=False,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)

return train_batches, val_batches

else:
train1_dataset,val1_dataset, train2_dataset, val2_dataset = self._create_tf_dataset(x1_train=X1_train,
x1_val=X1_val,
x2_train=X2_train,
x2_val=X2_val,
y_train=Y_train,
y_val=Y_val)
train1_batches = self.batch_data(train1_dataset,
augment=self.augment,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)
val1_batches = self.batch_data(val1_dataset,
augment=False,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)
# TODO verify this feature
# since we can't augment the data
# and the function applies image rescaling
train2_batches = self.batch_data(train2_dataset,
augment=False,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)
val2_batches = self.batch_data(val2_dataset,
augment=False,
shuffle=self.shuffle,
batch_size=self.batch_size,
image_size=self.image_size)

return train1_batches, val1_batches, train2_batches, val2_batches


def batch_data(self,ds,
augment=True,
shuffle=True,
batch_size=32,
image_size=224,
buffer_size=1000
):

AUTOTUNE = tf.data.AUTOTUNE

IMG_SIZE = image_size

# Rescaling the images
resize_and_rescale = tf.keras.Sequential([
layers.Resizing(IMG_SIZE, IMG_SIZE),
layers.Rescaling(1./255)
])

# Defining data augmentation technique
data_augmentation = tf.keras.Sequential([
layers.RandomFlip("horizontal_and_vertical"),
layers.RandomRotation(0.2),
])


        # Resize and rescale all datasets.
        ds = ds.map(lambda x, y: (resize_and_rescale(x), y),
                    num_parallel_calls=AUTOTUNE)

        # Cache the deterministic preprocessing so it runs only once.
        ds = ds.cache()

        # Shuffle individual samples before batching.
        if shuffle:
            ds = ds.shuffle(buffer_size)

        # Batch all datasets.
        ds = ds.batch(batch_size)

        # Use data augmentation only on the training set; augmenting
        # after cache() draws fresh random transforms every epoch.
        if augment:
            ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y),
                        num_parallel_calls=AUTOTUNE)

        # Use buffered prefetching on all datasets.
        return ds.prefetch(buffer_size=AUTOTUNE)

def _preprocess_data(self,images,defect_proba,cell_type):

# Convert the probabilities to classes
Y = defect_proba.copy()
Y[Y >= 0.5] = 1. # the cell is defective
Y[Y < 0.5] = 0. # the cell is not defective

# Convert grayscale to rgb
X1 = self._grayscale_to_rgb(images)

# convert cell type to numerical data
# in case it is used as a second feature
X2 = cell_type.copy()
if self.include_cell_type:
X2[X2 == "mono"] = 0
X2[X2 == "poly"] = 1

return X1,X2,Y

def _grayscale_to_rgb(self,images):
# Convert grayscale image to rgb
# by repeating the grayscale image
# over the three channels
# This is needed to adapt the images
# to the inputs of the transfer learning model

# images.shape = (batch_size,x,y)
# rgb_imgs.shape = (batch_size,x,y,3)
rgb_imgs = np.repeat(images[..., np.newaxis],3,-1)
return rgb_imgs

def _create_tf_dataset(self, **kwargs):

try:
if "x1_train" in kwargs.keys():

X1_train = kwargs["x1_train"]
X2_train = kwargs["x2_train"]

X1_val = kwargs["x1_val"]
X2_val = kwargs["x2_val"]

Y_train = kwargs["y_train"]
Y_val = kwargs["y_val"]

# Creating tensorflow datasets
# for the two features : images + cell type
train1_dataset = Dataset.from_tensor_slices((X1_train,Y_train))
val1_dataset = Dataset.from_tensor_slices((X1_val,Y_val))

train2_dataset = Dataset.from_tensor_slices((X2_train,Y_train))
val2_dataset = Dataset.from_tensor_slices((X2_val,Y_val))

return train1_dataset,val1_dataset, train2_dataset, val2_dataset

elif "x_train" in kwargs.keys():
X_train = kwargs["x_train"]
X_val = kwargs["x_val"]


Y_train = kwargs["y_train"]
Y_val = kwargs["y_val"]

# Creating tensorflow datasets
# for images
train_dataset = Dataset.from_tensor_slices((X_train,Y_train))
val_dataset = Dataset.from_tensor_slices((X_val,Y_val))

return train_dataset, val_dataset
            else:
                raise KeyError("The only keys allowed are:"
                               " (x_train, y_train, x_val, y_val)"
                               " or (x1_train, x2_train, y_train,"
                               " x1_val, x2_val, y_val)")

        except KeyError:
            raise KeyError("The only keys allowed are:"
                           " (x_train, y_train, x_val, y_val)"
                           " or (x1_train, x2_train, y_train,"
                           " x1_val, x2_val, y_val)")


56 changes: 56 additions & 0 deletions utils/elpv_reader.py
@@ -0,0 +1,56 @@
# Copyright (C) 2018 Sergiu Deitsch
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from PIL import Image
import numpy as np
import os


def load_dataset(fname=None):
if fname is None:
# Assume we are in the utils folder and get the absolute path to the
# parent directory.
fname = os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir))
fname = os.path.join(fname, 'labels.csv')

data = np.genfromtxt(fname, dtype=['|S19', '<f8', '|S4'], names=[
'path', 'probability', 'type'])
image_fnames = np.char.decode(data['path'])
probs = data['probability']
types = np.char.decode(data['type'])

def load_cell_image(fname):
with Image.open(fname) as image:
return np.asarray(image)

dir = os.path.dirname(fname)

images = np.array([load_cell_image(os.path.join(dir, fn))
for fn in image_fnames])

return images, probs, types
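For orientation, load_dataset can also be called directly; this is a minimal sketch assuming the repository layout described in the README, not code from this commit:

from utils.elpv_reader import load_dataset

# images: (N, 300, 300) uint8 grayscale cells,
# probs: defect probabilities in [0, 1], types: "mono" / "poly".
images, probs, types = load_dataset()
print(images.shape, probs.min(), probs.max(), set(types))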
35 changes: 35 additions & 0 deletions utils/model.py
@@ -0,0 +1,35 @@
import tensorflow as tf
from tensorflow.keras import layers

def create_transfer_learning_model(base_model,
                                   fine_tune=False,
                                   fine_tune_at=None,
                                   input_shape=(224,224,3)):

    # Freeze the base model
    base_model.trainable = False

    if fine_tune:
        if fine_tune_at is None:
            raise ValueError("You should specify from which"
                             " layer the model will be fine-tuned")
        else:
            base_model.trainable = True

            # Freeze the lowest layers
            # and fine-tune the top layers
            # starting from index "fine_tune_at"
            for layer in base_model.layers[:fine_tune_at]:
                layer.trainable = False

    inputs = tf.keras.Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    # Single logit for binary classification;
    # train with a from_logits=True loss.
    outputs = layers.Dense(1)(x)

    model = tf.keras.Model(inputs, outputs)

    return model
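A minimal sketch of how this helper might be combined with the EfficientNetV2B2 backbone named in the README; the compile settings here are assumptions for illustration, not taken from this commit:

import tensorflow as tf
from utils.model import create_transfer_learning_model

# ImageNet backbone without its classification head.
base = tf.keras.applications.EfficientNetV2B2(include_top=False,
                                              weights="imagenet",
                                              input_shape=(224, 224, 3))
model = create_transfer_learning_model(base, input_shape=(224, 224, 3))

# The head emits a single logit, so pair it with a from_logits=True loss.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])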
