From 90907536df15e8223c8d015198bfa4a4a08f26e9 Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Tue, 28 Sep 2021 15:40:17 +0800 Subject: [PATCH 1/6] add tflite support --- guesslang/guess.py | 6 ++++++ guesslang/model.py | 45 ++++++++++++++++++++++++++++++++------------- requirements.txt | 3 ++- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/guesslang/guess.py b/guesslang/guess.py index fab3fb2..fb93844 100644 --- a/guesslang/guess.py +++ b/guesslang/guess.py @@ -7,6 +7,8 @@ from tempfile import TemporaryDirectory from typing import List, Tuple, Optional +from tensorflow_estimator.python.estimator.api._v2 import estimator + from guesslang import model @@ -164,6 +166,10 @@ def _is_reliable(probabilities: List[float]) -> bool: predicted_language_probability = max(probabilities) return predicted_language_probability > threshold + def export(self, model_logs_dir, ckpt_path, tflite=False): + estimator = model.build(model_logs_dir, list(self._extension_map)) + model.save(estimator, self._saved_model_dir, ckpt_path, tflite) + class GuesslangError(Exception): """Guesslang exception class""" diff --git a/guesslang/model.py b/guesslang/model.py index d6775ea..59e69b8 100644 --- a/guesslang/model.py +++ b/guesslang/model.py @@ -1,6 +1,8 @@ """Machine learning model""" from copy import deepcopy +import os +import functools import logging from operator import itemgetter from pathlib import Path @@ -11,6 +13,7 @@ import tensorflow as tf from tensorflow.estimator import ModeKeys, Estimator from tensorflow.python.training.tracking.tracking import AutoTrackable +import tensorflow_text as text LOGGER = logging.getLogger(__name__) @@ -98,15 +101,22 @@ def train(estimator: Estimator, data_root_dir: str, max_steps: int) -> Any: return training_metrics -def save(estimator: Estimator, saved_model_dir: str) -> None: +def save(estimator: Estimator, saved_model_dir: str, ckpt_path:str=None, is_tflite:bool=False) -> None: """Save a Tensorflow estimator""" with TemporaryDirectory() as temporary_model_base_dir: export_dir = estimator.export_saved_model( - temporary_model_base_dir, _serving_input_receiver_fn + temporary_model_base_dir, functools.partial(_serving_input_receiver_fn, is_tflite=is_tflite), + checkpoint_path=ckpt_path ) - Path(saved_model_dir).mkdir(exist_ok=True) export_path = Path(export_dir.decode()).absolute() + if is_tflite: + converter = tf.lite.TFLiteConverter.from_saved_model(export_dir, signature_keys=['predict']) + converter.optimizations=[tf.lite.Optimize.DEFAULT] + converter.inference_type=tf.float32 + converter.target_spec.supported_ops=[tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS] + model = converter.convert() + tf.io.write_file(os.path.join(saved_model_dir, 'guesslang.tflite'), model) for path in export_path.glob('*'): shutil.move(str(path), saved_model_dir) @@ -178,12 +188,17 @@ def input_function() -> tf.data.Dataset: return input_function -def _serving_input_receiver_fn() -> tf.estimator.export.ServingInputReceiver: +def _serving_input_receiver_fn(is_tflite=False) -> tf.estimator.export.ServingInputReceiver: """Function to serve model for predictions.""" - - content = tf.compat.v1.placeholder(tf.string, [None]) - receiver_tensors = {'content': content} - features = {'content': tf.map_fn(_preprocess_text, content)} + if is_tflite: + content = tf.compat.v1.placeholder(tf.string, [HyperParameter.NB_TOKENS+1]) + length = tf.compat.v1.placeholder(tf.int32, []) + receiver_tensors = {'content': content, 'length':length} + features = {'content': _preprocess_text_tflite(content, length)} + else: + content = tf.compat.v1.placeholder(tf.string, [None]) + receiver_tensors = {'content': content} + features = {'content': _preprocess_text(content)} return tf.estimator.export.ServingInputReceiver( receiver_tensors=receiver_tensors, @@ -209,9 +224,13 @@ def _preprocess( def _preprocess_text(data: tf.Tensor) -> tf.Tensor: """Feature engineering""" - padding = tf.constant(['']*HyperParameter.NB_TOKENS) data = tf.strings.bytes_split(data) - data = tf.strings.ngrams(data, HyperParameter.N_GRAM) - data = tf.concat((data, padding), axis=0) - data = data[:HyperParameter.NB_TOKENS] - return data + data = text.ngrams(data, HyperParameter.N_GRAM, reduction_type=text.Reduction.STRING_JOIN) + return data.to_tensor(shape=(data.shape[0], HyperParameter.NB_TOKENS)) + + +def _preprocess_text_tflite(data: tf.Tensor, length: tf.Tensor) -> tf.Tensor: + processed_data, unprocessed_data = tf.split(data, [length, HyperParameter.NB_TOKENS-length+1], num=2, axis=0) + processed_data = text.ngrams(processed_data, HyperParameter.N_GRAM, reduction_type=text.Reduction.STRING_JOIN) + return tf.expand_dims(tf.concat([processed_data, unprocessed_data], axis=0), axis=0) + diff --git a/requirements.txt b/requirements.txt index 8e256a9..1e9e0c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -tensorflow==2.5.0 +tensorflow==2.6.0 +tensorflow-text==2.6.0 \ No newline at end of file From a8755e882f61413892a6ad70ccd3196b748baf46 Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Tue, 28 Sep 2021 20:09:58 +0800 Subject: [PATCH 2/6] remove useless line --- guesslang/guess.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/guesslang/guess.py b/guesslang/guess.py index fb93844..2b32120 100644 --- a/guesslang/guess.py +++ b/guesslang/guess.py @@ -7,8 +7,6 @@ from tempfile import TemporaryDirectory from typing import List, Tuple, Optional -from tensorflow_estimator.python.estimator.api._v2 import estimator - from guesslang import model From 04398ce2673f594262f041738c274d8ecac5b5cf Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Thu, 30 Sep 2021 08:11:17 +0800 Subject: [PATCH 3/6] fix train script --- guesslang/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guesslang/model.py b/guesslang/model.py index 59e69b8..8017b0d 100644 --- a/guesslang/model.py +++ b/guesslang/model.py @@ -183,7 +183,7 @@ def input_function() -> tf.data.Dataset: if mode == ModeKeys.TRAIN: dataset = dataset.shuffle(Training.SHUFFLE_BUFFER).repeat() - return dataset.map(_preprocess).batch(HyperParameter.BATCH_SIZE) + return dataset.batch(HyperParameter.BATCH_SIZE).map(_preprocess) return input_function From af27bfd14053eaf298704d4b08e8212b6cfba1fe Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Thu, 30 Sep 2021 08:30:08 +0800 Subject: [PATCH 4/6] fix code --- guesslang/guess.py | 5 ++++- guesslang/model.py | 47 ++++++++++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/guesslang/guess.py b/guesslang/guess.py index 2b32120..a80e88a 100644 --- a/guesslang/guess.py +++ b/guesslang/guess.py @@ -164,7 +164,10 @@ def _is_reliable(probabilities: List[float]) -> bool: predicted_language_probability = max(probabilities) return predicted_language_probability > threshold - def export(self, model_logs_dir, ckpt_path, tflite=False): + def export(self, + model_logs_dir: str, + ckpt_path: str, + tflite: bool = False) -> None: estimator = model.build(model_logs_dir, list(self._extension_map)) model.save(estimator, self._saved_model_dir, ckpt_path, tflite) diff --git a/guesslang/model.py b/guesslang/model.py index 8017b0d..8e2d4e6 100644 --- a/guesslang/model.py +++ b/guesslang/model.py @@ -8,7 +8,7 @@ from pathlib import Path import shutil from tempfile import TemporaryDirectory -from typing import List, Tuple, Dict, Any, Callable +from typing import List, Tuple, Dict, Any, Callable, Optional import tensorflow as tf from tensorflow.estimator import ModeKeys, Estimator @@ -101,22 +101,31 @@ def train(estimator: Estimator, data_root_dir: str, max_steps: int) -> Any: return training_metrics -def save(estimator: Estimator, saved_model_dir: str, ckpt_path:str=None, is_tflite:bool=False) -> None: +def save(estimator: Estimator, + saved_model_dir: str, + ckpt_path: Optional[str] = None, + is_tflite: bool = False) -> None: """Save a Tensorflow estimator""" with TemporaryDirectory() as temporary_model_base_dir: export_dir = estimator.export_saved_model( - temporary_model_base_dir, functools.partial(_serving_input_receiver_fn, is_tflite=is_tflite), + temporary_model_base_dir, + functools.partial(_serving_input_receiver_fn, + is_tflite=is_tflite), checkpoint_path=ckpt_path ) Path(saved_model_dir).mkdir(exist_ok=True) export_path = Path(export_dir.decode()).absolute() if is_tflite: - converter = tf.lite.TFLiteConverter.from_saved_model(export_dir, signature_keys=['predict']) - converter.optimizations=[tf.lite.Optimize.DEFAULT] - converter.inference_type=tf.float32 - converter.target_spec.supported_ops=[tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS] + converter = tf.lite.TFLiteConverter.from_saved_model( + export_dir, signature_keys=['predict']) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.inference_type = tf.float32 + ops_sets = [tf.lite.OpsSet.TFLITE_BUILTINS, + tf.lite.OpsSet.SELECT_TF_OPS] + converter.target_spec.supported_ops = ops_sets model = converter.convert() - tf.io.write_file(os.path.join(saved_model_dir, 'guesslang.tflite'), model) + tflite_path = os.path.join(saved_model_dir, 'guesslang.tflite') + tf.io.write_file(tflite_path, model) for path in export_path.glob('*'): shutil.move(str(path), saved_model_dir) @@ -188,12 +197,14 @@ def input_function() -> tf.data.Dataset: return input_function -def _serving_input_receiver_fn(is_tflite=False) -> tf.estimator.export.ServingInputReceiver: +def _serving_input_receiver_fn( + is_tflite: bool = False) -> tf.estimator.export.ServingInputReceiver: """Function to serve model for predictions.""" if is_tflite: - content = tf.compat.v1.placeholder(tf.string, [HyperParameter.NB_TOKENS+1]) + content = tf.compat.v1.placeholder(tf.string, + [HyperParameter.NB_TOKENS+1]) length = tf.compat.v1.placeholder(tf.int32, []) - receiver_tensors = {'content': content, 'length':length} + receiver_tensors = {'content': content, 'length': length} features = {'content': _preprocess_text_tflite(content, length)} else: content = tf.compat.v1.placeholder(tf.string, [None]) @@ -225,12 +236,16 @@ def _preprocess( def _preprocess_text(data: tf.Tensor) -> tf.Tensor: """Feature engineering""" data = tf.strings.bytes_split(data) - data = text.ngrams(data, HyperParameter.N_GRAM, reduction_type=text.Reduction.STRING_JOIN) + data = text.ngrams( + data, HyperParameter.N_GRAM, reduction_type=text.Reduction.STRING_JOIN) return data.to_tensor(shape=(data.shape[0], HyperParameter.NB_TOKENS)) def _preprocess_text_tflite(data: tf.Tensor, length: tf.Tensor) -> tf.Tensor: - processed_data, unprocessed_data = tf.split(data, [length, HyperParameter.NB_TOKENS-length+1], num=2, axis=0) - processed_data = text.ngrams(processed_data, HyperParameter.N_GRAM, reduction_type=text.Reduction.STRING_JOIN) - return tf.expand_dims(tf.concat([processed_data, unprocessed_data], axis=0), axis=0) - + processed_data, unprocessed_data = tf.split( + data, [length, HyperParameter.NB_TOKENS-length+1], num=2, axis=0) + processed_data = text.ngrams( + processed_data, HyperParameter.N_GRAM, + reduction_type=text.Reduction.STRING_JOIN) + return tf.expand_dims( + tf.concat([processed_data, unprocessed_data], axis=0), axis=0) From f68d0537bfb4932924183e98a27748cca18f7d44 Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Thu, 30 Sep 2021 08:36:14 +0800 Subject: [PATCH 5/6] pin keras --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1e9e0c4..8ec284a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ tensorflow==2.6.0 -tensorflow-text==2.6.0 \ No newline at end of file +tensorflow-text==2.6.0 +keras==2.6.0 \ No newline at end of file From 055c244a75b15abe80594640863c8951c227e6e7 Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Thu, 30 Sep 2021 10:04:14 +0800 Subject: [PATCH 6/6] remove useless args --- guesslang/guess.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/guesslang/guess.py b/guesslang/guess.py index a80e88a..4f7724b 100644 --- a/guesslang/guess.py +++ b/guesslang/guess.py @@ -165,11 +165,12 @@ def _is_reliable(probabilities: List[float]) -> bool: return predicted_language_probability > threshold def export(self, - model_logs_dir: str, ckpt_path: str, tflite: bool = False) -> None: - estimator = model.build(model_logs_dir, list(self._extension_map)) - model.save(estimator, self._saved_model_dir, ckpt_path, tflite) + with TemporaryDirectory() as model_logs_dir: + estimator = model.build(model_logs_dir, + list(self._extension_map)) + model.save(estimator, self._saved_model_dir, ckpt_path, tflite) class GuesslangError(Exception):