Skip to content

Commit 95d20b6

Browse files
author
nailixing
committed
Rename some fields and fix some bugs
1 parent 19c3df6 commit 95d20b6

File tree

12 files changed

+80
-43
lines changed

12 files changed

+80
-43
lines changed

docs/src/user/client-upload-pretrained-models.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Example:
1212
task='IMAGE_CLASSIFICATION',
1313
model_file_path='./examples/models/image_classification/TfFeedForward.py',
1414
model_class='TfFeedForward',
15-
model_pretrained_params_id="b42cde03-0bc3-4b15-a276-4d95f6c88fa8.model",
15+
model_preload_file_path="b42cde03-0bc3-4b15-a276-4d95f6c88fa8.model",
1616
dependencies={ModelDependency.TENSORFLOW: '1.12.0'}
1717
)
1818
@@ -53,7 +53,7 @@ FoodLg model upload
5353
task='IMAGE_CLASSIFICATION',
5454
model_file_path='./examples/models/image_object_detection/food_darknet_xception1.py',
5555
model_class='FoodDetection',
56-
model_pretrained_params_id="model231.zip",
56+
model_preload_file_path="model231.zip",
5757
dependencies={"keras": "2.2.4", "tensorflow": "1.12.0"}
5858
)
5959

examples/models/image_object_detection/food_darknet_xception.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,15 @@ def load_parameters(self, params):
9393

9494
self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()}
9595

96-
h5_models_base64 = params['h5_model_base64']
96+
zip_file_base64 = params['zip_file_base64']
9797

9898
self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size)
9999

100100
with tempfile.NamedTemporaryFile() as tmp:
101101
# Convert back to bytes & write to temp file
102-
h5_models_bytes = base64.b64decode(h5_models_base64.encode('utf-8'))
102+
zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8'))
103103
with open(tmp.name, 'wb') as f:
104-
f.write(h5_models_bytes)
104+
f.write(zip_file_base64)
105105
with tempfile.TemporaryDirectory() as d:
106106
dataset_zipfile = zipfile.ZipFile(tmp.name, 'r')
107107
dataset_zipfile.extractall(path=d)

examples/models/image_object_detection/food_darknet_xception1.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,15 @@ def load_parameters(self, params):
9393

9494
self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()}
9595

96-
h5_models_base64 = params['h5_model_base64']
96+
zip_file_base64 = params['zip_file_base64']
9797

9898
self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size)
9999

100100
with tempfile.NamedTemporaryFile() as tmp:
101101
# Convert back to bytes & write to temp file
102-
h5_models_bytes = base64.b64decode(h5_models_base64.encode('utf-8'))
102+
zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8'))
103103
with open(tmp.name, 'wb') as f:
104-
f.write(h5_models_bytes)
104+
f.write(zip_file_base64)
105105
with tempfile.TemporaryDirectory() as d:
106106
dataset_zipfile = zipfile.ZipFile(tmp.name, 'r')
107107
dataset_zipfile.extractall(path=d)

examples/models/image_object_detection/food_darknet_xception2.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,15 @@ def load_parameters(self, params):
9393

9494
self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()}
9595

96-
h5_models_base64 = params['h5_model_base64']
96+
zip_file_base64 = params['zip_file_base64']
9797

9898
self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size)
9999

100100
with tempfile.NamedTemporaryFile() as tmp:
101101
# Convert back to bytes & write to temp file
102-
h5_models_bytes = base64.b64decode(h5_models_base64.encode('utf-8'))
102+
zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8'))
103103
with open(tmp.name, 'wb') as f:
104-
f.write(h5_models_bytes)
104+
f.write(zip_file_base64)
105105
with tempfile.TemporaryDirectory() as d:
106106
dataset_zipfile = zipfile.ZipFile(tmp.name, 'r')
107107
dataset_zipfile.extractall(path=d)

scripts/docker_swarm/build_images.sh

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ title "Using docker swarm"
3131

3232
echo "using $APP_MODE docker files"
3333
if [[ $APP_MODE = "DEV" ]]
34+
then
3435
title "Building SINGA-Auto Admin's image..."
3536
docker build -t $SINGA_AUTO_IMAGE_ADMIN:$SINGA_AUTO_VERSION -f ./dockerfiles/dev_dockerfiles/admin.Dockerfile \
3637
--build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \

singa_auto/admin/view/model.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,14 @@ def create_model(auth, params):
6161
if 'checkpoint_id' in params and params['checkpoint_id'] is not None:
6262

6363
# if the checkpoint is not .model file, serialize it first
64-
if params['checkpoint_id'].filename.split(".")[-1] != 'model':
65-
h5_model_bytes = params['checkpoint_id'].read()
66-
checkpoint_id = FileParamStore().save({'h5_model_base64': base64.b64encode(h5_model_bytes).decode('utf-8')})
64+
if params['checkpoint_id'].filename.split(".")[-1] == 'zip':
65+
zip_file_base64 = params['checkpoint_id'].read()
66+
checkpoint_id = FileParamStore().save({'zip_file_base64': base64.b64encode(zip_file_base64).decode('utf-8')})
6767
feed_params['checkpoint_id'] = checkpoint_id
68-
# if the model is trained with singa_auto, copy it to params files
69-
else:
68+
69+
# if the model is trained with singa_auto (the model name ends with 'model'), copy it to params files
70+
# no need to encode it with b64 as it is already encoded by singa-auto after training
71+
elif params['checkpoint_id'].filename.split(".")[-1] == 'model':
7072
with tempfile.NamedTemporaryFile() as f:
7173
file_storage = params['checkpoint_id']
7274
file_storage.save(f.name)
@@ -77,6 +79,12 @@ def create_model(auth, params):
7779
checkpoint_id)
7880
shutil.copyfile(f.name, dest_file_path)
7981
feed_params['checkpoint_id'] = checkpoint_id
82+
else:
83+
84+
# if the checkpoint name does not end with zip or model, return an error message
85+
return jsonify({'ErrorMsg': 'model preload file should be ended with "zip" or "model", '
86+
'if it is a "*.model" file,'
87+
'it should be the model_file saved after training by using singa-auto'}), 400
8088
with admin:
8189
return jsonify(admin.create_model(**feed_params))
8290

singa_auto/client/client.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def create_model(self,
230230
task: str,
231231
model_file_path: str,
232232
model_class: str,
233-
model_pretrained_params_id: str = None,
233+
model_preload_file_path: str = None,
234234
dependencies: ModelDependencies = None,
235235
access_right: ModelAccessRight = ModelAccessRight.PRIVATE,
236236
docker_image: str = None) -> Dict[str, Any]:
@@ -245,7 +245,7 @@ def create_model(self,
245245
:param model_class: The name of the model class inside the Python file. This class should implement :class:`singa_auto.model.BaseModel`
246246
:param dependencies: List of Python dependencies & their versions
247247
:param access_right: Model access right
248-
:param model_pretrained_params_id: pretrained mdoel file
248+
:param model_preload_file_path: pretrained model file
249249
:param docker_image: A custom Docker image that extends ``singa_auto/singa_auto_worker``, publicly available on Docker Hub.
250250
:returns: Created model as dictionary
251251
@@ -270,10 +270,10 @@ def create_model(self,
270270
}
271271
pretrained_files = {}
272272

273-
if model_pretrained_params_id is not None:
273+
if model_preload_file_path is not None:
274274
pretrained_files = {'checkpoint_id': (
275-
model_pretrained_params_id,
276-
open(model_pretrained_params_id, 'rb'),
275+
model_preload_file_path,
276+
open(model_preload_file_path, 'rb'),
277277
'application/octet-stream')}
278278

279279
files = {**model_files, **pretrained_files}

singa_auto/predictor/app.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from .predictor import Predictor
2424
from singa_auto.model import utils
2525
import traceback
26+
import json
27+
2628
service_id = os.environ['SINGA_AUTO_SERVICE_ID']
2729

2830
logger = logging.getLogger(__name__)
@@ -56,19 +58,32 @@ def predict():
5658
img for img in [img_store.read() for img_store in img_stores] if img
5759
]
5860
print("img_stores", img_stores)
59-
print("img_bytes", img_bytes)
6061
if not img_bytes:
6162
return jsonify({'ErrorMsg': 'No image provided'}), 400
63+
print("img_bytes_first 10 bytes", img_bytes[0][:10])
64+
queries = utils.dataset.load_images_from_bytes(img_bytes).tolist()
65+
print("queries_sizes", len(queries))
66+
elif request.get_json():
67+
data = request.get_json()
68+
queries = [data]
69+
elif request.data:
70+
data = json.loads(request.data)
71+
print(data)
72+
queries = [data]
6273
else:
63-
return jsonify({'ErrorMsg': 'No image provided'}), 400
74+
return jsonify({'ErrorMsg': 'data should be either at files or json payload'}), 400
6475
try:
6576
predictor = get_predictor()
66-
queries = utils.dataset.load_images_from_bytes(img_bytes).tolist()
77+
# this queries is type of List[Any]
6778
predictions = predictor.predict(queries)
68-
79+
print(type(predictions))
6980
if isinstance(predictions[0], list):
7081
# this is only for pandavgg demo as the frontend only accept the dictionary.
7182
return jsonify(predictions[0][0]), 200
83+
elif isinstance(predictions, list) and isinstance(predictions[0], str):
84+
# this is only match qa model,
85+
print("this is only match qa model")
86+
return predictions[0], 200
7287
else:
7388
return jsonify(predictions), 200
7489
except:

singa_auto/predictor/ensemble.py

-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ def ensemble_probabilities(predictions: List[Any]) -> Any:
5252
def ensemble(predictions: List[Any]) -> Any:
5353
if len(predictions) == 0:
5454
return None
55-
print("predictions is (in ensemble)", predictions)
5655
# Return some worker's predictions
5756
index = 0
5857
prediction = predictions[index]

singa_auto/predictor/predictor.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def start(self):
6868

6969
def predict(self, queries):
7070
worker_predictions_list = self._get_predictions_from_workers(queries)
71+
print("Getting prediction list")
7172
predictions = self._combine_worker_predictions(worker_predictions_list)
7273
return predictions
7374

@@ -124,8 +125,11 @@ def _get_predictions_from_workers(
124125

125126
# Wait for at least 1 free worker
126127
worker_ids = []
128+
127129
while len(worker_ids) == 0:
130+
print("Getting free worker from redis...")
128131
worker_ids = self._redis_cache.get_workers()
132+
time.sleep(0.5)
129133

130134
# For each worker, send queries to worker
131135
pending_queries = set() # {(query_id, worker_id)}
@@ -150,7 +154,7 @@ def _get_predictions_from_workers(
150154
# Record prediction & mark as not pending
151155
query_id_to_predictions[query_id].append(prediction)
152156
pending_queries.remove((query_id, worker_id))
153-
157+
print("Getting prediction result from kafka...")
154158
time.sleep(PREDICT_LOOP_SLEEP_SECS)
155159

156160
# Reorganize predictions

singa_auto/utils/service.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,12 @@
3131
curr_time = datetime.now().strftime("%Y-%m-%d_%I.%M.%S.%p")
3232

3333

34-
3534
def run_worker(meta_store, start_worker, stop_worker):
3635
service_id = os.environ['SINGA_AUTO_SERVICE_ID']
3736
service_type = os.environ['SINGA_AUTO_SERVICE_TYPE']
3837
container_id = os.environ.get('HOSTNAME', 'localhost')
39-
configure_logging('{}-SvcID-{}'
40-
.format(curr_time, service_id))
38+
configure_logging('{}-SvcID-{}-ContainerID-{}'
39+
.format(curr_time, service_id,container_id))
4140

4241
def _sigterm_handler(_signo, _stack_frame):
4342
logger.warn("Terminal signal received: %s, %s" % (_signo, _stack_frame))

singa_auto/worker/inference.py

+24-13
Original file line numberDiff line numberDiff line change
@@ -124,25 +124,35 @@ def _pull_job_info(self):
124124

125125
inference_job = self._meta_store.get_inference_job(
126126
worker.inference_job_id)
127+
127128
if inference_job is None:
128129
raise InvalidWorkerError(
129130
'No such inference job with ID "{}"'.format(
130131
worker.inference_job_id))
131-
if inference_job.model_id:
132-
model = self._meta_store.get_model(inference_job.model_id)
133-
logger.info(f'Using checkpoint of the model "{model.name}"...')
134-
135-
self._proposal = Proposal.from_jsonable({
136-
"trial_no": 1,
137-
"knobs": {}
138-
})
139-
self._store_params_id = model.checkpoint_id
140-
else:
141132

142-
trial = self._meta_store.get_trial(worker.trial_id)
143-
if trial is None or trial.store_params_id is None: # Must have model saved
133+
trial = self._meta_store.get_trial(worker.trial_id)
134+
135+
# check if there are trained model saved
136+
if trial is None or trial.store_params_id is None:
137+
138+
# if there is no train job, then check if there is a checkpoint uploaded
139+
if inference_job.model_id:
140+
model = self._meta_store.get_model(inference_job.model_id)
141+
logger.info(f'Using checkpoint of the model "{model.name}"...')
142+
143+
self._proposal = Proposal.from_jsonable({
144+
"trial_no": 1,
145+
"knobs": {}
146+
})
147+
self._store_params_id = model.checkpoint_id
148+
else:
149+
150+
# if there is no checkpoint id and no trained model saved
144151
raise InvalidTrialError(
145-
'No saved trial with ID "{}"'.format(worker.trial_id))
152+
'No saved trial with ID "{}" and no checkpoint uploaded'.format(worker.trial_id))
153+
else:
154+
155+
# create inference with trained parameters first
146156
logger.info(f'Using trial "{trial.id}"...')
147157

148158
model = self._meta_store.get_model(trial.model_id)
@@ -183,6 +193,7 @@ def _predict(self, queries: List[Query]) -> List[Prediction]:
183193
try:
184194
predictions = self._model_inst.predict([x.query for x in queries])
185195
except:
196+
print('Error while making predictions:')
186197
logger.error('Error while making predictions:')
187198
logger.error(traceback.format_exc())
188199
predictions = [None for x in range(len(queries))]

0 commit comments

Comments
 (0)