#
# Copyright (c) 2018-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import falcon
from grpc import StatusCode
import numpy as np
from tensorflow.core.framework import tensor_pb2
from tensorflow.python.framework import tensor_shape
from tensorflow_serving.apis import predict_pb2
from tensorflow.python.framework import dtypes as dtypes
from tensorflow.python.framework import tensor_util as tensor_util
from ie_serving.config import GLOBAL_CONFIG
from ie_serving.models.shape_management.utils import BatchingMode, ShapeMode
from ie_serving.server.constants import \
INVALID_INPUT_KEY, INVALID_SHAPE, INVALID_BATCHSIZE, GRPC, REST
from ie_serving.logger import get_logger
from tensorflow import __version__ as tf_version
if tf_version.split(".")[0] == "2":
    from tensorflow import make_ndarray, make_tensor_proto
else:  # TF version 1.x
    from tensorflow.contrib.util import make_ndarray, make_tensor_proto

logger = get_logger(__name__)

statusCodes = {
    'invalid_arg': {GRPC: StatusCode.INVALID_ARGUMENT,
                    REST: falcon.HTTP_BAD_REQUEST},
}


def prepare_input_data(target_engine, data, service_type):
    # returns:
    #   inference_input, None on success
    #   None, error_message on error
    model_inputs_in_input_request = list(dict(data).keys())
    input_keys = target_engine.input_key_names
    inference_input = {}

    for requested_input_blob in model_inputs_in_input_request:
        if requested_input_blob not in input_keys:
            message = INVALID_INPUT_KEY % (model_inputs_in_input_request,
                                           input_keys)
            logger.debug("PREDICT error: {}".format(message))
            return None, message

        tensor_name = target_engine.model_keys['inputs'][requested_input_blob]

        if service_type == GRPC:
            try:
                tensor_input = make_ndarray(data[requested_input_blob])
            except Exception as e:
                message = str(e)
                logger.debug("PREDICT prepare_input_data make_ndarray error: "
                             "{}".format(message))
                return None, message
        else:
            tensor_input = np.asarray(data[requested_input_blob])

        # Validate the shape unless the shape is in auto mode
        if target_engine.shape_info.mode != ShapeMode.AUTO:
            shape_required_in_model = target_engine.net.inputs[
                tensor_name].shape
            # For reshapable models check all dimensions;
            # for non-reshapable models check all dimensions starting from
            # the second one (omit the batch size)
            if target_engine.shape_info.mode == ShapeMode.DISABLED:
                starting_dim = 1
            else:
                starting_dim = 0

            # compare the requested shape with the model shape
            if shape_required_in_model[starting_dim:] != list(
                    tensor_input.shape)[starting_dim:]:
                message = INVALID_SHAPE.format(list(tensor_input.shape),
                                               shape_required_in_model)
                logger.debug("PREDICT error: {}".format(message))
                return None, message

            # check whether the input batch size matches the model,
            # but only when batching is not in auto mode
            if target_engine.batching_info.mode != \
                    BatchingMode.AUTO and shape_required_in_model[0] != \
                    tensor_input.shape[0]:
                message = INVALID_BATCHSIZE.format(
                    tensor_input.shape[0],
                    target_engine.batching_info.batch_size)
                logger.debug("PREDICT error, invalid batch size: {}".format(
                    message))
                return None, message

        inference_input[tensor_name] = tensor_input
    return inference_input, None
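
# Illustrative usage of prepare_input_data (a minimal sketch, kept as a
# comment; `engine` and `request` are hypothetical names standing for a
# loaded model engine and a received PredictRequest, and engine.infer() is an
# assumed engine API -- none of these are defined in this module):
#
#   inference_input, error = prepare_input_data(
#       target_engine=engine, data=request.inputs, service_type=GRPC)
#   if error is not None:
#       # reject the request, e.g. with statusCodes['invalid_arg'][GRPC]
#       ...
#   else:
#       results = engine.infer(inference_input)
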
'''
The function _prepare_output_as_AppendArrayToTensorProto returns inference
results in the form of a flattened list of array elements. The data is
serialized using the tensor_util._NP_TO_APPEND_FN dispatch table, which
employs the fast_tensor_util module and its Append<dtype>ArrayToTensorProto
functions. Despite the module name, it is slower than make_tensor_proto.
'''


def _prepare_output_as_AppendArrayToTensorProto(
        inference_output,
        model_available_outputs):
    response = predict_pb2.PredictResponse()
    for response_output_name, model_output_name in \
            model_available_outputs.items():
        if model_output_name in inference_output:
            dtype = dtypes.as_dtype(inference_output[model_output_name].dtype)
            output_tensor = tensor_pb2.TensorProto(
                dtype=dtype.as_datatype_enum,
                tensor_shape=tensor_shape.as_shape(
                    inference_output[model_output_name].shape).as_proto())
            result = inference_output[model_output_name].flatten()
            tensor_util._NP_TO_APPEND_FN[dtype.as_numpy_dtype](output_tensor,
                                                               result)
            response.outputs[response_output_name].CopyFrom(output_tensor)
    return response
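
# A minimal sketch of what this serializer produces (comment only, assuming a
# float32 output; the output name "prob" is hypothetical):
#
#   response = _prepare_output_as_AppendArrayToTensorProto(
#       {"prob": np.zeros((1, 3), dtype=np.float32)}, {"prob": "prob"})
#   # The elements land in the repeated typed field rather than in packed
#   # tensor_content bytes:
#   # list(response.outputs["prob"].float_val) -> [0.0, 0.0, 0.0]
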
'''
The function _prepare_output_with_make_tensor_proto implements a faster
serialization mechanism. For most models it returns the data as packed bytes
in tensor_content, produced via numpy tostring() calls.
On the client side the inference response can be deserialized using the
TensorFlow make_ndarray function.
'''


def _prepare_output_with_make_tensor_proto(
        inference_output,
        model_available_outputs):
    response = predict_pb2.PredictResponse()
    for response_output_name in model_available_outputs:
        model_output_name = model_available_outputs[response_output_name]
        response.outputs[response_output_name].CopyFrom(
            make_tensor_proto(inference_output[model_output_name]))
    return response
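
# Client-side view (a hedged sketch, comment only): a response built with
# make_tensor_proto can be converted back into a numpy array with
# TensorFlow's make_ndarray; the output name "prob" below is hypothetical:
#
#   from tensorflow import make_ndarray   # TF 2.x import path
#   output = make_ndarray(response.outputs["prob"])
#   # `output` is a numpy array with the original shape and dtype
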

# Serialization method selection
prepare_output = None
if GLOBAL_CONFIG['serialization_function'] == \
        '_prepare_output_as_AppendArrayToTensorProto':
    prepare_output = _prepare_output_as_AppendArrayToTensorProto
else:
    prepare_output = _prepare_output_with_make_tensor_proto
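
# Note on configuration (an assumption, not verified here): the value of
# GLOBAL_CONFIG['serialization_function'] is expected to be populated in
# ie_serving.config, presumably from an environment variable such as
# SERIALIZATION_FUNCTION; any value other than
# '_prepare_output_as_AppendArrayToTensorProto' selects the faster
# make_tensor_proto path above.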