runtime_option.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime_option.h
    \brief Options to configure the FastDeploy Runtime.
 */

#pragma once

#include <algorithm>
#include <map>
#include <vector>

#include "fastdeploy/runtime/backends/lite/option.h"
#include "fastdeploy/runtime/backends/openvino/option.h"
#include "fastdeploy/runtime/backends/ort/option.h"
#include "fastdeploy/runtime/backends/paddle/option.h"
#include "fastdeploy/runtime/backends/poros/option.h"
#include "fastdeploy/runtime/backends/rknpu2/option.h"
#include "fastdeploy/runtime/backends/sophgo/option.h"
#include "fastdeploy/runtime/backends/tensorrt/option.h"
#include "fastdeploy/benchmark/option.h"

namespace fastdeploy {

/*! @brief Option object used when creating a new Runtime object
 */
struct FASTDEPLOY_DECL RuntimeOption {
  /** \brief Set the path of the model file and the parameter file
   *
   * \param[in] model_path Path of the model file
   * \param[in] params_path Path of the parameter file; only required when the model format is Paddle
   * \param[in] format Format of the loaded model, Paddle format by default
   */
  void SetModelPath(const std::string& model_path,
                    const std::string& params_path = "",
                    const ModelFormat& format = ModelFormat::PADDLE);

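  // A minimal usage sketch for path-based loading; the file names are
  // illustrative assumptions, not files shipped with FastDeploy:
  //
  //   fastdeploy::RuntimeOption option;
  //   option.SetModelPath("ResNet50/model.pdmodel", "ResNet50/model.pdiparams");
  //   // For an ONNX model the parameter file stays empty:
  //   option.SetModelPath("ResNet50/model.onnx", "", ModelFormat::ONNX);
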
  /** \brief Set the model and parameter buffers when loading a model from memory
   *
   * \param[in] model_buffer Memory buffer holding the model file content
   * \param[in] params_buffer Memory buffer holding the parameter file content; only required when the model format is Paddle
   * \param[in] format Format of the loaded model, Paddle format by default
   */
  void SetModelBuffer(const std::string& model_buffer,
                      const std::string& params_buffer = "",
                      const ModelFormat& format = ModelFormat::PADDLE);

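  // A sketch of in-memory loading: read both files into std::string buffers
  // first (requires <fstream>; the paths are illustrative assumptions):
  //
  //   std::ifstream model_fin("model.pdmodel", std::ios::binary);
  //   std::string model_buf((std::istreambuf_iterator<char>(model_fin)),
  //                         std::istreambuf_iterator<char>());
  //   std::ifstream params_fin("model.pdiparams", std::ios::binary);
  //   std::string params_buf((std::istreambuf_iterator<char>(params_fin)),
  //                          std::istreambuf_iterator<char>());
  //   option.SetModelBuffer(model_buf, params_buf);
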
  /** \brief Set the encryption key used to decrypt an encrypted model
   *
   * \param[in] encryption_key Key used to decrypt the model
   */
  void SetEncryptionKey(const std::string& encryption_key);

  /// Use CPU for inference
  void UseCpu();
  /// Use Nvidia GPU for inference; gpu_id selects the GPU card
  void UseGpu(int gpu_id = 0);
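  // Device selection sketch: the option defaults to CPU, so switching to GPU
  // only needs the card id (0 here is an illustrative choice):
  //
  //   option.UseCpu();
  //   option.UseGpu(0);  // run inference on GPU card 0 instead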
  /// Use RKNPU2 for inference; rknpu2_name selects the Rockchip SoC,
  /// rknpu2_core selects the NPU core mask
  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
                     fastdeploy::rknpu2::CpuName::RK356X,
                 fastdeploy::rknpu2::CoreMask rknpu2_core =
                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
  /// Use TIM-VX for inference
  void UseTimVX();
  /// Use Huawei Ascend for inference
  void UseAscend();

  /// Use DirectML for inference
  void UseDirectML();

  /// Use Sophgo TPU for inference
  void UseSophgo();
  /** \brief Use KunlunXin XPU for inference
   *
   * \param[in] kunlunxin_id Id of the KunlunXin XPU card to use
   * \param[in] l3_workspace_size Size of the L3 cache workspace
   * \param[in] locked Whether the allocated L3 cache can be locked
   * \param[in] autotune Whether to autotune the conv algorithm
   * \param[in] autotune_file Path of the autotune result file
   * \param[in] precision Calculation precision of multi_encoder, "int16" by default
   * \param[in] adaptive_seqlen Whether the input uses an adaptive sequence length
   * \param[in] enable_multi_stream Whether to enable multi-stream execution
   */
  void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                    bool locked = false, bool autotune = true,
                    const std::string& autotune_file = "",
                    const std::string& precision = "int16",
                    bool adaptive_seqlen = false,
                    bool enable_multi_stream = false);

  /// Use an externally created compute stream (e.g. a CUDA stream) for inference
  void SetExternalStream(void* external_stream);

  /** \brief Set the number of CPU threads used while inferencing on CPU; by default the value is decided by the backend in use
   */
  void SetCpuThreadNum(int thread_num);
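  // Thread-count sketch: -1 (the default) defers to the backend, while an
  // explicit value caps the CPU threads it may use (8 is illustrative):
  //
  //   option.SetCpuThreadNum(8);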
  /// Set Paddle Inference as the inference backend; supports CPU/GPU
  void UsePaddleInferBackend() { return UsePaddleBackend(); }
  /// Set ONNX Runtime as the inference backend; supports CPU/GPU
  void UseOrtBackend();
  /// Set Sophgo as the inference backend; supports Sophgo TPU only
  void UseSophgoBackend();
  /// Set TensorRT as the inference backend; supports GPU only
  void UseTrtBackend();
  /// Set Poros as the inference backend; supports CPU/GPU
  void UsePorosBackend();
  /// Set OpenVINO as the inference backend; supports CPU only
  void UseOpenVINOBackend();
  /// Set Paddle Lite as the inference backend; supports arm CPU only
  void UsePaddleLiteBackend() { return UseLiteBackend(); }
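  // Backend selection sketch: with backend left as Backend::UNKNOWN,
  // FastDeploy picks an available backend for the chosen device; an explicit
  // call overrides that choice:
  //
  //   option.UseGpu();
  //   option.UseTrtBackend();  // run the model through TensorRT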
  /** \brief Use Graphcore IPU for inference
   *
   * \param[in] device_num Number of IPUs to use
   * \param[in] micro_batch_size Batch size in the graph; only used when the graph has no fixed batch size
   * \param[in] enable_pipelining Whether to enable pipelining for higher throughput
   * \param[in] batches_per_step Number of batches per run when pipelining
   */
  void UseIpu(int device_num = 1, int micro_batch_size = 1,
              bool enable_pipelining = false, int batches_per_step = 1);

  /// Option to configure ONNX Runtime backend
  OrtBackendOption ort_option;
  /// Option to configure TensorRT backend
  TrtBackendOption trt_option;
  /// Option to configure Paddle Inference backend
  PaddleBackendOption paddle_infer_option;
  /// Option to configure Poros backend
  PorosBackendOption poros_option;
  /// Option to configure OpenVINO backend
  OpenVINOBackendOption openvino_option;
  /// Option to configure Paddle Lite backend
  LiteBackendOption paddle_lite_option;
  /// Option to configure RKNPU2 backend
  RKNPU2BackendOption rknpu2_option;

  // \brief Set the profile mode as 'true'.
  //
  // \param[in] include_h2d_d2h Whether to include the time of H2D/D2H copies
  // in the measured runtime.
  // \param[in] repeat Repeat count for runtime inference.
  // \param[in] warmup Warmup count for runtime inference.
  //
  void EnableProfiling(bool include_h2d_d2h = false,
                       int repeat = 100, int warmup = 50) {
    benchmark_option.enable_profile = true;
    benchmark_option.warmup = warmup;
    benchmark_option.repeats = repeat;
    benchmark_option.include_h2d_d2h = include_h2d_d2h;
  }

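  // Profiling sketch: wrap a deployment run to collect averaged latency
  // (the repeat/warmup values below are illustrative):
  //
  //   option.EnableProfiling(true, 100, 50);  // include H2D/D2H, 100 runs, 50 warmups
  //   // ... create the Runtime / model and run inference as usual ...
  //   option.DisableProfiling();
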
  // \brief Set the profile mode as 'false'.
  //
  void DisableProfiling() {
    benchmark_option.enable_profile = false;
  }

  // \brief Enable checking whether the backend set by the user
  // appears in the corresponding valid_xxx_backend list.
  //
  void EnableValidBackendCheck() {
    enable_valid_backend_check = true;
  }
  // \brief Disable checking whether the backend set by the user
  // appears in the corresponding valid_xxx_backend list.
  //
  void DisableValidBackendCheck() {
    enable_valid_backend_check = false;
  }

  // Benchmark option
  benchmark::BenchmarkOption benchmark_option;
  // Enable the check for a valid backend; true by default.
  bool enable_valid_backend_check = true;

  // If model_from_memory_ is true, model_file and params_file hold
  // the binary streams of the model/parameters in memory;
  // otherwise they hold the paths of the model/parameter files.
  std::string model_file = "";
  std::string params_file = "";
  bool model_from_memory_ = false;
  // Format of the input model
  ModelFormat model_format = ModelFormat::PADDLE;

  std::string encryption_key_ = "";

  // For CPU inference: -1 (the default) lets each backend choose its own value.
  int cpu_thread_num = -1;
  int device_id = 0;
  Backend backend = Backend::UNKNOWN;

  Device device = Device::CPU;

  void* external_stream_ = nullptr;

  bool enable_pinned_memory = false;

  // *** The APIs below are deprecated and will be removed in v1.2.0.
  // *** Do not use them anymore.
  void SetPaddleMKLDNN(bool pd_mkldnn = true);
  void EnablePaddleToTrt();
  void DeletePaddleBackendPass(const std::string& delete_pass_name);
  void EnablePaddleLogInfo();
  void DisablePaddleLogInfo();
  void SetPaddleMKLDNNCacheSize(int size);
  void SetOpenVINODevice(const std::string& name = "CPU");
  void SetOpenVINOShapeInfo(
      const std::map<std::string, std::vector<int64_t>>& shape_info) {
    openvino_option.shape_infos = shape_info;
  }
  void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
    openvino_option.SetCpuOperators(operators);
  }
  void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
  void SetLiteSubgraphPartitionPath(
      const std::string& nnadapter_subgraph_partition_config_path);
  void SetLiteSubgraphPartitionConfigBuffer(
      const std::string& nnadapter_subgraph_partition_config_buffer);
  void SetLiteContextProperties(
      const std::string& nnadapter_context_properties);
  void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir);
  void SetLiteDynamicShapeInfo(
      const std::map<std::string, std::vector<std::vector<int64_t>>>&
          nnadapter_dynamic_shape_info);
  void SetLiteMixedPrecisionQuantizationConfigPath(
      const std::string& nnadapter_mixed_precision_quantization_config_path);
  void EnableLiteFP16();
  void DisableLiteFP16();
  void EnableLiteInt8();
  void DisableLiteInt8();
  void SetLitePowerMode(LitePowerMode mode);
  void SetTrtInputShape(
      const std::string& input_name, const std::vector<int32_t>& min_shape,
      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
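  // Dynamic-shape sketch for TensorRT using this deprecated setter, kept for
  // reference; the tensor name and shapes are illustrative assumptions:
  //
  //   option.UseGpu();
  //   option.UseTrtBackend();
  //   option.SetTrtInputShape("image", {1, 3, 224, 224},  // min
  //                           {4, 3, 224, 224},            // opt
  //                           {8, 3, 224, 224});           // max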
  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
  void SetTrtMaxBatchSize(size_t max_batch_size);
  void EnableTrtFP16();
  void DisableTrtFP16();
  void SetTrtCacheFile(const std::string& cache_file_path);
  void EnablePinnedMemory();
  void DisablePinnedMemory();
  void EnablePaddleTrtCollectShape();
  void DisablePaddleTrtCollectShape();
  void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
  void SetOpenVINOStreams(int num_streams);
  void SetOrtGraphOptLevel(int level = -1);
  void UsePaddleBackend();
  void UseLiteBackend();
};

}  // namespace fastdeploy
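
// End-to-end sketch of how this option object is typically consumed. The
// header path and Runtime::Init call are assumptions based on the FastDeploy
// API; the model paths are illustrative:
//
//   #include "fastdeploy/runtime.h"
//
//   int main() {
//     fastdeploy::RuntimeOption option;
//     option.SetModelPath("model.pdmodel", "model.pdiparams");
//     option.UseCpu();
//     option.SetCpuThreadNum(4);
//     option.UseOrtBackend();
//
//     fastdeploy::Runtime runtime;
//     if (!runtime.Init(option)) return -1;  // build the runtime from the option
//     // ... prepare FDTensor inputs and call runtime.Infer(...) ...
//     return 0;
//   }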