fastdeploy_model.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/runtime.h"

// All C++ FastDeploy APIs are defined inside this namespace.
namespace fastdeploy {

/// Base model object for all the vision models.
class FASTDEPLOY_DECL FastDeployModel {
 public:
  /// Get model's name.
  virtual std::string ModelName() const { return "NameUndefined"; }

  /// Inference the model with the given input tensors, writing the results
  /// to output_tensors.
  virtual bool Infer(std::vector<FDTensor>& input_tensors,
                     std::vector<FDTensor>* output_tensors);

  /// Inference the model using the reused input tensors, writing the
  /// results to the reused output tensors.
  virtual bool Infer();

  /// Option used to configure the Runtime created for this model.
  RuntimeOption runtime_option;
  /// Valid backends for this model when inferring on CPU.
  std::vector<Backend> valid_cpu_backends = {Backend::ORT};
  /// Valid backends for this model when inferring on GPU.
  std::vector<Backend> valid_gpu_backends = {Backend::ORT};
  /// Valid backends for this model when inferring on IPU.
  std::vector<Backend> valid_ipu_backends = {};
  /// Valid backends for this model when inferring on TIM-VX.
  std::vector<Backend> valid_timvx_backends = {};
  /// Valid backends for this model when inferring with DirectML.
  std::vector<Backend> valid_directml_backends = {};
  /// Valid backends for this model when inferring on Ascend.
  std::vector<Backend> valid_ascend_backends = {};
  /// Valid backends for this model when inferring on KunlunXin.
  std::vector<Backend> valid_kunlunxin_backends = {};
  /// Valid backends for this model when inferring on RKNPU.
  std::vector<Backend> valid_rknpu_backends = {};
  /// Valid backends for this model when inferring on Sophgo NPU.
  std::vector<Backend> valid_sophgonpu_backends = {};

  /// Get number of inputs for this model.
  virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
  /// Get number of outputs for this model.
  virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
  /// Get input information for this model.
  virtual TensorInfo InputInfoOfRuntime(int index) {
    return runtime_->GetInputInfo(index);
  }
  /// Get output information for this model.
  virtual TensorInfo OutputInfoOfRuntime(int index) {
    return runtime_->GetOutputInfo(index);
  }
  /// Check if the model is initialized successfully.
  virtual bool Initialized() const {
    return runtime_initialized_ && initialized;
  }

  /// This is a debug interface, used to record the time of runtime
  /// (backend + h2d + d2h).
  virtual void EnableRecordTimeOfRuntime() {
    time_of_runtime_.clear();
    std::vector<double>().swap(time_of_runtime_);
    enable_record_time_of_runtime_ = true;
  }

  /// Disable recording the time of runtime; see EnableRecordTimeOfRuntime()
  /// for more detail.
  virtual void DisableRecordTimeOfRuntime() {
    enable_record_time_of_runtime_ = false;
  }

  /// Print the recorded statistic information of runtime and return it.
  virtual std::map<std::string, float> PrintStatisInfoOfRuntime();

  /// Check if the EnableRecordTimeOfRuntime() method is enabled.
  virtual bool EnabledRecordTimeOfRuntime() {
    return enable_record_time_of_runtime_;
  }
  /// Get profile time of Runtime after the profile process is done.
  virtual double GetProfileTime() {
    return runtime_->GetProfileTime();
  }
  /// Release reused input/output buffers.
  virtual void ReleaseReusedBuffer() {
    std::vector<FDTensor>().swap(reused_input_tensors_);
    std::vector<FDTensor>().swap(reused_output_tensors_);
  }

  /// Clone a new Runtime, used when multiple instances of the same model
  /// are created.
  virtual fastdeploy::Runtime* CloneRuntime() { return runtime_->Clone(); }

  /// Set the cloned Runtime for this model, taking ownership of it.
  virtual bool SetRuntime(fastdeploy::Runtime* clone_runtime) {
    runtime_ = std::unique_ptr<Runtime>(clone_runtime);
    return true;
  }

  /// Clone this model; models that do not override this return nullptr.
  virtual std::unique_ptr<FastDeployModel> Clone() {
    FDERROR << ModelName() << " doesn't support Clone() now." << std::endl;
    return nullptr;
  }

 protected:
  virtual bool InitRuntime();

  bool initialized = false;
  // Reused input tensors
  std::vector<FDTensor> reused_input_tensors_;
  // Reused output tensors
  std::vector<FDTensor> reused_output_tensors_;

 private:
  bool InitRuntimeWithSpecifiedBackend();
  bool InitRuntimeWithSpecifiedDevice();
  bool CreateCpuBackend();
  bool CreateGpuBackend();
  bool CreateIpuBackend();
  bool CreateRKNPUBackend();
  bool CreateSophgoNPUBackend();
  bool CreateTimVXBackend();
  bool CreateKunlunXinBackend();
  bool CreateASCENDBackend();
  bool CreateDirectMLBackend();
  bool IsSupported(const std::vector<Backend>& backends,
                   Backend backend);

  std::shared_ptr<Runtime> runtime_;
  bool runtime_initialized_ = false;
  // whether to record inference time
  bool enable_record_time_of_runtime_ = false;
  std::vector<double> time_of_runtime_;
};

}  // namespace fastdeploy
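
The usual way to use this class is to subclass it: a concrete model declares
which backends it can run on, fills in runtime_option with its model files,
and then calls the protected InitRuntime() to select a backend and load the
model. Below is a minimal sketch under those assumptions; MyClassifier, its
constructor arguments, and the chosen backends are illustrative, not part of
FastDeploy.

// my_classifier.h -- hypothetical subclass, for illustration only.
#pragma once
#include <memory>
#include <string>

#include "fastdeploy/fastdeploy_model.h"

namespace fastdeploy {

class MyClassifier : public FastDeployModel {
 public:
  explicit MyClassifier(const std::string& model_file,
                        const RuntimeOption& custom_option = RuntimeOption()) {
    // Backends this model is assumed to work with on each device.
    valid_cpu_backends = {Backend::ORT};
    valid_gpu_backends = {Backend::ORT};
    // Configure the runtime before initialization.
    runtime_option = custom_option;
    runtime_option.model_file = model_file;
    runtime_option.model_format = ModelFormat::ONNX;
    // InitRuntime() picks a backend matching runtime_option.device from the
    // valid_*_backends lists above and loads the model; Initialized() then
    // reports whether this succeeded.
    initialized = InitRuntime();
  }

  std::string ModelName() const override { return "MyClassifier"; }

  // Support Clone() by copying the model and giving the copy its own
  // Runtime via CloneRuntime()/SetRuntime(), so several instances can
  // serve requests concurrently.
  std::unique_ptr<FastDeployModel> Clone() override {
    auto clone_model = std::make_unique<MyClassifier>(*this);
    clone_model->SetRuntime(clone_model->CloneRuntime());
    return clone_model;
  }
};

}  // namespace fastdeploy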
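
The runtime-timing debug interface can be driven as in the following usage
sketch, which builds on the hypothetical MyClassifier above. The model path
and input preparation are placeholders, and the keys of the map returned by
PrintStatisInfoOfRuntime() are not assumed here.

// main.cc -- hypothetical usage of the runtime-timing debug interface.
#include <iostream>
#include <map>
#include <vector>

#include "my_classifier.h"  // the hypothetical header sketched above

int main() {
  fastdeploy::MyClassifier model("model.onnx");
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the model." << std::endl;
    return -1;
  }

  // Record the time of each runtime call (backend + h2d + d2h).
  model.EnableRecordTimeOfRuntime();

  std::vector<fastdeploy::FDTensor> inputs;  // fill from real data in practice
  std::vector<fastdeploy::FDTensor> outputs;
  for (int i = 0; i < 100; ++i) {
    if (!model.Infer(inputs, &outputs)) {
      std::cerr << "Inference failed." << std::endl;
      return -1;
    }
  }

  // Print the recorded statistics, then stop recording.
  std::map<std::string, float> stats = model.PrintStatisInfoOfRuntime();
  for (const auto& kv : stats) {
    std::cout << kv.first << ": " << kv.second << std::endl;
  }
  model.DisableRecordTimeOfRuntime();
  return 0;
}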