24 #define FD_ALIGN(x) __attribute__((aligned(x))) 26 #define FD_ALIGN(x) __declspec(align(x)) 31 struct FD_ALIGN(2) float16 {
38 float16(
const float16& o) =
default;
39 float16& operator=(
const float16& o) =
default;
40 float16(float16&& o) =
default;
41 float16& operator=(float16&& o) =
default;
46 #ifdef FD_WITH_NATIVE_FP16 49 inline explicit float16(
const float16_t& h) {
50 x = *
reinterpret_cast<const uint16_t*
>(&h);
54 inline explicit float16(
float val) {
55 #if defined(FD_WITH_NATIVE_FP16) 56 float32x4_t tmp = vld1q_dup_f32(&val);
57 float16_t res = vget_lane_f16(vcvt_f16_f32(tmp), 0);
58 x = *
reinterpret_cast<uint16_t*
>(&res);
60 #elif defined(__F16C__) 61 x = _cvtss_sh(val, 0);
68 uint32_t sign = v.si & sigN;
73 v.si ^= (s.si ^ v.si) & -(minN > v.si);
74 v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
75 v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
77 v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
78 v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
84 inline explicit float16(
bool b) : x(b ? 0x3c00 : 0) {}
87 inline explicit float16(
const T& val)
88 : x(float16(static_cast<float>(val)).x) {}
92 #ifdef FD_WITH_NATIVE_FP16 93 inline float16& operator=(
const float16_t& rhs) {
94 x = *
reinterpret_cast<const uint16_t*
>(&rhs);
99 inline float16& operator=(
bool b) {
104 inline float16& operator=(int8_t val) {
109 inline float16& operator=(uint8_t val) {
114 inline float16& operator=(int16_t val) {
119 inline float16& operator=(uint16_t val) {
124 inline float16& operator=(int32_t val) {
129 inline float16& operator=(uint32_t val) {
134 inline float16& operator=(int64_t val) {
139 inline float16& operator=(uint64_t val) {
144 inline float16& operator=(
float val) {
149 inline float16& operator=(
double val) {
155 #ifdef FD_WITH_NATIVE_FP16 156 HOSTDEVICE
inline explicit operator float16_t()
const {
157 return *
reinterpret_cast<const float16_t*
>(
this);
161 inline operator float()
const {
162 #if defined(FD_WITH_NATIVE_FP16) 163 float16x4_t res = vld1_dup_f16(reinterpret_cast<const float16_t*>(
this));
164 return vgetq_lane_f32(vcvt_f32_f16(res), 0);
166 #elif defined(__F16C__) 167 return _cvtsh_ss(this->x);
174 int32_t sign = v.si & sigC;
177 v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
178 v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
182 int32_t mask = -(norC > v.si);
184 v.si ^= (s.si ^ v.si) & mask;
191 inline explicit operator bool()
const {
return (x & 0x7fff) != 0; }
193 inline explicit operator int8_t()
const {
194 return static_cast<int8_t
>(
static_cast<float>(*this));
197 inline explicit operator uint8_t()
const {
198 return static_cast<uint8_t
>(
static_cast<float>(*this));
201 inline explicit operator int16_t()
const {
202 return static_cast<int16_t
>(
static_cast<float>(*this));
205 inline explicit operator uint16_t()
const {
206 return static_cast<uint16_t
>(
static_cast<float>(*this));
209 inline explicit operator int32_t()
const {
210 return static_cast<int32_t
>(
static_cast<float>(*this));
213 inline explicit operator uint32_t()
const {
214 return static_cast<uint32_t
>(
static_cast<float>(*this));
217 inline explicit operator int64_t()
const {
218 return static_cast<int64_t
>(
static_cast<float>(*this));
221 inline explicit operator uint64_t()
const {
222 return static_cast<uint64_t
>(
static_cast<float>(*this));
225 inline operator double()
const {
226 return static_cast<double>(
static_cast<float>(*this));
229 inline bool operator>(
const float& other)
const {
230 return this->
operator float() > other;
233 inline bool operator>(
const double& other)
const {
234 return this->
operator double() > other;
237 inline bool operator<(
const float& other)
const {
238 return this->
operator float() > other;
241 inline bool operator<(
const double& other)
const {
242 return this->
operator double() > other;
245 template <
typename T,
246 typename std::enable_if<!std::is_same<T, float16>::value,
248 inline float16& operator+=(
const T& other) {
249 *
this = float16(static_cast<T>(*
this) + other);
260 static const int shift = 13;
261 static const int shiftSign = 16;
263 static const int32_t infN = 0x7F800000;
264 static const int32_t maxN = 0x477FE000;
265 static const int32_t minN = 0x38800000;
266 static const int32_t sigN = 0x80000000;
268 static constexpr int32_t infC = infN >> shift;
269 static constexpr int32_t nanN = (infC + 1)
271 static constexpr int32_t maxC = maxN >> shift;
272 static constexpr int32_t minC = minN >> shift;
273 static constexpr int32_t sigC = sigN >> shiftSign;
275 static const int32_t mulN = 0x52000000;
276 static const int32_t mulC = 0x33800000;
277 static const int32_t subC = 0x003FF;
278 static const int32_t norC = 0x00400;
280 static constexpr int32_t maxD = infC - maxC - 1;
281 static constexpr int32_t minD = minC - subC - 1;
285 #if defined(FD_WITH_NATIVE_FP16) 286 inline float16 operator+(
const float16& a,
const float16& b) {
289 "ld1 {v0.h}[0], [%[a_ptr]]\n" 290 "ld1 {v1.h}[0], [%[b_ptr]]\n" 292 "st1 {v0.h}[0], [%[res_ptr]]\n" 295 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
296 [res_ptr]
"r"(&(res.x))
298 "memory",
"v0",
"v1");
302 inline float16 operator-(
const float16& a,
const float16& b) {
305 "ld1 {v0.h}[0], [%[a_ptr]]\n" 306 "ld1 {v1.h}[0], [%[b_ptr]]\n" 308 "st1 {v0.h}[0], [%[res_ptr]]\n" 311 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
312 [res_ptr]
"r"(&(res.x))
314 "memory",
"v0",
"v1");
318 inline float16 operator*(
const float16& a,
const float16& b) {
321 "ld1 {v0.h}[0], [%[a_ptr]]\n" 322 "ld1 {v1.h}[0], [%[b_ptr]]\n" 324 "st1 {v0.h}[0], [%[res_ptr]]\n" 327 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
328 [res_ptr]
"r"(&(res.x))
330 "memory",
"v0",
"v1");
334 inline float16 operator/(
const float16& a,
const float16& b) {
337 "ld1 {v0.h}[0], [%[a_ptr]]\n" 338 "ld1 {v1.h}[0], [%[b_ptr]]\n" 340 "st1 {v0.h}[0], [%[res_ptr]]\n" 343 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
344 [res_ptr]
"r"(&(res.x))
346 "memory",
"v0",
"v1");
350 inline float16 operator-(
const float16& a) {
353 "ld1 {v0.h}[0], [%[a_ptr]]\n" 355 "st1 {v0.h}[0], [%[res_ptr]]\n" 359 [res_ptr]
"r"(&(res.x))
365 inline float16& operator+=(float16& a,
const float16& b) {
370 inline float16& operator-=(float16& a,
const float16& b) {
375 inline float16& operator*=(float16& a,
const float16& b) {
380 inline float16& operator/=(float16& a,
const float16& b) {
385 inline bool operator==(
const float16& a,
const float16& b) {
388 "ld1 {v0.h}[0], [%[a_ptr]]\n" 389 "ld1 {v1.h}[0], [%[b_ptr]]\n" 391 "st1 {v0.h}[0], [%[res_ptr]]\n" 394 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
397 "memory",
"v0",
"v1");
398 return (res & 0xffff) != 0;
401 inline bool operator!=(
const float16& a,
const float16& b) {
return !(a == b); }
403 inline bool operator<(
const float16& a,
const float16& b) {
406 "ld1 {v1.h}[0], [%[a_ptr]]\n" 407 "ld1 {v0.h}[0], [%[b_ptr]]\n" 409 "st1 {v0.h}[0], [%[res_ptr]]\n" 412 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
415 "memory",
"v0",
"v1");
416 return (res & 0xffff) != 0;
419 inline bool operator<=(
const float16& a,
const float16& b) {
422 "ld1 {v1.h}[0], [%[a_ptr]]\n" 423 "ld1 {v0.h}[0], [%[b_ptr]]\n" 425 "st1 {v0.h}[0], [%[res_ptr]]\n" 428 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
431 "memory",
"v0",
"v1");
432 return (res & 0xffff) != 0;
435 inline bool operator>(
const float16& a,
const float16& b) {
438 "ld1 {v0.h}[0], [%[a_ptr]]\n" 439 "ld1 {v1.h}[0], [%[b_ptr]]\n" 441 "st1 {v0.h}[0], [%[res_ptr]]\n" 444 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
447 "memory",
"v0",
"v1");
448 return (res & 0xffff) != 0;
451 inline bool operator>=(
const float16& a,
const float16& b) {
454 "ld1 {v0.h}[0], [%[a_ptr]]\n" 455 "ld1 {v1.h}[0], [%[b_ptr]]\n" 457 "st1 {v0.h}[0], [%[res_ptr]]\n" 460 [a_ptr]
"r"(&(a.x)), [b_ptr]
"r"(&(b.x)),
463 "memory",
"v0",
"v1");
464 return (res & 0xffff) != 0;
466 inline float16 operator+(
const float16& a,
const float16& b) {
467 return float16(static_cast<float>(a) + static_cast<float>(b));
470 inline float16 operator-(
const float16& a,
const float16& b) {
471 return float16(static_cast<float>(a) - static_cast<float>(b));
474 inline float16 operator*(
const float16& a,
const float16& b) {
475 return float16(static_cast<float>(a) * static_cast<float>(b));
478 inline float16 operator/(
const float16& a,
const float16& b) {
479 return float16(static_cast<float>(a) / static_cast<float>(b));
482 inline float16 operator-(
const float16& a) {
484 res.x = a.x ^ 0x8000;
488 inline float16& operator+=(float16& a,
const float16& b) {
489 a = float16(static_cast<float>(a) + static_cast<float>(b));
493 inline float16& operator-=(float16& a,
const float16& b) {
494 a = float16(static_cast<float>(a) - static_cast<float>(b));
498 inline float16& operator*=(float16& a,
const float16& b) {
499 a = float16(static_cast<float>(a) * static_cast<float>(b));
503 inline float16& operator/=(float16& a,
const float16& b) {
504 a = float16(static_cast<float>(a) / static_cast<float>(b));
508 inline bool operator==(
const float16& a,
const float16& b) {
509 return static_cast<float>(a) == static_cast<float>(b);
512 inline bool operator!=(
const float16& a,
const float16& b) {
513 return static_cast<float>(a) != static_cast<float>(b);
516 inline bool operator<(
const float16& a,
const float16& b) {
517 return static_cast<float>(a) < static_cast<float>(b);
520 inline bool operator<=(
const float16& a,
const float16& b) {
521 return static_cast<float>(a) <= static_cast<float>(b);
524 inline bool operator>(
const float16& a,
const float16& b) {
525 return static_cast<float>(a) > static_cast<float>(b);
528 inline bool operator>=(
const float16& a,
const float16& b) {
529 return static_cast<float>(a) >= static_cast<float>(b);
533 template <
typename T,
534 typename std::enable_if<std::is_integral<T>::value ||
535 std::is_same<T, float>::value,
537 inline T& operator+=(T& a,
const float16& b) {
538 auto c =
static_cast<float>(a) + static_cast<float>(b);
539 a =
static_cast<T
>(c);
543 inline double& operator+=(
double& a,
const float16& b) {
544 a = a +
static_cast<double>(b);
548 inline float16 raw_uint16_to_float16(uint16_t a) {
554 inline bool(isnan)(
const float16& a) {
return (a.x & 0x7fff) > 0x7c00; }
556 inline bool(isinf)(
const float16& a) {
return (a.x & 0x7fff) == 0x7c00; }
558 inline bool(isfinite)(
const float16& a) {
559 return !((isnan)(a)) && !((isinf)(a));
562 inline float16(abs)(
const float16& a) {
563 return float16(std::abs(static_cast<float>(a)));
566 inline std::ostream& operator<<(std::ostream& os,
const float16& a) {
567 os << static_cast<float>(a);
584 static const bool value = is_trivial<fastdeploy::float16>::value &&
585 is_standard_layout<fastdeploy::float16>::value;
590 : std::integral_constant<
591 bool, std::is_same<fastdeploy::float16,
592 typename std::remove_cv<
593 fastdeploy::float16>::type>::value> {};
595 struct is_signed<fastdeploy::float16> {
596 static const bool value =
true;
600 struct is_unsigned<fastdeploy::float16> {
601 static const bool value =
false;
604 inline bool isnan(
const fastdeploy::float16& a) {
return fastdeploy::isnan(a); }
606 inline bool isinf(
const fastdeploy::float16& a) {
return fastdeploy::isinf(a); }
609 struct numeric_limits<fastdeploy::float16> {
// Static properties reported for fastdeploy::float16.
static const bool is_specialized = true;
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const std::float_denorm_style has_denorm = std::denorm_present;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_to_nearest;
static const bool is_iec559 = false;
// The set of representable values is finite (lowest() and max() are finite
// bit patterns), so the type is bounded. This was previously `false`.
static const bool is_bounded = true;
static const bool is_modulo = false;
// Significand precision in radix-2 digits, and the decimal digit counts
// that go with it.
static const int digits = 11;
static const int digits10 = 3;
static const int max_digits10 = 5;
static const int radix = 2;
// Exponent range, in radix-2 and base-10 form.
static const int min_exponent = -13;
static const int min_exponent10 = -4;
static const int max_exponent = 16;
static const int max_exponent10 = 4;
static const bool traps = true;
static const bool tinyness_before = false;
634 static fastdeploy::float16(min)() {
635 return fastdeploy::raw_uint16_to_float16(0x400);
637 static fastdeploy::float16 lowest() {
638 return fastdeploy::raw_uint16_to_float16(0xfbff);
640 static fastdeploy::float16(max)() {
641 return fastdeploy::raw_uint16_to_float16(0x7bff);
643 static fastdeploy::float16 epsilon() {
644 return fastdeploy::raw_uint16_to_float16(0x0800);
646 static fastdeploy::float16 round_error() {
return fastdeploy::float16(0.5); }
647 static fastdeploy::float16 infinity() {
648 return fastdeploy::raw_uint16_to_float16(0x7c00);
650 static fastdeploy::float16 quiet_NaN() {
651 return fastdeploy::raw_uint16_to_float16(0x7e00);
653 static fastdeploy::float16 signaling_NaN() {
654 return fastdeploy::raw_uint16_to_float16(0x7e00);
656 static fastdeploy::float16 denorm_min() {
657 return fastdeploy::raw_uint16_to_float16(0x1);
661 inline fastdeploy::float16 abs(
const fastdeploy::float16& a) {
662 return fastdeploy::abs(a);
Definition: float16.h:572
All C++ FastDeploy APIs are defined inside this namespace.
Definition: option.h:16