fast_math.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14. // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  16. // Copyright (C) 2015, Itseez Inc., all rights reserved.
  17. // Third party copyrights are property of their respective owners.
  18. //
  19. // Redistribution and use in source and binary forms, with or without modification,
  20. // are permitted provided that the following conditions are met:
  21. //
  22. // * Redistribution's of source code must retain the above copyright notice,
  23. // this list of conditions and the following disclaimer.
  24. //
  25. // * Redistribution's in binary form must reproduce the above copyright notice,
  26. // this list of conditions and the following disclaimer in the documentation
  27. // and/or other materials provided with the distribution.
  28. //
  29. // * The name of the copyright holders may not be used to endorse or promote products
  30. // derived from this software without specific prior written permission.
  31. //
  32. // This software is provided by the copyright holders and contributors "as is" and
  33. // any express or implied warranties, including, but not limited to, the implied
  34. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  35. // In no event shall the Intel Corporation or contributors be liable for any direct,
  36. // indirect, incidental, special, exemplary, or consequential damages
  37. // (including, but not limited to, procurement of substitute goods or services;
  38. // loss of use, data, or profits; or business interruption) however caused
  39. // and on any theory of liability, whether in contract, strict liability,
  40. // or tort (including negligence or otherwise) arising in any way out of
  41. // the use of this software, even if advised of the possibility of such damage.
  42. //
  43. //M*/
  44. #ifndef OPENCV_CORE_FAST_MATH_HPP
  45. #define OPENCV_CORE_FAST_MATH_HPP
  46. #include "opencv2/core/cvdef.h"
  47. //! @addtogroup core_utils
  48. //! @{
  49. /****************************************************************************************\
  50. * fast math *
  51. \****************************************************************************************/
  52. #ifdef __cplusplus
  53. # include <cmath>
  54. #else
  55. # ifdef __BORLANDC__
  56. # include <fastmath.h>
  57. # else
  58. # include <math.h>
  59. # endif
  60. #endif
  61. #if defined(__CUDACC__)
  62. // nothing, intrinsics/asm code is not supported
  63. #else
  64. #if ((defined _MSC_VER && defined _M_X64) \
  65. || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
  66. && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
  67. #include <emmintrin.h>
  68. #endif
  69. #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
  70. && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
  71. #include <altivec.h>
  72. #undef vector
  73. #undef bool
  74. #undef pixel
  75. #endif
  /* Optional inline-assembly bodies for cvRound().
   *
   * When CV_INLINE_ROUND_DBL / CV_INLINE_ROUND_FLT are defined they expand to a
   * whole function body: local declarations followed by a `return`.  A build may
   * provide its own pair before this point; otherwise versions are generated here
   * for GCC-compatible compilers targeting ARM with VFP/NEON (no soft-float) and
   * PPC64 with _ARCH_PWR8.
   */
  #if defined(CV_INLINE_ROUND_FLT)
    /* User-specified version; CV_INLINE_ROUND_DBL should be defined as well. */
  #elif defined(__GNUC__) && defined(__arm__) \
        && (defined(__ARM_PCS_VFP) || defined(__ARM_VFPV3__) || defined(__ARM_NEON__)) \
        && !defined(__SOFTFP__)
    /* Common skeleton: run the given conversion sequence, which leaves the result
       in the scratch register `temp` and then moves it into `res`. */
  #  define ARM_ROUND(_value, _asm_string) \
        int res; \
        float temp; \
        CV_UNUSED(temp); \
        __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
        return res
    /* double: the two compilers differ only in how the input operand is written
       (%[value] for clang, %P[value] otherwise). */
  #  if defined(__clang__)
  #    define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
  #  else
  #    define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
  #  endif
    /* float */
  #  define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  #elif defined(__PPC64__) && defined(__GNUC__) && defined(_ARCH_PWR8)
    /* POWER8 and newer convert fp32/fp64 to int quickly. */
  #  define CV_INLINE_ROUND_DBL(value) \
        int out; \
        double temp; \
        __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
        return out;
    /* The fp64 routine also serves fp32: the argument is cast to double inside it. */
  #  define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
  #endif
  /* Optional bodies for cvIsInf(): either supplied by the user, or built on
     scalar_test_data_class when that macro is available on PPC64 with _ARCH_PWR9.
     NOTE(review): 0x30 is presumably the +/-infinity class mask - confirm against
     the Power ISA / compiler documentation. */
  #if defined(CV_INLINE_ISINF_FLT)
    /* User-specified version; CV_INLINE_ISINF_DBL should be defined as well. */
  #elif defined(__PPC64__) && defined(_ARCH_PWR9) && defined(scalar_test_data_class)
  #  define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
  #  define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
  #endif

  /* Same arrangement for cvIsNaN().
     NOTE(review): 0x40 is presumably the NaN class mask - confirm as above. */
  #if defined(CV_INLINE_ISNAN_FLT)
    /* User-specified version; CV_INLINE_ISNAN_DBL should be defined as well. */
  #elif defined(__PPC64__) && defined(_ARCH_PWR9) && defined(scalar_test_data_class)
  #  define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
  #  define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
  #endif
  /* Use builtin C math functions when available: unless the build has already
     decided, OPENCV_USE_FASTMATH_BUILTINS is switched on for x86-64/i686, ARM and
     PPC64 targets (original note: dedicated hardware is available there to round
     and convert FP values). */
  #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
      && (defined(__x86_64__) || defined(__i686__) || defined(__arm__) || defined(__PPC64__))
  #  define OPENCV_USE_FASTMATH_BUILTINS 1
  #endif

  /* Enable builtin math functions if possible, desired, and available.
     Any CV_INLINE_ISNAN_* / CV_INLINE_ISINF_* macro that is already defined is left
     untouched; only the missing ones are filled in.
     Note: not all math functions inline equally, e.g. lrint will not inline
     without the -fno-math-errno option. */
  #if defined(CV_ICC)
    /* nothing */
  #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
  #  if defined(__clang__)
  #    define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
       /* clang: each builtin is probed with __has_builtin before use. */
  #    if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
  #      define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
  #    endif
  #    if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
  #      define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
  #    endif
  #    if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
  #      define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
  #    endif
  #    if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
  #      define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
  #    endif
  #  elif defined(__GNUC__)
  #    define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
       /* GCC: the float variants use the dedicated ...f builtins. */
  #    ifndef CV_INLINE_ISNAN_DBL
  #      define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
  #    endif
  #    ifndef CV_INLINE_ISNAN_FLT
  #      define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
  #    endif
  #    ifndef CV_INLINE_ISINF_DBL
  #      define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
  #    endif
  #    ifndef CV_INLINE_ISINF_FLT
  #      define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
  #    endif
  #  elif defined(_MSC_VER)
       /* MSVC: use the isnan / isinf names directly. */
  #    ifndef CV_INLINE_ISNAN_DBL
  #      define CV_INLINE_ISNAN_DBL(value) return isnan(value);
  #    endif
  #    ifndef CV_INLINE_ISNAN_FLT
  #      define CV_INLINE_ISNAN_FLT(value) return isnan(value);
  #    endif
  #    ifndef CV_INLINE_ISINF_DBL
  #      define CV_INLINE_ISINF_DBL(value) return isinf(value);
  #    endif
  #    ifndef CV_INLINE_ISINF_FLT
  #      define CV_INLINE_ISINF_FLT(value) return isinf(value);
  #    endif
  #  endif
  #endif
  178. #endif // defined(__CUDACC__)
  /** @brief Rounds floating-point number to the nearest integer

  Depending on the target, the conversion is done by a user/arch-specific
  CV_INLINE_ROUND_DBL body, an SSE2 scalar conversion, x87 instructions, lrint(),
  or a portable add-half-and-truncate fallback.

  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
   */
  CV_INLINE int
  cvRound( double value )
  {
  #if defined(CV_INLINE_ROUND_DBL)
      CV_INLINE_ROUND_DBL(value);
  #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
      && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
      && !defined(__CUDACC__)
      /* Load the scalar into an SSE register and convert it in one step. */
      return _mm_cvtsd_si32(_mm_set_sd(value));
  #elif defined(_MSC_VER) && defined(_M_IX86)
      int rounded;
      __asm
      {
          fld value;
          fistp rounded;
      }
      return rounded;
  #elif defined(CV_ICC) || defined(__GNUC__)
      return (int)lrint(value);
  #else
      /* Portable fallback: it is acceptable that this does not follow IEEE754 rounding;
         tests of functions built on it should tolerate a +/-1 difference. */
      const double half = (value >= 0) ? 0.5 : -0.5;
      return (int)(value + half);
  #endif
  }
  /** @brief Rounds floating-point number to the nearest integer not larger than the original.

  The returned integer i satisfies:
  \f[i \le \texttt{value} < i+1\f]

  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
   */
  CV_INLINE int cvFloor( double value )
  {
  #if (defined(CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS) || defined(CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS)) \
      && defined(__PPC64__)
      return (int)__builtin_floor(value);
  #else
      /* The cast truncates toward zero, which overshoots for negative non-integers;
         step back by one in that case. */
      const int truncated = (int)value;
      return (truncated > value) ? truncated - 1 : truncated;
  #endif
  }
  /** @brief Rounds floating-point number to the nearest integer not smaller than the original.

  The function computes an integer i such that:
  \f[i - 1 < \texttt{value} \le i\f]

  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
  result is not defined.
   */
  CV_INLINE int cvCeil( double value )
  {
  #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
      && ( \
          defined(__PPC64__) \
      )
      return __builtin_ceil(value);
  #else
      /* The cast truncates toward zero, which undershoots for positive non-integers;
         (i < value) is 1 exactly in that case and bumps the result up. */
      int i = (int)value;
      return i + (i < value);
  #endif
  }
  /** @brief Determines if the argument is Not A Number.

  @param value The input floating-point value

  Returns 1 when the argument is Not A Number (as defined by IEEE754 standard) and 0
  otherwise. */
  CV_INLINE int cvIsNaN( double value )
  {
  #if defined(CV_INLINE_ISNAN_DBL)
      CV_INLINE_ISNAN_DBL(value);
  #else
      Cv64suf bits;
      unsigned hi_abs;      /* upper 32 bits with the sign bit cleared */
      unsigned lo_nonzero;  /* 1 when any of the lower 32 bits is set */

      bits.f = value;
      hi_abs = (unsigned)(bits.u >> 32) & 0x7fffffff;
      lo_nonzero = ((unsigned)bits.u != 0);
      /* 0x7ff00000 in the upper word with a zero lower word is infinity;
         anything above that (counting a non-zero lower word as +1) is a NaN. */
      return hi_abs + lo_nonzero > 0x7ff00000;
  #endif
  }
  /** @brief Determines if the argument is Infinity.

  @param value The input floating-point value

  The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
  and 0 otherwise. */
  CV_INLINE int cvIsInf( double value )
  {
  #if defined CV_INLINE_ISINF_DBL
      CV_INLINE_ISINF_DBL(value);
  #elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__)
      Cv64suf ieee754;
      ieee754.f = value;
      /* Clear only the sign bit: an infinity has an all-ones exponent and ALL 52
         mantissa bits zero. Masking with 0x7fffffff00000000 would ignore the low 32
         mantissa bits and misreport NaNs such as 0x7ff0000000000001 as infinite. */
      return (ieee754.u & 0x7fffffffffffffff) ==
                          0x7ff0000000000000;
  #else
      /* Same test done on two 32-bit halves: the upper word (sign cleared) must be
         0x7ff00000 and the lower word must be zero. */
      Cv64suf ieee754;
      ieee754.f = value;
      return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
              (unsigned)ieee754.u == 0;
  #endif
  }
  280. #ifdef __cplusplus
  /** @overload
  Single-precision variant; selects among the same kinds of implementation as the
  double version (CV_INLINE_ROUND_FLT, SSE, x87, lrintf(), portable fallback). */
  CV_INLINE int cvRound(float value)
  {
  #if defined(CV_INLINE_ROUND_FLT)
      CV_INLINE_ROUND_FLT(value);
  #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
      && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
      && !defined(__CUDACC__)
      /* Load the scalar into an SSE register and convert it in one step. */
      return _mm_cvtss_si32(_mm_set_ss(value));
  #elif defined(_MSC_VER) && defined(_M_IX86)
      int rounded;
      __asm
      {
          fld value;
          fistp rounded;
      }
      return rounded;
  #elif defined(CV_ICC) || defined(__GNUC__)
      return (int)lrintf(value);
  #else
      /* Portable fallback: it is acceptable that this does not follow IEEE754 rounding;
         tests of functions built on it should tolerate a +/-1 difference. */
      const float half = (value >= 0) ? 0.5f : -0.5f;
      return (int)(value + half);
  #endif
  }
  /** @overload
  Integer input needs no rounding and is returned unchanged. */
  CV_INLINE int cvRound( int value ) { return value; }
  /** @overload
  Single-precision variant: largest integer not greater than value. */
  CV_INLINE int cvFloor( float value )
  {
  #if (defined(CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS) || defined(CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS)) \
      && defined(__PPC64__)
      return (int)__builtin_floorf(value);
  #else
      /* The cast truncates toward zero, which overshoots for negative non-integers;
         step back by one in that case. */
      const int truncated = (int)value;
      return (truncated > value) ? truncated - 1 : truncated;
  #endif
  }
  /** @overload
  Integer input is already integral and is returned unchanged. */
  CV_INLINE int cvFloor( int value ) { return value; }
  /** @overload
  Single-precision variant: smallest integer not less than value. */
  CV_INLINE int cvCeil( float value )
  {
  #if (defined(CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS) || defined(CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS)) \
      && defined(__PPC64__)
      return (int)__builtin_ceilf(value);
  #else
      /* The cast truncates toward zero, which undershoots for positive non-integers;
         step up by one in that case. */
      const int truncated = (int)value;
      return (truncated < value) ? truncated + 1 : truncated;
  #endif
  }
  /** @overload
  Integer input is already integral and is returned unchanged. */
  CV_INLINE int cvCeil( int value ) { return value; }
  /** @overload
  Single-precision variant of the NaN test. */
  CV_INLINE int cvIsNaN( float value )
  {
  #if defined(CV_INLINE_ISNAN_FLT)
      CV_INLINE_ISNAN_FLT(value);
  #else
      Cv32suf bits;
      unsigned magnitude;  /* bit pattern with the sign bit cleared */

      bits.f = value;
      magnitude = bits.u & 0x7fffffff;
      /* 0x7f800000 is infinity; any larger magnitude pattern is a NaN. */
      return magnitude > 0x7f800000;
  #endif
  }
  /** @overload
  Single-precision variant of the infinity test. */
  CV_INLINE int cvIsInf( float value )
  {
  #if defined(CV_INLINE_ISINF_FLT)
      CV_INLINE_ISINF_FLT(value);
  #else
      Cv32suf bits;
      unsigned magnitude;  /* bit pattern with the sign bit cleared */

      bits.f = value;
      magnitude = bits.u & 0x7fffffff;
      /* All-ones exponent with a zero mantissa, either sign. */
      return magnitude == 0x7f800000;
  #endif
  }
  370. #endif // __cplusplus
  371. //! @} core_utils
  372. #endif