Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 01b988fcd7 | |||
| db0580ead1 | |||
| 523d8379b1 | |||
| 785b33b089 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,7 +11,6 @@ TactileIpc3D_autogen/
|
|||||||
*.ninja
|
*.ninja
|
||||||
*.ninja_deps
|
*.ninja_deps
|
||||||
*.ninja_log
|
*.ninja_log
|
||||||
OpenCV/
|
|
||||||
# Qt generated files
|
# Qt generated files
|
||||||
*.moc
|
*.moc
|
||||||
moc_*.cpp
|
moc_*.cpp
|
||||||
|
|||||||
4453
3rdpart/OpenCV/include/opencv2/calib3d.hpp
Normal file
4453
3rdpart/OpenCV/include/opencv2/calib3d.hpp
Normal file
File diff suppressed because it is too large
Load Diff
48
3rdpart/OpenCV/include/opencv2/calib3d/calib3d.hpp
Normal file
48
3rdpart/OpenCV/include/opencv2/calib3d/calib3d.hpp
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
|
||||||
|
#error this is a compatibility header which should not be used inside the OpenCV library
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/calib3d.hpp"
|
||||||
150
3rdpart/OpenCV/include/opencv2/calib3d/calib3d_c.h
Normal file
150
3rdpart/OpenCV/include/opencv2/calib3d/calib3d_c.h
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CALIB3D_C_H
|
||||||
|
#define OPENCV_CALIB3D_C_H
|
||||||
|
|
||||||
|
#include "opencv2/core/types_c.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Calculates fundamental matrix given a set of corresponding points */
|
||||||
|
#define CV_FM_7POINT 1
|
||||||
|
#define CV_FM_8POINT 2
|
||||||
|
|
||||||
|
#define CV_LMEDS 4
|
||||||
|
#define CV_RANSAC 8
|
||||||
|
|
||||||
|
#define CV_FM_LMEDS_ONLY CV_LMEDS
|
||||||
|
#define CV_FM_RANSAC_ONLY CV_RANSAC
|
||||||
|
#define CV_FM_LMEDS CV_LMEDS
|
||||||
|
#define CV_FM_RANSAC CV_RANSAC
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
CV_ITERATIVE = 0,
|
||||||
|
CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
|
||||||
|
CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
|
||||||
|
CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CV_CALIB_CB_ADAPTIVE_THRESH 1
|
||||||
|
#define CV_CALIB_CB_NORMALIZE_IMAGE 2
|
||||||
|
#define CV_CALIB_CB_FILTER_QUADS 4
|
||||||
|
#define CV_CALIB_CB_FAST_CHECK 8
|
||||||
|
|
||||||
|
#define CV_CALIB_USE_INTRINSIC_GUESS 1
|
||||||
|
#define CV_CALIB_FIX_ASPECT_RATIO 2
|
||||||
|
#define CV_CALIB_FIX_PRINCIPAL_POINT 4
|
||||||
|
#define CV_CALIB_ZERO_TANGENT_DIST 8
|
||||||
|
#define CV_CALIB_FIX_FOCAL_LENGTH 16
|
||||||
|
#define CV_CALIB_FIX_K1 32
|
||||||
|
#define CV_CALIB_FIX_K2 64
|
||||||
|
#define CV_CALIB_FIX_K3 128
|
||||||
|
#define CV_CALIB_FIX_K4 2048
|
||||||
|
#define CV_CALIB_FIX_K5 4096
|
||||||
|
#define CV_CALIB_FIX_K6 8192
|
||||||
|
#define CV_CALIB_RATIONAL_MODEL 16384
|
||||||
|
#define CV_CALIB_THIN_PRISM_MODEL 32768
|
||||||
|
#define CV_CALIB_FIX_S1_S2_S3_S4 65536
|
||||||
|
#define CV_CALIB_TILTED_MODEL 262144
|
||||||
|
#define CV_CALIB_FIX_TAUX_TAUY 524288
|
||||||
|
#define CV_CALIB_FIX_TANGENT_DIST 2097152
|
||||||
|
|
||||||
|
#define CV_CALIB_NINTRINSIC 18
|
||||||
|
|
||||||
|
#define CV_CALIB_FIX_INTRINSIC 256
|
||||||
|
#define CV_CALIB_SAME_FOCAL_LENGTH 512
|
||||||
|
|
||||||
|
#define CV_CALIB_ZERO_DISPARITY 1024
|
||||||
|
|
||||||
|
/* stereo correspondence parameters and functions */
|
||||||
|
#define CV_STEREO_BM_NORMALIZED_RESPONSE 0
|
||||||
|
#define CV_STEREO_BM_XSOBEL 1
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
class CV_EXPORTS CvLevMarq
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CvLevMarq();
|
||||||
|
CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
|
||||||
|
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
|
||||||
|
bool completeSymmFlag=false );
|
||||||
|
~CvLevMarq();
|
||||||
|
void init( int nparams, int nerrs, CvTermCriteria criteria=
|
||||||
|
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
|
||||||
|
bool completeSymmFlag=false );
|
||||||
|
bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
|
||||||
|
bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
|
||||||
|
|
||||||
|
void clear();
|
||||||
|
void step();
|
||||||
|
enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
|
||||||
|
|
||||||
|
cv::Ptr<CvMat> mask;
|
||||||
|
cv::Ptr<CvMat> prevParam;
|
||||||
|
cv::Ptr<CvMat> param;
|
||||||
|
cv::Ptr<CvMat> J;
|
||||||
|
cv::Ptr<CvMat> err;
|
||||||
|
cv::Ptr<CvMat> JtJ;
|
||||||
|
cv::Ptr<CvMat> JtJN;
|
||||||
|
cv::Ptr<CvMat> JtErr;
|
||||||
|
cv::Ptr<CvMat> JtJV;
|
||||||
|
cv::Ptr<CvMat> JtJW;
|
||||||
|
double prevErrNorm, errNorm;
|
||||||
|
int lambdaLg10;
|
||||||
|
CvTermCriteria criteria;
|
||||||
|
int state;
|
||||||
|
int iters;
|
||||||
|
bool completeSymmFlag;
|
||||||
|
int solveMethod;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* OPENCV_CALIB3D_C_H */
|
||||||
3427
3rdpart/OpenCV/include/opencv2/core.hpp
Normal file
3427
3rdpart/OpenCV/include/opencv2/core.hpp
Normal file
File diff suppressed because it is too large
Load Diff
678
3rdpart/OpenCV/include/opencv2/core/affine.hpp
Normal file
678
3rdpart/OpenCV/include/opencv2/core/affine.hpp
Normal file
@@ -0,0 +1,678 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_AFFINE3_HPP
|
||||||
|
#define OPENCV_CORE_AFFINE3_HPP
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @addtogroup core_eigen
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
/** @brief Affine transform
|
||||||
|
*
|
||||||
|
* It represents a 4x4 homogeneous transformation matrix \f$T\f$
|
||||||
|
*
|
||||||
|
* \f[T =
|
||||||
|
* \begin{bmatrix}
|
||||||
|
* R & t\\
|
||||||
|
* 0 & 1\\
|
||||||
|
* \end{bmatrix}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
|
||||||
|
*
|
||||||
|
* You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
|
||||||
|
* which is converted to a 3x3 rotation matrix by the Rodrigues formula.
|
||||||
|
*
|
||||||
|
* To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
|
||||||
|
* angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Vec3f r, t;
|
||||||
|
* cv::Affine3f T(r, t);
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* If you already have the rotation matrix \f$R\f$, then you can use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Matx33f R;
|
||||||
|
* cv::Affine3f T(R, t);
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* To extract the rotation matrix \f$R\f$ from \f$T\f$, use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Matx33f R = T.rotation();
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* To extract the translation vector \f$t\f$ from \f$T\f$, use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Vec3f t = T.translation();
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* To extract the rotation vector \f$r\f$ from \f$T\f$, use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Vec3f r = T.rvec();
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* Note that since the mapping from rotation vectors to rotation matrices
|
||||||
|
* is many to one. The returned rotation vector is not necessarily the one
|
||||||
|
* you used before to set the matrix.
|
||||||
|
*
|
||||||
|
* If you have two transformations \f$T = T_1 * T_2\f$, use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Affine3f T, T1, T2;
|
||||||
|
* T = T2.concatenate(T1);
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
* To get the inverse transform of \f$T\f$, use
|
||||||
|
*
|
||||||
|
* @code
|
||||||
|
* cv::Affine3f T, T_inv;
|
||||||
|
* T_inv = T.inv();
|
||||||
|
* @endcode
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template<typename T>
|
||||||
|
class Affine3
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef T float_type;
|
||||||
|
typedef Matx<float_type, 3, 3> Mat3;
|
||||||
|
typedef Matx<float_type, 4, 4> Mat4;
|
||||||
|
typedef Vec<float_type, 3> Vec3;
|
||||||
|
|
||||||
|
//! Default constructor. It represents a 4x4 identity matrix.
|
||||||
|
Affine3();
|
||||||
|
|
||||||
|
//! Augmented affine matrix
|
||||||
|
Affine3(const Mat4& affine);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The resulting 4x4 matrix is
|
||||||
|
*
|
||||||
|
* \f[
|
||||||
|
* \begin{bmatrix}
|
||||||
|
* R & t\\
|
||||||
|
* 0 & 1\\
|
||||||
|
* \end{bmatrix}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* @param R 3x3 rotation matrix.
|
||||||
|
* @param t 3x1 translation vector.
|
||||||
|
*/
|
||||||
|
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rodrigues vector.
|
||||||
|
*
|
||||||
|
* The last row of the current matrix is set to [0,0,0,1].
|
||||||
|
*
|
||||||
|
* @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
|
||||||
|
* indicates the rotation angle in radian (using right hand rule).
|
||||||
|
* @param t 3x1 translation vector.
|
||||||
|
*/
|
||||||
|
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
|
||||||
|
*
|
||||||
|
* The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
|
||||||
|
*
|
||||||
|
* @param data 1-channel matrix.
|
||||||
|
* when it is 4x4, it is copied to the current matrix and t is not used.
|
||||||
|
* When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
|
||||||
|
* When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
|
||||||
|
* When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
|
||||||
|
* to compute a 3x3 rotation matrix.
|
||||||
|
* @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
|
||||||
|
*/
|
||||||
|
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
|
||||||
|
|
||||||
|
//! From 16-element array
|
||||||
|
explicit Affine3(const float_type* vals);
|
||||||
|
|
||||||
|
//! Create an 4x4 identity transform
|
||||||
|
static Affine3 Identity();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rotation matrix.
|
||||||
|
*
|
||||||
|
* Copy the rotation matrix to the upper left 3x3 part of the current matrix.
|
||||||
|
* The remaining elements of the current matrix are not changed.
|
||||||
|
*
|
||||||
|
* @param R 3x3 rotation matrix.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void rotation(const Mat3& R);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rodrigues vector.
|
||||||
|
*
|
||||||
|
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||||
|
*
|
||||||
|
* @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
|
||||||
|
* its length indicates the rotation angle in radian (using the right thumb convention).
|
||||||
|
*/
|
||||||
|
void rotation(const Vec3& rvec);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
|
||||||
|
*
|
||||||
|
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||||
|
*
|
||||||
|
* @param data 1-channel matrix.
|
||||||
|
* When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
|
||||||
|
* When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
|
||||||
|
* is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
|
||||||
|
*/
|
||||||
|
void rotation(const Mat& data);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy the 3x3 matrix L to the upper left part of the current matrix
|
||||||
|
*
|
||||||
|
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
|
||||||
|
*
|
||||||
|
* @param L 3x3 matrix.
|
||||||
|
*/
|
||||||
|
void linear(const Mat3& L);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy t to the first three elements of the last column of the current matrix
|
||||||
|
*
|
||||||
|
* It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
|
||||||
|
*
|
||||||
|
* @param t 3x1 translation vector.
|
||||||
|
*/
|
||||||
|
void translation(const Vec3& t);
|
||||||
|
|
||||||
|
//! @return the upper left 3x3 part
|
||||||
|
Mat3 rotation() const;
|
||||||
|
|
||||||
|
//! @return the upper left 3x3 part
|
||||||
|
Mat3 linear() const;
|
||||||
|
|
||||||
|
//! @return the upper right 3x1 part
|
||||||
|
Vec3 translation() const;
|
||||||
|
|
||||||
|
//! Rodrigues vector.
|
||||||
|
//! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
|
||||||
|
//! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
|
||||||
|
//! this function returns only one rotation vector that represents the current rotation matrix,
|
||||||
|
//! which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
|
||||||
|
Vec3 rvec() const;
|
||||||
|
|
||||||
|
//! @return the inverse of the current matrix.
|
||||||
|
Affine3 inv(int method = cv::DECOMP_SVD) const;
|
||||||
|
|
||||||
|
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
|
||||||
|
Affine3 rotate(const Mat3& R) const;
|
||||||
|
|
||||||
|
//! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
|
||||||
|
Affine3 rotate(const Vec3& rvec) const;
|
||||||
|
|
||||||
|
//! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
|
||||||
|
Affine3 translate(const Vec3& t) const;
|
||||||
|
|
||||||
|
//! a.concatenate(affine) is equivalent to affine * a;
|
||||||
|
Affine3 concatenate(const Affine3& affine) const;
|
||||||
|
|
||||||
|
template <typename Y> operator Affine3<Y>() const;
|
||||||
|
|
||||||
|
template <typename Y> Affine3<Y> cast() const;
|
||||||
|
|
||||||
|
Mat4 matrix;
|
||||||
|
|
||||||
|
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
|
||||||
|
Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
|
||||||
|
Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
|
||||||
|
operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
|
||||||
|
operator Eigen::Transform<T, 3, Eigen::Affine>() const;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> static
|
||||||
|
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
|
||||||
|
|
||||||
|
//! V is a 3-element vector with member fields x, y and z
|
||||||
|
template<typename T, typename V> static
|
||||||
|
V operator*(const Affine3<T>& affine, const V& vector);
|
||||||
|
|
||||||
|
typedef Affine3<float> Affine3f;
|
||||||
|
typedef Affine3<double> Affine3d;
|
||||||
|
|
||||||
|
static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
|
||||||
|
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
|
||||||
|
|
||||||
|
template<typename _Tp> class DataType< Affine3<_Tp> >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef Affine3<_Tp> value_type;
|
||||||
|
typedef Affine3<typename DataType<_Tp>::work_type> work_type;
|
||||||
|
typedef _Tp channel_type;
|
||||||
|
|
||||||
|
enum { generic_type = 0,
|
||||||
|
channels = 16,
|
||||||
|
fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
|
||||||
|
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
|
||||||
|
,depth = DataType<channel_type>::depth
|
||||||
|
,type = CV_MAKETYPE(depth, channels)
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef Vec<channel_type, channels> vec_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace traits {
|
||||||
|
template<typename _Tp>
|
||||||
|
struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
|
||||||
|
template<typename _Tp>
|
||||||
|
struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
//! @} core
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementation
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3()
|
||||||
|
: matrix(Mat4::eye())
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const Mat4& affine)
|
||||||
|
: matrix(affine)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
|
||||||
|
{
|
||||||
|
rotation(R);
|
||||||
|
translation(t);
|
||||||
|
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||||
|
matrix.val[15] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
|
||||||
|
{
|
||||||
|
rotation(_rvec);
|
||||||
|
translation(t);
|
||||||
|
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||||
|
matrix.val[15] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
|
||||||
|
{
|
||||||
|
CV_Assert(data.type() == cv::traits::Type<T>::value);
|
||||||
|
CV_Assert(data.channels() == 1);
|
||||||
|
|
||||||
|
if (data.cols == 4 && data.rows == 4)
|
||||||
|
{
|
||||||
|
data.copyTo(matrix);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (data.cols == 4 && data.rows == 3)
|
||||||
|
{
|
||||||
|
rotation(data(Rect(0, 0, 3, 3)));
|
||||||
|
translation(data(Rect(3, 0, 1, 3)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rotation(data);
|
||||||
|
translation(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
|
||||||
|
matrix.val[15] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::Identity()
|
||||||
|
{
|
||||||
|
return Affine3<T>(cv::Affine3<T>::Mat4::eye());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
void cv::Affine3<T>::rotation(const Mat3& R)
|
||||||
|
{
|
||||||
|
linear(R);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
void cv::Affine3<T>::rotation(const Vec3& _rvec)
|
||||||
|
{
|
||||||
|
double theta = norm(_rvec);
|
||||||
|
|
||||||
|
if (theta < DBL_EPSILON)
|
||||||
|
rotation(Mat3::eye());
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double c = std::cos(theta);
|
||||||
|
double s = std::sin(theta);
|
||||||
|
double c1 = 1. - c;
|
||||||
|
double itheta = (theta != 0) ? 1./theta : 0.;
|
||||||
|
|
||||||
|
Point3_<T> r = _rvec*itheta;
|
||||||
|
|
||||||
|
Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
|
||||||
|
Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );
|
||||||
|
|
||||||
|
// R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
|
||||||
|
// where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
|
||||||
|
Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;
|
||||||
|
|
||||||
|
rotation(R);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix;
|
||||||
|
template<typename T> inline
|
||||||
|
void cv::Affine3<T>::rotation(const cv::Mat& data)
|
||||||
|
{
|
||||||
|
CV_Assert(data.type() == cv::traits::Type<T>::value);
|
||||||
|
CV_Assert(data.channels() == 1);
|
||||||
|
|
||||||
|
if (data.cols == 3 && data.rows == 3)
|
||||||
|
{
|
||||||
|
Mat3 R;
|
||||||
|
data.copyTo(R);
|
||||||
|
rotation(R);
|
||||||
|
}
|
||||||
|
else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
|
||||||
|
{
|
||||||
|
Vec3 _rvec;
|
||||||
|
data.reshape(1, 3).copyTo(_rvec);
|
||||||
|
rotation(_rvec);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
void cv::Affine3<T>::linear(const Mat3& L)
|
||||||
|
{
|
||||||
|
matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2];
|
||||||
|
matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5];
|
||||||
|
matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
void cv::Affine3<T>::translation(const Vec3& t)
|
||||||
|
{
|
||||||
|
matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
|
||||||
|
{
|
||||||
|
return linear();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
|
||||||
|
{
|
||||||
|
typename cv::Affine3<T>::Mat3 R;
|
||||||
|
R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2];
|
||||||
|
R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6];
|
||||||
|
R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10];
|
||||||
|
return R;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
|
||||||
|
{
|
||||||
|
return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
|
||||||
|
{
|
||||||
|
cv::Vec3d w;
|
||||||
|
cv::Matx33d u, vt, R = rotation();
|
||||||
|
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
|
||||||
|
R = u * vt;
|
||||||
|
|
||||||
|
double rx = R.val[7] - R.val[5];
|
||||||
|
double ry = R.val[2] - R.val[6];
|
||||||
|
double rz = R.val[3] - R.val[1];
|
||||||
|
|
||||||
|
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
|
||||||
|
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
|
||||||
|
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
|
||||||
|
double theta = std::acos(c);
|
||||||
|
|
||||||
|
if( s < 1e-5 )
|
||||||
|
{
|
||||||
|
if( c > 0 )
|
||||||
|
rx = ry = rz = 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double t;
|
||||||
|
t = (R.val[0] + 1) * 0.5;
|
||||||
|
rx = std::sqrt(std::max(t, 0.0));
|
||||||
|
t = (R.val[4] + 1) * 0.5;
|
||||||
|
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
|
||||||
|
t = (R.val[8] + 1) * 0.5;
|
||||||
|
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
|
||||||
|
|
||||||
|
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
|
||||||
|
rz = -rz;
|
||||||
|
theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
|
||||||
|
rx *= theta;
|
||||||
|
ry *= theta;
|
||||||
|
rz *= theta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double vth = 1/(2*s);
|
||||||
|
vth *= theta;
|
||||||
|
rx *= vth; ry *= vth; rz *= vth;
|
||||||
|
}
|
||||||
|
|
||||||
|
return cv::Vec3d(rx, ry, rz);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
|
||||||
|
{
|
||||||
|
return matrix.inv(method);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
|
||||||
|
{
|
||||||
|
Mat3 Lc = linear();
|
||||||
|
Vec3 tc = translation();
|
||||||
|
Mat4 result;
|
||||||
|
result.val[12] = result.val[13] = result.val[14] = 0;
|
||||||
|
result.val[15] = 1;
|
||||||
|
|
||||||
|
for(int j = 0; j < 3; ++j)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < 3; ++i)
|
||||||
|
{
|
||||||
|
float_type value = 0;
|
||||||
|
for(int k = 0; k < 3; ++k)
|
||||||
|
value += R(j, k) * Lc(k, i);
|
||||||
|
result(j, i) = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
result(j, 3) = R.row(j).dot(tc.t());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
|
||||||
|
{
|
||||||
|
return rotate(Affine3f(_rvec).rotation());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
|
||||||
|
{
|
||||||
|
Mat4 m = matrix;
|
||||||
|
m.val[ 3] += t[0];
|
||||||
|
m.val[ 7] += t[1];
|
||||||
|
m.val[11] += t[2];
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
|
||||||
|
{
|
||||||
|
return (*this).rotate(affine.rotation()).translate(affine.translation());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> template <typename Y> inline
|
||||||
|
cv::Affine3<T>::operator Affine3<Y>() const
|
||||||
|
{
|
||||||
|
return Affine3<Y>(matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> template <typename Y> inline
|
||||||
|
cv::Affine3<Y> cv::Affine3<T>::cast() const
|
||||||
|
{
|
||||||
|
return Affine3<Y>(matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
|
||||||
|
{
|
||||||
|
return affine2.concatenate(affine1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename V> inline
|
||||||
|
V cv::operator*(const cv::Affine3<T>& affine, const V& v)
|
||||||
|
{
|
||||||
|
const typename Affine3<T>::Mat4& m = affine.matrix;
|
||||||
|
|
||||||
|
V r;
|
||||||
|
r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
|
||||||
|
r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
|
||||||
|
r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
|
||||||
|
{
|
||||||
|
const cv::Matx44f& m = affine.matrix;
|
||||||
|
cv::Vec3f r;
|
||||||
|
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
|
||||||
|
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
|
||||||
|
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
|
||||||
|
{
|
||||||
|
const cv::Matx44d& m = affine.matrix;
|
||||||
|
cv::Vec3d r;
|
||||||
|
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
|
||||||
|
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
|
||||||
|
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
|
||||||
|
{
|
||||||
|
cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
|
||||||
|
{
|
||||||
|
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
|
||||||
|
cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
|
||||||
|
{
|
||||||
|
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
|
||||||
|
cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
|
||||||
|
cv::Mat(matrix, false).copyTo(hdr);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> inline
|
||||||
|
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
|
||||||
|
{
|
||||||
|
return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif /* OPENCV_CORE_AFFINE3_HPP */
|
||||||
101
3rdpart/OpenCV/include/opencv2/core/async.hpp
Normal file
101
3rdpart/OpenCV/include/opencv2/core/async.hpp
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_ASYNC_HPP
|
||||||
|
#define OPENCV_CORE_ASYNC_HPP
|
||||||
|
|
||||||
|
#include <opencv2/core/mat.hpp>
|
||||||
|
|
||||||
|
//#include <future>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
/** @addtogroup core_async
|
||||||
|
|
||||||
|
@{
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Returns result of asynchronous operations
|
||||||
|
|
||||||
|
Object has attached asynchronous state.
|
||||||
|
Assignment operator doesn't clone asynchronous state (it is shared between all instances).
|
||||||
|
|
||||||
|
Result can be fetched via get() method only once.
|
||||||
|
|
||||||
|
*/
|
||||||
|
class CV_EXPORTS_W AsyncArray
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
~AsyncArray() CV_NOEXCEPT;
|
||||||
|
CV_WRAP AsyncArray() CV_NOEXCEPT;
|
||||||
|
AsyncArray(const AsyncArray& o) CV_NOEXCEPT;
|
||||||
|
AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT;
|
||||||
|
CV_WRAP void release() CV_NOEXCEPT;
|
||||||
|
|
||||||
|
/** Fetch the result.
|
||||||
|
@param[out] dst destination array
|
||||||
|
|
||||||
|
Waits for result until container has valid result.
|
||||||
|
Throws exception if exception was stored as a result.
|
||||||
|
|
||||||
|
Throws exception on invalid container state.
|
||||||
|
|
||||||
|
@note Result or stored exception can be fetched only once.
|
||||||
|
*/
|
||||||
|
CV_WRAP void get(OutputArray dst) const;
|
||||||
|
|
||||||
|
/** Retrieving the result with timeout
|
||||||
|
@param[out] dst destination array
|
||||||
|
@param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait
|
||||||
|
|
||||||
|
@returns true if result is ready, false if the timeout has expired
|
||||||
|
|
||||||
|
@note Result or stored exception can be fetched only once.
|
||||||
|
*/
|
||||||
|
bool get(OutputArray dst, int64 timeoutNs) const;
|
||||||
|
|
||||||
|
CV_WRAP inline
|
||||||
|
bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); }
|
||||||
|
|
||||||
|
bool wait_for(int64 timeoutNs) const;
|
||||||
|
|
||||||
|
CV_WRAP inline
|
||||||
|
bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); }
|
||||||
|
|
||||||
|
CV_WRAP bool valid() const CV_NOEXCEPT;
|
||||||
|
|
||||||
|
inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
|
||||||
|
inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||||
|
|
||||||
|
template<typename _Rep, typename _Period>
|
||||||
|
inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout)
|
||||||
|
{
|
||||||
|
return get(dst, (int64)(std::chrono::nanoseconds(timeout).count()));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Rep, typename _Period>
|
||||||
|
inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout)
|
||||||
|
{
|
||||||
|
return wait_for((int64)(std::chrono::nanoseconds(timeout).count()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
std::future<Mat> getFutureMat() const;
|
||||||
|
std::future<UMat> getFutureUMat() const;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// PImpl
|
||||||
|
struct Impl; friend struct Impl;
|
||||||
|
inline void* _getImpl() const CV_NOEXCEPT { return p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
} // namespace
|
||||||
|
#endif // OPENCV_CORE_ASYNC_HPP
|
||||||
747
3rdpart/OpenCV/include/opencv2/core/base.hpp
Normal file
747
3rdpart/OpenCV/include/opencv2/core/base.hpp
Normal file
@@ -0,0 +1,747 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2014, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_BASE_HPP
|
||||||
|
#define OPENCV_CORE_BASE_HPP
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
# error base.hpp header must be compiled as C++
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/opencv_modules.hpp"
|
||||||
|
|
||||||
|
#include <climits>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core/cvstd.hpp"
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @addtogroup core_utils
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
namespace Error {
|
||||||
|
//! error codes
|
||||||
|
enum Code {
|
||||||
|
StsOk= 0, //!< everything is ok
|
||||||
|
StsBackTrace= -1, //!< pseudo error for back trace
|
||||||
|
StsError= -2, //!< unknown /unspecified error
|
||||||
|
StsInternal= -3, //!< internal error (bad state)
|
||||||
|
StsNoMem= -4, //!< insufficient memory
|
||||||
|
StsBadArg= -5, //!< function arg/param is bad
|
||||||
|
StsBadFunc= -6, //!< unsupported function
|
||||||
|
StsNoConv= -7, //!< iteration didn't converge
|
||||||
|
StsAutoTrace= -8, //!< tracing
|
||||||
|
HeaderIsNull= -9, //!< image header is NULL
|
||||||
|
BadImageSize= -10, //!< image size is invalid
|
||||||
|
BadOffset= -11, //!< offset is invalid
|
||||||
|
BadDataPtr= -12, //!<
|
||||||
|
BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix.
|
||||||
|
BadModelOrChSeq= -14, //!<
|
||||||
|
BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices.
|
||||||
|
BadNumChannel1U= -16, //!<
|
||||||
|
BadDepth= -17, //!< input image depth is not supported by the function
|
||||||
|
BadAlphaChannel= -18, //!<
|
||||||
|
BadOrder= -19, //!< number of dimensions is out of range
|
||||||
|
BadOrigin= -20, //!< incorrect input origin
|
||||||
|
BadAlign= -21, //!< incorrect input align
|
||||||
|
BadCallBack= -22, //!<
|
||||||
|
BadTileSize= -23, //!<
|
||||||
|
BadCOI= -24, //!< input COI is not supported
|
||||||
|
BadROISize= -25, //!< incorrect input roi
|
||||||
|
MaskIsTiled= -26, //!<
|
||||||
|
StsNullPtr= -27, //!< null pointer
|
||||||
|
StsVecLengthErr= -28, //!< incorrect vector length
|
||||||
|
StsFilterStructContentErr= -29, //!< incorrect filter structure content
|
||||||
|
StsKernelStructContentErr= -30, //!< incorrect transform kernel content
|
||||||
|
StsFilterOffsetErr= -31, //!< incorrect filter offset value
|
||||||
|
StsBadSize= -201, //!< the input/output structure size is incorrect
|
||||||
|
StsDivByZero= -202, //!< division by zero
|
||||||
|
StsInplaceNotSupported= -203, //!< in-place operation is not supported
|
||||||
|
StsObjectNotFound= -204, //!< request can't be completed
|
||||||
|
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
|
||||||
|
StsBadFlag= -206, //!< flag is wrong or not supported
|
||||||
|
StsBadPoint= -207, //!< bad CvPoint
|
||||||
|
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
|
||||||
|
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
|
||||||
|
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
|
||||||
|
StsOutOfRange= -211, //!< some of parameters are out of range
|
||||||
|
StsParseError= -212, //!< invalid syntax/structure of the parsed file
|
||||||
|
StsNotImplemented= -213, //!< the requested function/feature is not implemented
|
||||||
|
StsBadMemBlock= -214, //!< an allocated block has been corrupted
|
||||||
|
StsAssert= -215, //!< assertion failed
|
||||||
|
GpuNotSupported= -216, //!< no CUDA support
|
||||||
|
GpuApiCallError= -217, //!< GPU API call error
|
||||||
|
OpenGlNotSupported= -218, //!< no OpenGL support
|
||||||
|
OpenGlApiCallError= -219, //!< OpenGL API call error
|
||||||
|
OpenCLApiCallError= -220, //!< OpenCL API call error
|
||||||
|
OpenCLDoubleNotSupported= -221,
|
||||||
|
OpenCLInitError= -222, //!< OpenCL initialization error
|
||||||
|
OpenCLNoAMDBlasFft= -223
|
||||||
|
};
|
||||||
|
} //Error
|
||||||
|
|
||||||
|
//! @} core_utils
|
||||||
|
|
||||||
|
//! @addtogroup core_array
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//! matrix decomposition types
|
||||||
|
enum DecompTypes {
|
||||||
|
/** Gaussian elimination with the optimal pivot element chosen. */
|
||||||
|
DECOMP_LU = 0,
|
||||||
|
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
|
||||||
|
src1 can be singular */
|
||||||
|
DECOMP_SVD = 1,
|
||||||
|
/** eigenvalue decomposition; the matrix src1 must be symmetrical */
|
||||||
|
DECOMP_EIG = 2,
|
||||||
|
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
|
||||||
|
defined */
|
||||||
|
DECOMP_CHOLESKY = 3,
|
||||||
|
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
|
||||||
|
DECOMP_QR = 4,
|
||||||
|
/** while all the previous flags are mutually exclusive, this flag can be used together with
|
||||||
|
any of the previous; it means that the normal equations
|
||||||
|
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
|
||||||
|
solved instead of the original system
|
||||||
|
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
|
||||||
|
DECOMP_NORMAL = 16
|
||||||
|
};
|
||||||
|
|
||||||
|
/** norm types
|
||||||
|
|
||||||
|
src1 and src2 denote input arrays.
|
||||||
|
*/
|
||||||
|
|
||||||
|
enum NormTypes {
|
||||||
|
/**
|
||||||
|
\f[
|
||||||
|
norm = \forkthree
|
||||||
|
{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
|
||||||
|
{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
|
||||||
|
{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
|
||||||
|
\f]
|
||||||
|
*/
|
||||||
|
NORM_INF = 1,
|
||||||
|
/**
|
||||||
|
\f[
|
||||||
|
norm = \forkthree
|
||||||
|
{\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)}
|
||||||
|
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) }
|
||||||
|
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
|
||||||
|
\f]*/
|
||||||
|
NORM_L1 = 2,
|
||||||
|
/**
|
||||||
|
\f[
|
||||||
|
norm = \forkthree
|
||||||
|
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
|
||||||
|
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
|
||||||
|
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
|
||||||
|
\f]
|
||||||
|
*/
|
||||||
|
NORM_L2 = 4,
|
||||||
|
/**
|
||||||
|
\f[
|
||||||
|
norm = \forkthree
|
||||||
|
{ \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
|
||||||
|
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
|
||||||
|
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) }
|
||||||
|
\f]
|
||||||
|
*/
|
||||||
|
NORM_L2SQR = 5,
|
||||||
|
/**
|
||||||
|
In the case of one input array, calculates the Hamming distance of the array from zero,
|
||||||
|
In the case of two input arrays, calculates the Hamming distance between the arrays.
|
||||||
|
*/
|
||||||
|
NORM_HAMMING = 6,
|
||||||
|
/**
|
||||||
|
Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will
|
||||||
|
be added and treated as a single bit to be used in the same calculation as NORM_HAMMING.
|
||||||
|
*/
|
||||||
|
NORM_HAMMING2 = 7,
|
||||||
|
NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
|
||||||
|
NORM_RELATIVE = 8, //!< flag
|
||||||
|
NORM_MINMAX = 32 //!< flag
|
||||||
|
};
|
||||||
|
|
||||||
|
//! comparison types
|
||||||
|
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
|
||||||
|
CMP_GT = 1, //!< src1 is greater than src2.
|
||||||
|
CMP_GE = 2, //!< src1 is greater than or equal to src2.
|
||||||
|
CMP_LT = 3, //!< src1 is less than src2.
|
||||||
|
CMP_LE = 4, //!< src1 is less than or equal to src2.
|
||||||
|
CMP_NE = 5 //!< src1 is unequal to src2.
|
||||||
|
};
|
||||||
|
|
||||||
|
//! generalized matrix multiplication flags
|
||||||
|
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
|
||||||
|
GEMM_2_T = 2, //!< transposes src2
|
||||||
|
GEMM_3_T = 4 //!< transposes src3
|
||||||
|
};
|
||||||
|
|
||||||
|
enum DftFlags {
|
||||||
|
/** performs an inverse 1D or 2D transform instead of the default forward
|
||||||
|
transform. */
|
||||||
|
DFT_INVERSE = 1,
|
||||||
|
/** scales the result: divide it by the number of array elements. Normally, it is
|
||||||
|
combined with DFT_INVERSE. */
|
||||||
|
DFT_SCALE = 2,
|
||||||
|
/** performs a forward or inverse transform of every individual row of the input
|
||||||
|
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
|
||||||
|
decrease the overhead (which is sometimes several times larger than the processing itself) to
|
||||||
|
perform 3D and higher-dimensional transformations and so forth.*/
|
||||||
|
DFT_ROWS = 4,
|
||||||
|
/** performs a forward transformation of 1D or 2D real array; the result,
|
||||||
|
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
|
||||||
|
description below for details), and such an array can be packed into a real array of the same
|
||||||
|
size as input, which is the fastest option and which is what the function does by default;
|
||||||
|
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
|
||||||
|
pass the flag to enable the function to produce a full-size complex output array. */
|
||||||
|
DFT_COMPLEX_OUTPUT = 16,
|
||||||
|
/** performs an inverse transformation of a 1D or 2D complex array; the
|
||||||
|
result is normally a complex array of the same size, however, if the input array has
|
||||||
|
conjugate-complex symmetry (for example, it is a result of forward transformation with
|
||||||
|
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
|
||||||
|
check whether the input is symmetrical or not, you can pass the flag and then the function
|
||||||
|
will assume the symmetry and produce the real output array (note that when the input is packed
|
||||||
|
into a real array and inverse transformation is executed, the function treats the input as a
|
||||||
|
packed complex-conjugate symmetrical array, and the output will also be a real array). */
|
||||||
|
DFT_REAL_OUTPUT = 32,
|
||||||
|
/** specifies that input is complex input. If this flag is set, the input must have 2 channels.
|
||||||
|
On the other hand, for backwards compatibility reason, if input has 2 channels, input is
|
||||||
|
already considered complex. */
|
||||||
|
DFT_COMPLEX_INPUT = 64,
|
||||||
|
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
|
||||||
|
DCT_INVERSE = DFT_INVERSE,
|
||||||
|
/** performs a forward or inverse transform of every individual row of the input
|
||||||
|
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
|
||||||
|
decrease the overhead (which is sometimes several times larger than the processing itself) to
|
||||||
|
perform 3D and higher-dimensional transforms and so forth.*/
|
||||||
|
DCT_ROWS = DFT_ROWS
|
||||||
|
};
|
||||||
|
|
||||||
|
/*! Various border types, image boundaries are denoted with the `|` character in the table below, when describing each method.
|
||||||
|
|
||||||
|
The following examples show the result of the @ref copyMakeBorder call according to different methods.
|
||||||
|
Input image is `6x4` (width x height) size and the @ref copyMakeBorder function is used with a border size of 2 pixels
|
||||||
|
in each direction, giving a resulting image of `10x8` resolution.
|
||||||
|
|
||||||
|
@code
|
||||||
|
Input image:
|
||||||
|
[[ 0 1 2 3 4 5]
|
||||||
|
[ 6 7 8 9 10 11]
|
||||||
|
[12 13 14 15 16 17]
|
||||||
|
[18 19 20 21 22 23]]
|
||||||
|
|
||||||
|
Border type: BORDER_CONSTANT (a constant value of 255 is used)
|
||||||
|
[[255 255 255 255 255 255 255 255 255 255]
|
||||||
|
[255 255 255 255 255 255 255 255 255 255]
|
||||||
|
[255 255 0 1 2 3 4 5 255 255]
|
||||||
|
[255 255 6 7 8 9 10 11 255 255]
|
||||||
|
[255 255 12 13 14 15 16 17 255 255]
|
||||||
|
[255 255 18 19 20 21 22 23 255 255]
|
||||||
|
[255 255 255 255 255 255 255 255 255 255]
|
||||||
|
[255 255 255 255 255 255 255 255 255 255]]
|
||||||
|
|
||||||
|
Border type: BORDER_REPLICATE
|
||||||
|
[[ 0 0 0 1 2 3 4 5 5 5]
|
||||||
|
[ 0 0 0 1 2 3 4 5 5 5]
|
||||||
|
[ 0 0 0 1 2 3 4 5 5 5]
|
||||||
|
[ 6 6 6 7 8 9 10 11 11 11]
|
||||||
|
[12 12 12 13 14 15 16 17 17 17]
|
||||||
|
[18 18 18 19 20 21 22 23 23 23]
|
||||||
|
[18 18 18 19 20 21 22 23 23 23]
|
||||||
|
[18 18 18 19 20 21 22 23 23 23]]
|
||||||
|
|
||||||
|
Border type: BORDER_REFLECT
|
||||||
|
[[ 7 6 6 7 8 9 10 11 11 10]
|
||||||
|
[ 1 0 0 1 2 3 4 5 5 4]
|
||||||
|
[ 1 0 0 1 2 3 4 5 5 4]
|
||||||
|
[ 7 6 6 7 8 9 10 11 11 10]
|
||||||
|
[13 12 12 13 14 15 16 17 17 16]
|
||||||
|
[19 18 18 19 20 21 22 23 23 22]
|
||||||
|
[19 18 18 19 20 21 22 23 23 22]
|
||||||
|
[13 12 12 13 14 15 16 17 17 16]]
|
||||||
|
|
||||||
|
Border type: BORDER_WRAP
|
||||||
|
[[16 17 12 13 14 15 16 17 12 13]
|
||||||
|
[22 23 18 19 20 21 22 23 18 19]
|
||||||
|
[ 4 5 0 1 2 3 4 5 0 1]
|
||||||
|
[10 11 6 7 8 9 10 11 6 7]
|
||||||
|
[16 17 12 13 14 15 16 17 12 13]
|
||||||
|
[22 23 18 19 20 21 22 23 18 19]
|
||||||
|
[ 4 5 0 1 2 3 4 5 0 1]
|
||||||
|
[10 11 6 7 8 9 10 11 6 7]]
|
||||||
|
|
||||||
|
Border type: BORDER_REFLECT_101
|
||||||
|
[[14 13 12 13 14 15 16 17 16 15]
|
||||||
|
[ 8 7 6 7 8 9 10 11 10 9]
|
||||||
|
[ 2 1 0 1 2 3 4 5 4 3]
|
||||||
|
[ 8 7 6 7 8 9 10 11 10 9]
|
||||||
|
[14 13 12 13 14 15 16 17 16 15]
|
||||||
|
[20 19 18 19 20 21 22 23 22 21]
|
||||||
|
[14 13 12 13 14 15 16 17 16 15]
|
||||||
|
[ 8 7 6 7 8 9 10 11 10 9]]
|
||||||
|
@endcode
|
||||||
|
|
||||||
|
@see borderInterpolate, copyMakeBorder
|
||||||
|
*/
|
||||||
|
enum BorderTypes {
|
||||||
|
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
|
||||||
|
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
|
||||||
|
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
|
||||||
|
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
|
||||||
|
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
|
||||||
|
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno` - Treats outliers as transparent.
|
||||||
|
|
||||||
|
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||||
|
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
|
||||||
|
BORDER_ISOLATED = 16 //!< Interpolation restricted within the ROI boundaries.
|
||||||
|
};
|
||||||
|
|
||||||
|
//! @} core_array
|
||||||
|
|
||||||
|
//! @addtogroup core_utils
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
/*! @brief Signals an error and raises the exception.
|
||||||
|
|
||||||
|
By default the function prints information about the error to stderr,
|
||||||
|
then it either stops if setBreakOnError() had been called before or raises the exception.
|
||||||
|
It is possible to alternate error processing by using redirectError().
|
||||||
|
@param code - error code (Error::Code)
|
||||||
|
@param err - error description
|
||||||
|
@param func - function name. Available only when the compiler supports getting it
|
||||||
|
@param file - source file name where the error has occurred
|
||||||
|
@param line - line number in the source file where the error has occurred
|
||||||
|
@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
|
||||||
|
*/
|
||||||
|
CV_EXPORTS CV_NORETURN void error(int code, const String& err, const char* func, const char* file, int line);
|
||||||
|
|
||||||
|
/*! @brief Signals an error and terminate application.
|
||||||
|
|
||||||
|
By default the function prints information about the error to stderr, then it terminates application
|
||||||
|
with std::terminate. The function is designed for invariants check in functions and methods with
|
||||||
|
noexcept attribute.
|
||||||
|
@param code - error code (Error::Code)
|
||||||
|
@param err - error description
|
||||||
|
@param func - function name. Available only when the compiler supports getting it
|
||||||
|
@param file - source file name where the error has occurred
|
||||||
|
@param line - line number in the source file where the error has occurred
|
||||||
|
@see CV_AssertTerminate
|
||||||
|
*/
|
||||||
|
CV_EXPORTS CV_NORETURN void terminate(int code, const String& err, const char* func, const char* file, int line) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CV_STATIC_ANALYSIS
|
||||||
|
|
||||||
|
// In practice, some macro are not processed correctly (noreturn is not detected).
|
||||||
|
// We need to use simplified definition for them.
|
||||||
|
#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0)
|
||||||
|
#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0)
|
||||||
|
#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
|
||||||
|
|
||||||
|
#else // CV_STATIC_ANALYSIS
|
||||||
|
|
||||||
|
/** @brief Call the error handler.
|
||||||
|
|
||||||
|
Currently, the error handler prints the error code and the error message to the standard
|
||||||
|
error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
|
||||||
|
the execution stack and all the parameters can be analyzed by the debugger. In the Release
|
||||||
|
configuration, the exception is thrown.
|
||||||
|
|
||||||
|
@param code one of Error::Code
|
||||||
|
@param msg error message
|
||||||
|
*/
|
||||||
|
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
|
||||||
|
|
||||||
|
/** @brief Call the error handler.
|
||||||
|
|
||||||
|
This macro can be used to construct an error message on-fly to include some dynamic information,
|
||||||
|
for example:
|
||||||
|
@code
|
||||||
|
// note the extra parentheses around the formatted text message
|
||||||
|
CV_Error_(Error::StsOutOfRange,
|
||||||
|
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
|
||||||
|
@endcode
|
||||||
|
@param code one of Error::Code
|
||||||
|
@param args printf-like formatted error message in parentheses
|
||||||
|
*/
|
||||||
|
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
|
||||||
|
|
||||||
|
/** @brief Checks a condition at runtime and throws exception if it fails
|
||||||
|
|
||||||
|
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
|
||||||
|
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
|
||||||
|
configurations while CV_DbgAssert is only retained in the Debug configuration.
|
||||||
|
CV_AssertTerminate is analog of CV_Assert for invariants check in functions with noexcept attribute.
|
||||||
|
It does not throw exception, but terminates the application.
|
||||||
|
*/
|
||||||
|
#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
|
||||||
|
#define CV_AssertTerminate( expr ) do { if(!!(expr)) ; else cv::terminate( #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
|
||||||
|
|
||||||
|
#endif // CV_STATIC_ANALYSIS
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#if !defined(__OPENCV_BUILD) // TODO: backward compatibility only
|
||||||
|
#ifndef CV_ErrorNoReturn
|
||||||
|
#define CV_ErrorNoReturn CV_Error
|
||||||
|
#endif
|
||||||
|
#ifndef CV_ErrorNoReturn_
|
||||||
|
#define CV_ErrorNoReturn_ CV_Error_
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CV_Assert_1 CV_Assert
|
||||||
|
#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ ))
|
||||||
|
#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ ))
|
||||||
|
|
||||||
|
#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0)
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#if !defined(NDEBUG) || defined(CV_STATIC_ANALYSIS)
|
||||||
|
# define CV_DbgAssert(expr) CV_Assert(expr)
|
||||||
|
#else
|
||||||
|
/** replaced with CV_Assert(expr) in Debug configuration */
|
||||||
|
# define CV_DbgAssert(expr)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
|
||||||
|
* bit count of A exclusive XOR'ed with B
|
||||||
|
*/
|
||||||
|
struct CV_EXPORTS Hamming
|
||||||
|
{
|
||||||
|
static const NormTypes normType = NORM_HAMMING;
|
||||||
|
typedef unsigned char ValueType;
|
||||||
|
typedef int ResultType;
|
||||||
|
|
||||||
|
/** this will count the bits in a ^ b
|
||||||
|
*/
|
||||||
|
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef Hamming HammingLUT;
|
||||||
|
|
||||||
|
/////////////////////////////////// inline norms ////////////////////////////////////
|
||||||
|
|
||||||
|
template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
|
||||||
|
inline int cv_abs(uchar x) { return x; }
|
||||||
|
inline int cv_abs(schar x) { return std::abs(x); }
|
||||||
|
inline int cv_abs(ushort x) { return x; }
|
||||||
|
inline int cv_abs(short x) { return std::abs(x); }
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normL2Sqr(const _Tp* a, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
int i=0;
|
||||||
|
#if CV_ENABLE_UNROLLED
|
||||||
|
for( ; i <= n - 4; i += 4 )
|
||||||
|
{
|
||||||
|
_AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
|
||||||
|
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for( ; i < n; i++ )
|
||||||
|
{
|
||||||
|
_AccTp v = a[i];
|
||||||
|
s += v*v;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normL1(const _Tp* a, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
int i = 0;
|
||||||
|
#if CV_ENABLE_UNROLLED
|
||||||
|
for(; i <= n - 4; i += 4 )
|
||||||
|
{
|
||||||
|
s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
|
||||||
|
(_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for( ; i < n; i++ )
|
||||||
|
s += cv_abs(a[i]);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normInf(const _Tp* a, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
s = std::max(s, (_AccTp)cv_abs(a[i]));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
int i= 0;
|
||||||
|
#if CV_ENABLE_UNROLLED
|
||||||
|
for(; i <= n - 4; i += 4 )
|
||||||
|
{
|
||||||
|
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
|
||||||
|
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for( ; i < n; i++ )
|
||||||
|
{
|
||||||
|
_AccTp v = _AccTp(a[i] - b[i]);
|
||||||
|
s += v*v;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float normL2Sqr(const float* a, const float* b, int n)
|
||||||
|
{
|
||||||
|
float s = 0.f;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
float v = a[i] - b[i];
|
||||||
|
s += v*v;
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
int i= 0;
|
||||||
|
#if CV_ENABLE_UNROLLED
|
||||||
|
for(; i <= n - 4; i += 4 )
|
||||||
|
{
|
||||||
|
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
|
||||||
|
s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for( ; i < n; i++ )
|
||||||
|
{
|
||||||
|
_AccTp v = _AccTp(a[i] - b[i]);
|
||||||
|
s += std::abs(v);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float normL1(const float* a, const float* b, int n)
|
||||||
|
{
|
||||||
|
float s = 0.f;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
s += std::abs(a[i] - b[i]);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int normL1(const uchar* a, const uchar* b, int n)
|
||||||
|
{
|
||||||
|
int s = 0;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
s += std::abs(a[i] - b[i]);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename _AccTp> static inline
|
||||||
|
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
|
||||||
|
{
|
||||||
|
_AccTp s = 0;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
_AccTp v0 = a[i] - b[i];
|
||||||
|
s = std::max(s, std::abs(v0));
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Computes the cube root of an argument.
|
||||||
|
|
||||||
|
The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
|
||||||
|
NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
|
||||||
|
single-precision data.
|
||||||
|
@param val A function argument.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS_W float cubeRoot(float val);
|
||||||
|
|
||||||
|
/** @overload
|
||||||
|
|
||||||
|
cubeRoot with argument of `double` type calls `std::cbrt(double)`
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
double cubeRoot(double val)
|
||||||
|
{
|
||||||
|
return std::cbrt(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Calculates the angle of a 2D vector in degrees.
|
||||||
|
|
||||||
|
The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
|
||||||
|
in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
|
||||||
|
@param x x-coordinate of the vector.
|
||||||
|
@param y y-coordinate of the vector.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS_W float fastAtan2(float y, float x);
|
||||||
|
|
||||||
|
/** proxy for hal::LU */
|
||||||
|
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
/** proxy for hal::LU */
|
||||||
|
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
/** proxy for hal::Cholesky */
|
||||||
|
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
/** proxy for hal::Cholesky */
|
||||||
|
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
|
||||||
|
////////////////// forward declarations for important OpenCV types //////////////////
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> class Vec;
|
||||||
|
template<typename _Tp, int m, int n> class Matx;
|
||||||
|
|
||||||
|
template<typename _Tp> class Complex;
|
||||||
|
template<typename _Tp> class Point_;
|
||||||
|
template<typename _Tp> class Point3_;
|
||||||
|
template<typename _Tp> class Size_;
|
||||||
|
template<typename _Tp> class Rect_;
|
||||||
|
template<typename _Tp> class Scalar_;
|
||||||
|
|
||||||
|
class CV_EXPORTS RotatedRect;
|
||||||
|
class CV_EXPORTS Range;
|
||||||
|
class CV_EXPORTS TermCriteria;
|
||||||
|
class CV_EXPORTS KeyPoint;
|
||||||
|
class CV_EXPORTS DMatch;
|
||||||
|
class CV_EXPORTS RNG;
|
||||||
|
|
||||||
|
class CV_EXPORTS Mat;
|
||||||
|
class CV_EXPORTS MatExpr;
|
||||||
|
|
||||||
|
class CV_EXPORTS UMat;
|
||||||
|
|
||||||
|
class CV_EXPORTS SparseMat;
|
||||||
|
typedef Mat MatND;
|
||||||
|
|
||||||
|
template<typename _Tp> class Mat_;
|
||||||
|
template<typename _Tp> class SparseMat_;
|
||||||
|
|
||||||
|
class CV_EXPORTS MatConstIterator;
|
||||||
|
class CV_EXPORTS SparseMatIterator;
|
||||||
|
class CV_EXPORTS SparseMatConstIterator;
|
||||||
|
template<typename _Tp> class MatIterator_;
|
||||||
|
template<typename _Tp> class MatConstIterator_;
|
||||||
|
template<typename _Tp> class SparseMatIterator_;
|
||||||
|
template<typename _Tp> class SparseMatConstIterator_;
|
||||||
|
|
||||||
|
namespace ogl
|
||||||
|
{
|
||||||
|
class CV_EXPORTS Buffer;
|
||||||
|
class CV_EXPORTS Texture2D;
|
||||||
|
class CV_EXPORTS Arrays;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace cuda
|
||||||
|
{
|
||||||
|
class CV_EXPORTS GpuMat;
|
||||||
|
class CV_EXPORTS GpuMatND;
|
||||||
|
class CV_EXPORTS HostMem;
|
||||||
|
class CV_EXPORTS Stream;
|
||||||
|
class CV_EXPORTS Event;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace cudev
|
||||||
|
{
|
||||||
|
template <typename _Tp> class GpuMat_;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace ipp
|
||||||
|
{
|
||||||
|
CV_EXPORTS unsigned long long getIppFeatures();
|
||||||
|
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
|
||||||
|
int line = 0);
|
||||||
|
CV_EXPORTS int getIppStatus();
|
||||||
|
CV_EXPORTS String getIppErrorLocation();
|
||||||
|
CV_EXPORTS_W bool useIPP();
|
||||||
|
CV_EXPORTS_W void setUseIPP(bool flag);
|
||||||
|
CV_EXPORTS_W String getIppVersion();
|
||||||
|
|
||||||
|
// IPP Not-Exact mode. This function may force use of IPP then both IPP and OpenCV provide proper results
|
||||||
|
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
|
||||||
|
CV_EXPORTS_W bool useIPP_NotExact();
|
||||||
|
CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
|
||||||
|
#ifndef DISABLE_OPENCV_3_COMPATIBILITY
|
||||||
|
static inline bool useIPP_NE() { return useIPP_NotExact(); }
|
||||||
|
static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // ipp
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
//! @} core_utils
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#include "opencv2/core/neon_utils.hpp"
|
||||||
|
#include "opencv2/core/vsx_utils.hpp"
|
||||||
|
#include "opencv2/core/check.hpp"
|
||||||
|
|
||||||
|
#endif //OPENCV_CORE_BASE_HPP
|
||||||
340
3rdpart/OpenCV/include/opencv2/core/bindings_utils.hpp
Normal file
340
3rdpart/OpenCV/include/opencv2/core/bindings_utils.hpp
Normal file
@@ -0,0 +1,340 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||||
|
#define OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||||
|
|
||||||
|
#include <opencv2/core/async.hpp>
|
||||||
|
#include <opencv2/core/detail/async_promise.hpp>
|
||||||
|
#include <opencv2/core/utils/logger.hpp>
|
||||||
|
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
namespace cv { namespace utils {
|
||||||
|
//! @addtogroup core_utils
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpInputArray(InputArray argument);
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpBool(bool argument)
|
||||||
|
{
|
||||||
|
return (argument) ? String("Bool: True") : String("Bool: False");
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpInt(int argument)
|
||||||
|
{
|
||||||
|
return cv::format("Int: %d", argument);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpInt64(int64 argument)
|
||||||
|
{
|
||||||
|
std::ostringstream oss("Int64: ", std::ios::ate);
|
||||||
|
oss << argument;
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpSizeT(size_t argument)
|
||||||
|
{
|
||||||
|
std::ostringstream oss("size_t: ", std::ios::ate);
|
||||||
|
oss << argument;
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpFloat(float argument)
|
||||||
|
{
|
||||||
|
return cv::format("Float: %.2f", argument);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpDouble(double argument)
|
||||||
|
{
|
||||||
|
return cv::format("Double: %.2f", argument);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpCString(const char* argument)
|
||||||
|
{
|
||||||
|
return cv::format("String: %s", argument);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpString(const String& argument)
|
||||||
|
{
|
||||||
|
return cv::format("String: %s", argument.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpRect(const Rect& argument)
|
||||||
|
{
|
||||||
|
return format("rect: (x=%d, y=%d, w=%d, h=%d)", argument.x, argument.y,
|
||||||
|
argument.width, argument.height);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpTermCriteria(const TermCriteria& argument)
|
||||||
|
{
|
||||||
|
return format("term_criteria: (type=%d, max_count=%d, epsilon=%lf",
|
||||||
|
argument.type, argument.maxCount, argument.epsilon);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpRotatedRect(const RotatedRect& argument)
|
||||||
|
{
|
||||||
|
return format("rotated_rect: (c_x=%f, c_y=%f, w=%f, h=%f, a=%f)",
|
||||||
|
argument.center.x, argument.center.y, argument.size.width,
|
||||||
|
argument.size.height, argument.angle);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpRange(const Range& argument)
|
||||||
|
{
|
||||||
|
if (argument == Range::all())
|
||||||
|
{
|
||||||
|
return "range: all";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return format("range: (s=%d, e=%d)", argument.start, argument.end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpVectorOfInt(const std::vector<int>& vec);
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpVectorOfDouble(const std::vector<double>& vec);
|
||||||
|
|
||||||
|
CV_EXPORTS_W String dumpVectorOfRect(const std::vector<Rect>& vec);
|
||||||
|
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String testOverloadResolution(int value, const Point& point = Point(42, 24))
|
||||||
|
{
|
||||||
|
return format("overload (int=%d, point=(x=%d, y=%d))", value, point.x,
|
||||||
|
point.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String testOverloadResolution(const Rect& rect)
|
||||||
|
{
|
||||||
|
return format("overload (rect=(x=%d, y=%d, w=%d, h=%d))", rect.x, rect.y,
|
||||||
|
rect.width, rect.height);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
RotatedRect testRotatedRect(float x, float y, float w, float h, float angle)
|
||||||
|
{
|
||||||
|
return RotatedRect(Point2f(x, y), Size2f(w, h), angle);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
std::vector<RotatedRect> testRotatedRectVector(float x, float y, float w, float h, float angle)
|
||||||
|
{
|
||||||
|
std::vector<RotatedRect> result;
|
||||||
|
for (int i = 0; i < 10; i++)
|
||||||
|
result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
int testOverwriteNativeMethod(int argument)
|
||||||
|
{
|
||||||
|
return argument;
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String testReservedKeywordConversion(int positional_argument, int lambda = 2, int from = 3)
|
||||||
|
{
|
||||||
|
return format("arg=%d, lambda=%d, from=%d", positional_argument, lambda, from);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
void generateVectorOfRect(size_t len, CV_OUT std::vector<Rect>& vec)
|
||||||
|
{
|
||||||
|
vec.resize(len);
|
||||||
|
if (len > 0)
|
||||||
|
{
|
||||||
|
RNG rng(12345);
|
||||||
|
Mat tmp(static_cast<int>(len), 1, CV_32SC4);
|
||||||
|
rng.fill(tmp, RNG::UNIFORM, 10, 20);
|
||||||
|
tmp.copyTo(vec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
void generateVectorOfInt(size_t len, CV_OUT std::vector<int>& vec)
|
||||||
|
{
|
||||||
|
vec.resize(len);
|
||||||
|
if (len > 0)
|
||||||
|
{
|
||||||
|
RNG rng(554433);
|
||||||
|
Mat tmp(static_cast<int>(len), 1, CV_32SC1);
|
||||||
|
rng.fill(tmp, RNG::UNIFORM, -10, 10);
|
||||||
|
tmp.copyTo(vec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
void generateVectorOfMat(size_t len, int rows, int cols, int dtype, CV_OUT std::vector<Mat>& vec)
|
||||||
|
{
|
||||||
|
vec.resize(len);
|
||||||
|
if (len > 0)
|
||||||
|
{
|
||||||
|
RNG rng(65431);
|
||||||
|
for (size_t i = 0; i < len; ++i)
|
||||||
|
{
|
||||||
|
vec[i].create(rows, cols, dtype);
|
||||||
|
rng.fill(vec[i], RNG::UNIFORM, 0, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
void testRaiseGeneralException()
|
||||||
|
{
|
||||||
|
throw std::runtime_error("exception text");
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
AsyncArray testAsyncArray(InputArray argument)
|
||||||
|
{
|
||||||
|
AsyncPromise p;
|
||||||
|
p.setValue(argument);
|
||||||
|
return p.getArrayResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
AsyncArray testAsyncException()
|
||||||
|
{
|
||||||
|
AsyncPromise p;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsOk, "Test: Generated async error");
|
||||||
|
}
|
||||||
|
catch (const cv::Exception& e)
|
||||||
|
{
|
||||||
|
p.setException(e);
|
||||||
|
}
|
||||||
|
return p.getArrayResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static inline
|
||||||
|
String dumpVec2i(const cv::Vec2i value = cv::Vec2i(42, 24)) {
|
||||||
|
return format("Vec2i(%d, %d)", value[0], value[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CV_EXPORTS_W_SIMPLE ClassWithKeywordProperties {
|
||||||
|
CV_PROP_RW int lambda;
|
||||||
|
CV_PROP int except;
|
||||||
|
|
||||||
|
CV_WRAP explicit ClassWithKeywordProperties(int lambda_arg = 24, int except_arg = 42)
|
||||||
|
{
|
||||||
|
lambda = lambda_arg;
|
||||||
|
except = except_arg;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CV_EXPORTS_W_PARAMS FunctionParams
|
||||||
|
{
|
||||||
|
CV_PROP_RW int lambda = -1;
|
||||||
|
CV_PROP_RW float sigma = 0.0f;
|
||||||
|
|
||||||
|
FunctionParams& setLambda(int value) CV_NOEXCEPT
|
||||||
|
{
|
||||||
|
lambda = value;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
FunctionParams& setSigma(float value) CV_NOEXCEPT
|
||||||
|
{
|
||||||
|
sigma = value;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_WRAP static inline String
|
||||||
|
copyMatAndDumpNamedArguments(InputArray src, OutputArray dst,
|
||||||
|
const FunctionParams& params = FunctionParams())
|
||||||
|
{
|
||||||
|
src.copyTo(dst);
|
||||||
|
return format("lambda=%d, sigma=%.1f", params.lambda,
|
||||||
|
params.sigma);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace nested {
|
||||||
|
CV_WRAP static inline bool testEchoBooleanFunction(bool flag) {
|
||||||
|
return flag;
|
||||||
|
}
|
||||||
|
|
||||||
|
class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
struct CV_EXPORTS_W_SIMPLE Params
|
||||||
|
{
|
||||||
|
CV_PROP_RW int int_value;
|
||||||
|
CV_PROP_RW float float_value;
|
||||||
|
|
||||||
|
CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f)
|
||||||
|
{
|
||||||
|
int_value = int_param;
|
||||||
|
float_value = float_param;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params())
|
||||||
|
{
|
||||||
|
params_ = params;
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP int getIntParam() const
|
||||||
|
{
|
||||||
|
return params_.int_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP float getFloatParam() const
|
||||||
|
{
|
||||||
|
return params_.float_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static std::string originalName()
|
||||||
|
{
|
||||||
|
return "OriginalClassName";
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_WRAP static Ptr<OriginalClassName>
|
||||||
|
create(const OriginalClassName::Params& params = OriginalClassName::Params())
|
||||||
|
{
|
||||||
|
return makePtr<OriginalClassName>(params);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
OriginalClassName::Params params_;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef OriginalClassName::Params OriginalClassName_Params;
|
||||||
|
} // namespace nested
|
||||||
|
|
||||||
|
//! @endcond IGNORED
|
||||||
|
|
||||||
|
namespace fs {
|
||||||
|
CV_EXPORTS_W cv::String getCacheDirectoryForDownloads();
|
||||||
|
} // namespace fs
|
||||||
|
|
||||||
|
//! @} // core_utils
|
||||||
|
} // namespace cv::utils
|
||||||
|
|
||||||
|
} // namespaces cv / utils
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_BINDINGS_UTILS_HPP
|
||||||
40
3rdpart/OpenCV/include/opencv2/core/bufferpool.hpp
Normal file
40
3rdpart/OpenCV/include/opencv2/core/bufferpool.hpp
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_BUFFER_POOL_HPP
|
||||||
|
#define OPENCV_CORE_BUFFER_POOL_HPP
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4265)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @addtogroup core_opencl
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
class BufferPoolController
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
~BufferPoolController() { }
|
||||||
|
public:
|
||||||
|
virtual size_t getReservedSize() const = 0;
|
||||||
|
virtual size_t getMaxReservedSize() const = 0;
|
||||||
|
virtual void setMaxReservedSize(size_t size) = 0;
|
||||||
|
virtual void freeAllReservedBuffers() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_BUFFER_POOL_HPP
|
||||||
173
3rdpart/OpenCV/include/opencv2/core/check.hpp
Normal file
173
3rdpart/OpenCV/include/opencv2/core/check.hpp
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CHECK_HPP
|
||||||
|
#define OPENCV_CORE_CHECK_HPP
|
||||||
|
|
||||||
|
#include <opencv2/core/base.hpp>
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
|
||||||
|
CV_EXPORTS const char* depthToString(int depth);
|
||||||
|
|
||||||
|
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
|
||||||
|
CV_EXPORTS String typeToString(int type);
|
||||||
|
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
|
||||||
|
CV_EXPORTS const char* depthToString_(int depth);
|
||||||
|
|
||||||
|
/** Returns string of cv::Mat depth value: CV_8UC3 -> "CV_8UC3" or cv::String() */
|
||||||
|
CV_EXPORTS cv::String typeToString_(int type);
|
||||||
|
|
||||||
|
enum TestOp {
|
||||||
|
TEST_CUSTOM = 0,
|
||||||
|
TEST_EQ = 1,
|
||||||
|
TEST_NE = 2,
|
||||||
|
TEST_LE = 3,
|
||||||
|
TEST_LT = 4,
|
||||||
|
TEST_GE = 5,
|
||||||
|
TEST_GT = 6,
|
||||||
|
CV__LAST_TEST_OP
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CheckContext {
|
||||||
|
const char* func;
|
||||||
|
const char* file;
|
||||||
|
int line;
|
||||||
|
enum TestOp testOp;
|
||||||
|
const char* message;
|
||||||
|
const char* p1_str;
|
||||||
|
const char* p2_str;
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifndef CV__CHECK_FILENAME
|
||||||
|
# define CV__CHECK_FILENAME __FILE__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV__CHECK_FUNCTION
|
||||||
|
# if defined _MSC_VER
|
||||||
|
# define CV__CHECK_FUNCTION __FUNCSIG__
|
||||||
|
# elif defined __GNUC__
|
||||||
|
# define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
|
||||||
|
# else
|
||||||
|
# define CV__CHECK_FUNCTION "<unknown>"
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
|
||||||
|
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
|
||||||
|
static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
|
||||||
|
{ CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, "" message, "" p1_str, "" p2_str }
|
||||||
|
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const bool v1, const bool v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
|
||||||
|
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_true(const bool v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_false(const bool v, const CheckContext& ctx);
|
||||||
|
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
|
||||||
|
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
|
||||||
|
|
||||||
|
|
||||||
|
#define CV__TEST_EQ(v1, v2) ((v1) == (v2))
|
||||||
|
#define CV__TEST_NE(v1, v2) ((v1) != (v2))
|
||||||
|
#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
|
||||||
|
#define CV__TEST_LT(v1, v2) ((v1) < (v2))
|
||||||
|
#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
|
||||||
|
#define CV__TEST_GT(v1, v2) ((v1) > (v2))
|
||||||
|
|
||||||
|
#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
|
||||||
|
if(CV__TEST_##op((v1), (v2))) ; else { \
|
||||||
|
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
|
||||||
|
cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
|
||||||
|
if(!!(test_expr)) ; else { \
|
||||||
|
CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
|
||||||
|
cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
|
||||||
|
/// Supported values of these types: int, float, double
|
||||||
|
#define CV_CheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_CheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_CheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_CheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_CheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_CheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
|
||||||
|
/// Check with additional "decoding" of type values in error message
|
||||||
|
#define CV_CheckTypeEQ(t1, t2, msg) CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
|
||||||
|
/// Check with additional "decoding" of depth values in error message
|
||||||
|
#define CV_CheckDepthEQ(d1, d2, msg) CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
|
||||||
|
|
||||||
|
#define CV_CheckChannelsEQ(c1, c2, msg) CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg)
|
||||||
|
|
||||||
|
/// Example: type == CV_8UC1 || type == CV_8UC3
|
||||||
|
#define CV_CheckType(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
|
||||||
|
|
||||||
|
/// Example: depth == CV_32F || depth == CV_64F
|
||||||
|
#define CV_CheckDepth(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
|
||||||
|
|
||||||
|
/// Example: channel == 1 || channel == 3
|
||||||
|
#define CV_CheckChannels(t, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, MatChannels, t, (test_expr), #t, #test_expr, msg)
|
||||||
|
|
||||||
|
/// Example: v == A || v == B
|
||||||
|
#define CV_Check(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||||
|
|
||||||
|
/// Example: v == true
|
||||||
|
#define CV_CheckTrue(v, msg) CV__CHECK_CUSTOM_TEST(_, true, v, v, #v, "", msg)
|
||||||
|
|
||||||
|
/// Example: v == false
|
||||||
|
#define CV_CheckFalse(v, msg) CV__CHECK_CUSTOM_TEST(_, false, v, (!(v)), #v, "", msg)
|
||||||
|
|
||||||
|
/// Some complex conditions: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
|
||||||
|
// TODO define pretty-printers
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
#define CV_DbgCheck(v, test_expr, msg) CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
|
||||||
|
#define CV_DbgCheckEQ(v1, v2, msg) CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_DbgCheckNE(v1, v2, msg) CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_DbgCheckLE(v1, v2, msg) CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_DbgCheckLT(v1, v2, msg) CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_DbgCheckGE(v1, v2, msg) CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#define CV_DbgCheckGT(v1, v2, msg) CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
|
||||||
|
#else
|
||||||
|
#define CV_DbgCheck(v, test_expr, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckEQ(v1, v2, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckNE(v1, v2, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckLE(v1, v2, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckLT(v1, v2, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckGE(v1, v2, msg) do { } while (0)
|
||||||
|
#define CV_DbgCheckGT(v1, v2, msg) do { } while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_CHECK_HPP
|
||||||
48
3rdpart/OpenCV/include/opencv2/core/core.hpp
Normal file
48
3rdpart/OpenCV/include/opencv2/core/core.hpp
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
|
||||||
|
#error this is a compatibility header which should not be used inside the OpenCV library
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core.hpp"
|
||||||
3128
3rdpart/OpenCV/include/opencv2/core/core_c.h
Normal file
3128
3rdpart/OpenCV/include/opencv2/core/core_c.h
Normal file
File diff suppressed because it is too large
Load Diff
1346
3rdpart/OpenCV/include/opencv2/core/cuda.hpp
Normal file
1346
3rdpart/OpenCV/include/opencv2/core/cuda.hpp
Normal file
File diff suppressed because it is too large
Load Diff
763
3rdpart/OpenCV/include/opencv2/core/cuda.inl.hpp
Normal file
763
3rdpart/OpenCV/include/opencv2/core/cuda.inl.hpp
Normal file
@@ -0,0 +1,763 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CUDAINL_HPP
|
||||||
|
#define OPENCV_CORE_CUDAINL_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cuda.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda {
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// GpuMat
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(Allocator* allocator_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{
|
||||||
|
if (rows_ > 0 && cols_ > 0)
|
||||||
|
create(rows_, cols_, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{
|
||||||
|
if (size_.height > 0 && size_.width > 0)
|
||||||
|
create(size_.height, size_.width, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// WARNING: unreachable code using Ninja
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4702)
|
||||||
|
#endif
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{
|
||||||
|
if (rows_ > 0 && cols_ > 0)
|
||||||
|
{
|
||||||
|
create(rows_, cols_, type_);
|
||||||
|
setTo(s_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{
|
||||||
|
if (size_.height > 0 && size_.width > 0)
|
||||||
|
{
|
||||||
|
create(size_.height, size_.width, type_);
|
||||||
|
setTo(s_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(const GpuMat& m)
|
||||||
|
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
|
||||||
|
{
|
||||||
|
if (refcount)
|
||||||
|
CV_XADD(refcount, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
|
||||||
|
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
|
||||||
|
{
|
||||||
|
upload(arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat::~GpuMat()
|
||||||
|
{
|
||||||
|
release();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat& GpuMat::operator =(const GpuMat& m)
|
||||||
|
{
|
||||||
|
if (this != &m)
|
||||||
|
{
|
||||||
|
GpuMat temp(m);
|
||||||
|
swap(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMat::create(Size size_, int type_)
|
||||||
|
{
|
||||||
|
create(size_.height, size_.width, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMat::swap(GpuMat& b)
|
||||||
|
{
|
||||||
|
std::swap(flags, b.flags);
|
||||||
|
std::swap(rows, b.rows);
|
||||||
|
std::swap(cols, b.cols);
|
||||||
|
std::swap(step, b.step);
|
||||||
|
std::swap(data, b.data);
|
||||||
|
std::swap(datastart, b.datastart);
|
||||||
|
std::swap(dataend, b.dataend);
|
||||||
|
std::swap(refcount, b.refcount);
|
||||||
|
std::swap(allocator, b.allocator);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::clone() const
|
||||||
|
{
|
||||||
|
GpuMat m;
|
||||||
|
copyTo(m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// WARNING: unreachable code using Ninja
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4702)
|
||||||
|
#endif
|
||||||
|
inline
|
||||||
|
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
|
||||||
|
{
|
||||||
|
copyTo(dst, mask, Stream::Null());
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat& GpuMat::setTo(Scalar s)
|
||||||
|
{
|
||||||
|
return setTo(s, Stream::Null());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
|
||||||
|
{
|
||||||
|
return setTo(s, mask, Stream::Null());
|
||||||
|
}
|
||||||
|
|
||||||
|
// WARNING: unreachable code using Ninja
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4702)
|
||||||
|
#endif
|
||||||
|
inline
|
||||||
|
void GpuMat::convertTo(OutputArray dst, int rtype) const
|
||||||
|
{
|
||||||
|
convertTo(dst, rtype, Stream::Null());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
|
||||||
|
{
|
||||||
|
convertTo(dst, rtype, alpha, beta, Stream::Null());
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
|
||||||
|
{
|
||||||
|
convertTo(dst, rtype, alpha, 0.0, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMat::assignTo(GpuMat& m, int _type) const
|
||||||
|
{
|
||||||
|
if (_type < 0)
|
||||||
|
m = *this;
|
||||||
|
else
|
||||||
|
convertTo(m, _type);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
uchar* GpuMat::ptr(int y)
|
||||||
|
{
|
||||||
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
|
return data + step * y;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
const uchar* GpuMat::ptr(int y) const
|
||||||
|
{
|
||||||
|
CV_DbgAssert( (unsigned)y < (unsigned)rows );
|
||||||
|
return data + step * y;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> inline
|
||||||
|
_Tp* GpuMat::ptr(int y)
|
||||||
|
{
|
||||||
|
return (_Tp*)ptr(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> inline
|
||||||
|
const _Tp* GpuMat::ptr(int y) const
|
||||||
|
{
|
||||||
|
return (const _Tp*)ptr(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T> inline
|
||||||
|
GpuMat::operator PtrStepSz<T>() const
|
||||||
|
{
|
||||||
|
return PtrStepSz<T>(rows, cols, (T*)data, step);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T> inline
|
||||||
|
GpuMat::operator PtrStep<T>() const
|
||||||
|
{
|
||||||
|
return PtrStep<T>((T*)data, step);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::row(int y) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, Range(y, y+1), Range::all());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::col(int x) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, Range::all(), Range(x, x+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::rowRange(int startrow, int endrow) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, Range(startrow, endrow), Range::all());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::rowRange(Range r) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, r, Range::all());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::colRange(int startcol, int endcol) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, Range::all(), Range(startcol, endcol));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::colRange(Range r) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, Range::all(), r);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, rowRange_, colRange_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMat GpuMat::operator ()(Rect roi) const
|
||||||
|
{
|
||||||
|
return GpuMat(*this, roi);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMat::isContinuous() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMat::elemSize() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMat::elemSize1() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE1(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int GpuMat::type() const
|
||||||
|
{
|
||||||
|
return CV_MAT_TYPE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int GpuMat::depth() const
|
||||||
|
{
|
||||||
|
return CV_MAT_DEPTH(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int GpuMat::channels() const
|
||||||
|
{
|
||||||
|
return CV_MAT_CN(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMat::step1() const
|
||||||
|
{
|
||||||
|
return step / elemSize1();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
Size GpuMat::size() const
|
||||||
|
{
|
||||||
|
return Size(cols, rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMat::empty() const
|
||||||
|
{
|
||||||
|
return data == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void* GpuMat::cudaPtr() const
|
||||||
|
{
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
GpuMat createContinuous(int rows, int cols, int type)
|
||||||
|
{
|
||||||
|
GpuMat m;
|
||||||
|
createContinuous(rows, cols, type, m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void createContinuous(Size size, int type, OutputArray arr)
|
||||||
|
{
|
||||||
|
createContinuous(size.height, size.width, type, arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
GpuMat createContinuous(Size size, int type)
|
||||||
|
{
|
||||||
|
GpuMat m;
|
||||||
|
createContinuous(size, type, m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
|
||||||
|
{
|
||||||
|
ensureSizeIsEnough(size.height, size.width, type, arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void swap(GpuMat& a, GpuMat& b)
|
||||||
|
{
|
||||||
|
a.swap(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// GpuMatND
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMatND::GpuMatND() :
|
||||||
|
flags(0), dims(0), data(nullptr), offset(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
GpuMatND::GpuMatND(SizeArray _size, int _type) :
|
||||||
|
flags(0), dims(0), data(nullptr), offset(0)
|
||||||
|
{
|
||||||
|
create(std::move(_size), _type);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void GpuMatND::swap(GpuMatND& m) noexcept
|
||||||
|
{
|
||||||
|
std::swap(*this, m);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMatND::isContinuous() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMatND::isSubmatrix() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::SUBMATRIX_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMatND::elemSize() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMatND::elemSize1() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE1(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMatND::empty() const
|
||||||
|
{
|
||||||
|
return data == nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool GpuMatND::external() const
|
||||||
|
{
|
||||||
|
return !empty() && data_.use_count() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
uchar* GpuMatND::getDevicePtr() const
|
||||||
|
{
|
||||||
|
return data + offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMatND::total() const
|
||||||
|
{
|
||||||
|
size_t p = 1;
|
||||||
|
for(auto s : size)
|
||||||
|
p *= s;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t GpuMatND::totalMemSize() const
|
||||||
|
{
|
||||||
|
return size[0] * step[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int GpuMatND::type() const
|
||||||
|
{
|
||||||
|
return CV_MAT_TYPE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// HostMem
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::HostMem(AllocType alloc_type_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::HostMem(const HostMem& m)
|
||||||
|
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
|
||||||
|
{
|
||||||
|
if( refcount )
|
||||||
|
CV_XADD(refcount, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||||
|
{
|
||||||
|
if (rows_ > 0 && cols_ > 0)
|
||||||
|
create(rows_, cols_, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||||
|
{
|
||||||
|
if (size_.height > 0 && size_.width > 0)
|
||||||
|
create(size_.height, size_.width, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
|
||||||
|
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
|
||||||
|
{
|
||||||
|
arr.getMat().copyTo(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem::~HostMem()
|
||||||
|
{
|
||||||
|
release();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem& HostMem::operator =(const HostMem& m)
|
||||||
|
{
|
||||||
|
if (this != &m)
|
||||||
|
{
|
||||||
|
HostMem temp(m);
|
||||||
|
swap(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void HostMem::swap(HostMem& b)
|
||||||
|
{
|
||||||
|
std::swap(flags, b.flags);
|
||||||
|
std::swap(rows, b.rows);
|
||||||
|
std::swap(cols, b.cols);
|
||||||
|
std::swap(step, b.step);
|
||||||
|
std::swap(data, b.data);
|
||||||
|
std::swap(datastart, b.datastart);
|
||||||
|
std::swap(dataend, b.dataend);
|
||||||
|
std::swap(refcount, b.refcount);
|
||||||
|
std::swap(alloc_type, b.alloc_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
HostMem HostMem::clone() const
|
||||||
|
{
|
||||||
|
HostMem m(size(), type(), alloc_type);
|
||||||
|
createMatHeader().copyTo(m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void HostMem::create(Size size_, int type_)
|
||||||
|
{
|
||||||
|
create(size_.height, size_.width, type_);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
Mat HostMem::createMatHeader() const
|
||||||
|
{
|
||||||
|
return Mat(size(), type(), data, step);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool HostMem::isContinuous() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t HostMem::elemSize() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t HostMem::elemSize1() const
|
||||||
|
{
|
||||||
|
return CV_ELEM_SIZE1(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int HostMem::type() const
|
||||||
|
{
|
||||||
|
return CV_MAT_TYPE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int HostMem::depth() const
|
||||||
|
{
|
||||||
|
return CV_MAT_DEPTH(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
int HostMem::channels() const
|
||||||
|
{
|
||||||
|
return CV_MAT_CN(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t HostMem::step1() const
|
||||||
|
{
|
||||||
|
return step / elemSize1();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
Size HostMem::size() const
|
||||||
|
{
|
||||||
|
return Size(cols, rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool HostMem::empty() const
|
||||||
|
{
|
||||||
|
return data == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
void swap(HostMem& a, HostMem& b)
|
||||||
|
{
|
||||||
|
a.swap(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// Stream
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
inline
|
||||||
|
Stream::Stream(const Ptr<Impl>& impl)
|
||||||
|
: impl_(impl)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// Event
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
inline
|
||||||
|
Event::Event(const Ptr<Impl>& impl)
|
||||||
|
: impl_(impl)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// Initialization & Info
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
// WARNING: unreachable code using Ninja
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4702)
|
||||||
|
#endif
|
||||||
|
inline
|
||||||
|
bool TargetArchs::has(int major, int minor)
|
||||||
|
{
|
||||||
|
return hasPtx(major, minor) || hasBin(major, minor);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool TargetArchs::hasEqualOrGreater(int major, int minor)
|
||||||
|
{
|
||||||
|
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
DeviceInfo::DeviceInfo()
|
||||||
|
{
|
||||||
|
device_id_ = getDevice();
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline
|
||||||
|
DeviceInfo::DeviceInfo(int device_id)
|
||||||
|
{
|
||||||
|
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
|
||||||
|
device_id_ = device_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// WARNING: unreachable code using Ninja
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4702)
|
||||||
|
#endif
|
||||||
|
inline
|
||||||
|
int DeviceInfo::deviceID() const
|
||||||
|
{
|
||||||
|
return device_id_;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t DeviceInfo::freeMemory() const
|
||||||
|
{
|
||||||
|
size_t _totalMemory = 0, _freeMemory = 0;
|
||||||
|
queryMemory(_totalMemory, _freeMemory);
|
||||||
|
return _freeMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
size_t DeviceInfo::totalMemory() const
|
||||||
|
{
|
||||||
|
size_t _totalMemory = 0, _freeMemory = 0;
|
||||||
|
queryMemory(_totalMemory, _freeMemory);
|
||||||
|
return _totalMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
bool DeviceInfo::supports(FeatureSet feature_set) const
|
||||||
|
{
|
||||||
|
int version = majorVersion() * 10 + minorVersion();
|
||||||
|
return version >= feature_set;
|
||||||
|
}
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1920
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
}} // namespace cv { namespace cuda {
|
||||||
|
|
||||||
|
//===================================================================================
|
||||||
|
// Mat
|
||||||
|
//===================================================================================
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
inline
|
||||||
|
Mat::Mat(const cuda::GpuMat& m)
|
||||||
|
: flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
|
||||||
|
{
|
||||||
|
m.download(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_CUDAINL_HPP
|
||||||
211
3rdpart/OpenCV/include/opencv2/core/cuda/block.hpp
Normal file
211
3rdpart/OpenCV/include/opencv2/core/cuda/block.hpp
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
|
||||||
|
#define OPENCV_CUDA_DEVICE_BLOCK_HPP
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
struct Block
|
||||||
|
{
|
||||||
|
static __device__ __forceinline__ unsigned int id()
|
||||||
|
{
|
||||||
|
return blockIdx.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int stride()
|
||||||
|
{
|
||||||
|
return blockDim.x * blockDim.y * blockDim.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ void sync()
|
||||||
|
{
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ int flattenedThreadId()
|
||||||
|
{
|
||||||
|
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename It, typename T>
|
||||||
|
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
|
||||||
|
{
|
||||||
|
int STRIDE = stride();
|
||||||
|
It t = beg + flattenedThreadId();
|
||||||
|
|
||||||
|
for(; t < end; t += STRIDE)
|
||||||
|
*t = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename OutIt, typename T>
|
||||||
|
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
|
||||||
|
{
|
||||||
|
int STRIDE = stride();
|
||||||
|
int tid = flattenedThreadId();
|
||||||
|
value += tid;
|
||||||
|
|
||||||
|
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
|
||||||
|
*t = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename InIt, typename OutIt>
|
||||||
|
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
|
||||||
|
{
|
||||||
|
int STRIDE = stride();
|
||||||
|
InIt t = beg + flattenedThreadId();
|
||||||
|
OutIt o = out + (t - beg);
|
||||||
|
|
||||||
|
for(; t < end; t += STRIDE, o += STRIDE)
|
||||||
|
*o = *t;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename InIt, typename OutIt, class UnOp>
|
||||||
|
static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
|
||||||
|
{
|
||||||
|
int STRIDE = stride();
|
||||||
|
InIt t = beg + flattenedThreadId();
|
||||||
|
OutIt o = out + (t - beg);
|
||||||
|
|
||||||
|
for(; t < end; t += STRIDE, o += STRIDE)
|
||||||
|
*o = op(*t);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
|
||||||
|
static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
|
||||||
|
{
|
||||||
|
int STRIDE = stride();
|
||||||
|
InIt1 t1 = beg1 + flattenedThreadId();
|
||||||
|
InIt2 t2 = beg2 + flattenedThreadId();
|
||||||
|
OutIt o = out + (t1 - beg1);
|
||||||
|
|
||||||
|
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
|
||||||
|
*o = op(*t1, *t2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int CTA_SIZE, typename T, class BinOp>
|
||||||
|
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
|
||||||
|
{
|
||||||
|
int tid = flattenedThreadId();
|
||||||
|
T val = buffer[tid];
|
||||||
|
|
||||||
|
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||||
|
|
||||||
|
if (tid < 32)
|
||||||
|
{
|
||||||
|
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||||
|
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||||
|
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||||
|
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||||
|
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||||
|
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int CTA_SIZE, typename T, class BinOp>
|
||||||
|
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
|
||||||
|
{
|
||||||
|
int tid = flattenedThreadId();
|
||||||
|
T val = buffer[tid] = init;
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
|
||||||
|
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
|
||||||
|
|
||||||
|
if (tid < 32)
|
||||||
|
{
|
||||||
|
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
|
||||||
|
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
|
||||||
|
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
|
||||||
|
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
|
||||||
|
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
|
||||||
|
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
return buffer[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class BinOp>
|
||||||
|
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
|
||||||
|
{
|
||||||
|
int ftid = flattenedThreadId();
|
||||||
|
int sft = stride();
|
||||||
|
|
||||||
|
if (sft < n)
|
||||||
|
{
|
||||||
|
for (unsigned int i = sft + ftid; i < n; i += sft)
|
||||||
|
data[ftid] = op(data[ftid], data[i]);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
n = sft;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (n > 1)
|
||||||
|
{
|
||||||
|
unsigned int half = n/2;
|
||||||
|
|
||||||
|
if (ftid < half)
|
||||||
|
data[ftid] = op(data[ftid], data[n - ftid - 1]);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
n = n - half;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
|
||||||
722
3rdpart/OpenCV/include/opencv2/core/cuda/border_interpolate.hpp
Normal file
722
3rdpart/OpenCV/include/opencv2/core/cuda/border_interpolate.hpp
Normal file
@@ -0,0 +1,722 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||||
|
#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||||
|
|
||||||
|
#include "saturate_cast.hpp"
|
||||||
|
#include "vec_traits.hpp"
|
||||||
|
#include "vec_math.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BrdConstant
|
||||||
|
|
||||||
|
template <typename D> struct BrdRowConstant
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return x >= 0 ? saturate_cast<D>(data[x]) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return x < width ? saturate_cast<D>(data[x]) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
int width;
|
||||||
|
D val;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdColConstant
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
int height;
|
||||||
|
D val;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdConstant
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||||
|
{
|
||||||
|
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||||
|
}
|
||||||
|
|
||||||
|
int height;
|
||||||
|
int width;
|
||||||
|
D val;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BrdReplicate
|
||||||
|
|
||||||
|
template <typename D> struct BrdRowReplicate
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return ::max(x, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return ::min(x, last_col);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_low(idx_col_high(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdColReplicate
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return ::max(y, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return ::min(y, last_row);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_low(idx_row_high(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdReplicate
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return ::max(y, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return ::min(y, last_row);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_low(idx_row_high(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return ::max(x, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return ::min(x, last_col);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_low(idx_col_high(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BrdReflect101
|
||||||
|
|
||||||
|
template <typename D> struct BrdRowReflect101
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(x) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_low(idx_col_high(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdColReflect101
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return ::abs(y) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_low(idx_row_high(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdReflect101
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return ::abs(y) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_low(idx_row_high(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(x) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_low(idx_col_high(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BrdReflect
|
||||||
|
|
||||||
|
template <typename D> struct BrdRowReflect
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_high(::abs(x) - (x < 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdColReflect
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_high(::abs(y) - (y < 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdReflect
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return (::abs(y) - (y < 0)) % (last_row + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_low(idx_row_high(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return (::abs(x) - (x < 0)) % (last_col + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return (last_col - ::abs(last_col - x) + (x > last_col));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_low(idx_col_high(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
int last_row;
|
||||||
|
int last_col;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BrdWrap
|
||||||
|
|
||||||
|
template <typename D> struct BrdRowWrap
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_low(int x) const
|
||||||
|
{
|
||||||
|
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col_high(int x) const
|
||||||
|
{
|
||||||
|
return (x < width) * x + (x >= width) * (x % width);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_col(int x) const
|
||||||
|
{
|
||||||
|
return idx_col_high(idx_col_low(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_low(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col_high(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(data[idx_col(x)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
int width;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename D> struct BrdColWrap
|
||||||
|
{
|
||||||
|
typedef D result_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
|
||||||
|
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_low(int y) const
|
||||||
|
{
|
||||||
|
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row_high(int y) const
|
||||||
|
{
|
||||||
|
return (y < height) * y + (y >= height) * (y % height);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ int idx_row(int y) const
|
||||||
|
{
|
||||||
|
return idx_row_high(idx_row_low(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||||
|
}
|
||||||
|
|
||||||
|
int height;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Two-dimensional "wrap" (periodic) border policy: out-of-range row AND
// column indices are folded back into [0, height) x [0, width), i.e. the
// image is treated as infinitely tiled in both directions.
template <typename D> struct BrdWrap
{
    typedef D result_type;

    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
        height(height_), width(width_)
    {
    }
    // Extra unused argument keeps the constructor signature uniform across
    // all border policies (some take a border value).
    template <typename U>
    __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
        height(height_), width(width_)
    {
    }

    // Negative row index -> add the smallest multiple of `height` that makes
    // it non-negative.
    __device__ __forceinline__ int idx_row_low(int y) const
    {
        return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
    }

    // Row index >= height -> reduce modulo `height`.
    __device__ __forceinline__ int idx_row_high(int y) const
    {
        return (y < height) ? y : (y % height);
    }

    // Full row wrap: handles both underflow and overflow.
    __device__ __forceinline__ int idx_row(int y) const
    {
        return idx_row_high(idx_row_low(y));
    }

    // Column analogues of the row mappings above.
    __device__ __forceinline__ int idx_col_low(int x) const
    {
        return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
    }

    __device__ __forceinline__ int idx_col_high(int x) const
    {
        return (x < width) ? x : (x % width);
    }

    __device__ __forceinline__ int idx_col(int x) const
    {
        return idx_col_high(idx_col_low(x));
    }

    // Raw-pointer access; `step` is the row pitch in BYTES.
    template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
    {
        return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
    }

    // Ptr2D-functor access (e.g. PtrStep-like objects providing operator()).
    template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
    {
        return saturate_cast<D>(src(idx_row(y), idx_col(x)));
    }

    int height; // number of valid rows
    int width;  // number of valid columns
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////
|
||||||
|
// BorderReader
|
||||||
|
|
||||||
|
// Adaptor that combines a 2D pixel source (Ptr2D) with a border policy (B):
// operator()(y, x) reads through the policy, so out-of-range coordinates are
// remapped (or substituted) according to B before touching the source.
template <typename Ptr2D, typename B> struct BorderReader
{
    typedef typename B::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}

    // Delegates the actual bounds handling to the border policy.
    __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
    {
        return b.at(y, x, ptr);
    }

    Ptr2D ptr; // underlying pixel source (stored by value so it can be a kernel argument)
    B b;       // border policy instance
};
|
||||||
|
|
||||||
|
// under win32 there is some bug with templated types that passed as kernel parameters
|
||||||
|
// with this specialization all works fine
|
||||||
|
// Specialization of BorderReader for the constant-border policy. Instead of
// storing a BrdConstant<D> object, it copies its fields (height/width/val)
// directly — per the comment above, templated members passed as kernel
// parameters misbehave on win32, and this flat layout avoids that.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
        src(src_), height(b.height), width(b.width), val(b.val)
    {
    }

    // In-range pixels are read (with saturation to D); anything outside the
    // image rectangle yields the constant border value.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
    }

    Ptr2D src;  // underlying pixel source
    int height; // number of valid rows
    int width;  // number of valid columns
    D val;      // value returned for out-of-range coordinates
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
|
||||||
309
3rdpart/OpenCV/include/opencv2/core/cuda/color.hpp
Normal file
309
3rdpart/OpenCV/include/opencv2/core/cuda/color.hpp
Normal file
@@ -0,0 +1,309 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_COLOR_HPP
|
||||||
|
#define OPENCV_CUDA_COLOR_HPP
|
||||||
|
|
||||||
|
#include "detail/color_detail.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
{
    // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
    //     typedef ... functor_type;
    //     static __host__ __device__ functor_type create_functor();
    // };
    //
    // The numeric macro arguments are, in order: source channel count,
    // destination channel count, and the index of the blue channel in the
    // RGB-ordered side (0 = BGR layout, 2 = RGB layout). The Lab/Luv macros
    // take an extra bool: true = sRGB (gamma-corrected) input, false = linear.

    // RGB <-> RGB: channel reorder and alpha add/drop.
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS

    // RGB -> 16-bit packed 555/565 formats (last argument: green bit depth).
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS

    // 16-bit packed 555/565 -> RGB.
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS

    // Grayscale <-> RGB / packed formats.
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS

    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)

    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)

    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS

    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS

    // RGB <-> YUV.
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS

    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS

    // RGB <-> YCrCb.
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS

    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS

    // RGB <-> CIE XYZ.
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS

    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS

    // RGB <-> HSV.
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS

    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS

    // RGB <-> HLS.
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS

    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

    #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS

    // RGB <-> CIE Lab; the bool selects sRGB (true) vs linear-RGB (false) input.
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS

    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS

    // RGB <-> CIE Luv; same sRGB/linear flag as Lab above.
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS

    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)

    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)

    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace device
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_COLOR_HPP
|
||||||
131
3rdpart/OpenCV/include/opencv2/core/cuda/common.hpp
Normal file
131
3rdpart/OpenCV/include/opencv2/core/cuda/common.hpp
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_COMMON_HPP
|
||||||
|
#define OPENCV_CUDA_COMMON_HPP
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "opencv2/core/cuda_types.hpp"
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core/base.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
#ifndef CV_PI_F
|
||||||
|
#ifndef CV_PI
|
||||||
|
#define CV_PI_F 3.14159265f
|
||||||
|
#else
|
||||||
|
#define CV_PI_F ((float)CV_PI)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv { namespace cuda {
    // Raises a cv::Error::GpuApiCallError (via cv::error) when `err` is not
    // cudaSuccess; a success code is a no-op. Used by the cudaSafeCall macro
    // below, which supplies file/line/function from the call site.
    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
    {
        if (err == cudaSuccess)
            return;

        // Clear CUDA's sticky per-thread error state so later queries start
        // from cudaSuccess again before we report this failure.
        cudaGetLastError();
        cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
    }
}}
|
||||||
|
|
||||||
|
#ifndef cudaSafeCall
|
||||||
|
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv { namespace cuda
{
    // True when `step` (a byte offset or pitch) is a multiple of `size`.
    static inline bool isAligned(size_t step, size_t size)
    {
        return (step % size) == 0;
    }

    // True when the address held in `ptr` is a multiple of `size` bytes.
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return isAligned(reinterpret_cast<size_t>(ptr), size);
    }
}}
|
||||||
|
|
||||||
|
namespace cv { namespace cuda
{
    namespace device
    {
        // Ceiling division: number of `grain`-sized groups needed to cover
        // `total` items (the standard grid/block sizing helper).
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
            return (total + grain - 1) / grain;
        }

#if (CUDART_VERSION >= 12000)
        // The legacy texture-reference API (and this helper's old signature)
        // was removed in CUDA 12; keep a stub that reports the removal.
        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t*, PtrStepSz<T>&, const cudaTextureDesc&) {
            CV_Error(cv::Error::GpuNotSupported, "Function removed in CUDA SDK 12"); }
#else
        //TODO: remove from OpenCV 5.x
        // Binds a pitched 2D image to a legacy texture reference
        // (deprecated texture-reference API, pre-CUDA 12 only).
        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
        {
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }

        // Creates a texture object over a pitched 2D image (pitch in bytes
        // comes from img.step); throws via cudaSafeCall on failure.
        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz<T>& img, const cudaTextureDesc& texDesc)
        {
            cudaResourceDesc resDesc;
            memset(&resDesc, 0, sizeof(resDesc));
            resDesc.resType = cudaResourceTypePitch2D;
            resDesc.res.pitch2D.devPtr = static_cast<void*>(img.ptr());
            resDesc.res.pitch2D.height = img.rows;
            resDesc.res.pitch2D.width = img.cols;
            resDesc.res.pitch2D.pitchInBytes = img.step;
            resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>();

            cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) );
        }
#endif
    }
}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_COMMON_HPP
|
||||||
38
3rdpart/OpenCV/include/opencv2/core/cuda/cuda_compat.hpp
Normal file
38
3rdpart/OpenCV/include/opencv2/core/cuda/cuda_compat.hpp
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_CUDA_COMPAT_HPP
|
||||||
|
#define OPENCV_CUDA_CUDA_COMPAT_HPP
|
||||||
|
|
||||||
|
#include <cuda.h>
|
||||||
|
|
||||||
|
// Compatibility shims for CUDA 13, where the built-in ulonglong4/double4
// vector types were replaced by 16-byte-aligned variants (*_16a). Code that
// uses these aliases compiles unchanged against both older and newer SDKs.
namespace cv { namespace cuda { namespace device { namespace compat
{
#if CUDA_VERSION >= 13000
    // CUDA 13+: map to the new explicitly 16-byte-aligned types.
    using ulonglong4 = ::ulonglong4_16a;
    using double4 = ::double4_16a;
    __host__ __device__ __forceinline__
    double4 make_double4(double x, double y, double z, double w)
    {
        return ::make_double4_16a(x, y, z, w);
    }
#else
    // Pre-13 SDKs: the classic types already have the expected layout.
    using ulonglong4 = ::ulonglong4;
    using double4 = ::double4;
    __host__ __device__ __forceinline__
    double4 make_double4(double x, double y, double z, double w)
    {
        return ::make_double4(x, y, z, w);
    }
#endif
    // Version-independent names for callers that want to be explicit about
    // using the compat layer.
    using ulonglong4Compat = ulonglong4;
    using double4Compat = double4;
    __host__ __device__ __forceinline__
    double4Compat make_double4_compat(double x, double y, double z, double w)
    {
        return make_double4(x, y, z, w);
    }
}}}}
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_CUDA_COMPAT_HPP
|
||||||
113
3rdpart/OpenCV/include/opencv2/core/cuda/datamov_utils.hpp
Normal file
113
3rdpart/OpenCV/include/opencv2/core/cuda/datamov_utils.hpp
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||||
|
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
|
||||||
|
|
||||||
|
// for Fermi memory space is detected automatically
|
||||||
|
template <typename T> struct ForceGlob
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
#else // __CUDA_ARCH__ >= 200
|
||||||
|
|
||||||
|
#if defined(_WIN64) || defined(__LP64__)
|
||||||
|
// 64-bit register modifier for inlined asm
|
||||||
|
#define OPENCV_CUDA_ASM_PTR "l"
|
||||||
|
#else
|
||||||
|
// 32-bit register modifier for inlined asm
|
||||||
|
#define OPENCV_CUDA_ASM_PTR "r"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<class T> struct ForceGlob;
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
|
||||||
|
template <> struct ForceGlob<base_type> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||||
|
{ \
|
||||||
|
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||||
|
} \
|
||||||
|
};
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
|
||||||
|
template <> struct ForceGlob<base_type> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
|
||||||
|
{ \
|
||||||
|
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
|
||||||
|
} \
|
||||||
|
};
|
||||||
|
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
|
||||||
|
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
|
||||||
|
|
||||||
|
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
|
||||||
|
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
|
||||||
|
#undef OPENCV_CUDA_ASM_PTR
|
||||||
|
|
||||||
|
#endif // __CUDA_ARCH__ >= 200
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
|
||||||
2018
3rdpart/OpenCV/include/opencv2/core/cuda/detail/color_detail.hpp
Normal file
2018
3rdpart/OpenCV/include/opencv2/core/cuda/detail/color_detail.hpp
Normal file
File diff suppressed because one or more lines are too long
394
3rdpart/OpenCV/include/opencv2/core/cuda/detail/reduce.hpp
Normal file
394
3rdpart/OpenCV/include/opencv2/core/cuda/detail/reduce.hpp
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||||
|
#define OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||||
|
|
||||||
|
#include <thrust/tuple.h>
|
||||||
|
#include "../warp.hpp"
|
||||||
|
#include "../warp_shuffle.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
namespace reduce_detail
|
||||||
|
{
|
||||||
|
template <typename T> struct GetType;
|
||||||
|
template <typename T> struct GetType<T*>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
template <typename T> struct GetType<volatile T*>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
template <typename T> struct GetType<T&>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int I, unsigned int N>
|
||||||
|
struct For
|
||||||
|
{
|
||||||
|
template <class PointerTuple, class ValTuple>
|
||||||
|
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
thrust::get<I>(smem)[tid] = thrust::get<I>(val);
|
||||||
|
|
||||||
|
For<I + 1, N>::loadToSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ValTuple>
|
||||||
|
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
thrust::get<I>(val) = thrust::get<I>(smem)[tid];
|
||||||
|
|
||||||
|
For<I + 1, N>::loadFromSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||||
|
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
|
||||||
|
{
|
||||||
|
typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
|
||||||
|
thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||||
|
|
||||||
|
For<I + 1, N>::merge(smem, val, tid, delta, op);
|
||||||
|
}
|
||||||
|
template <class ValTuple, class OpTuple>
|
||||||
|
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
|
||||||
|
{
|
||||||
|
typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
|
||||||
|
thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
|
||||||
|
|
||||||
|
For<I + 1, N>::mergeShfl(val, delta, width, op);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <unsigned int N>
|
||||||
|
struct For<N, N>
|
||||||
|
{
|
||||||
|
template <class PointerTuple, class ValTuple>
|
||||||
|
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ValTuple>
|
||||||
|
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class PointerTuple, class ValTuple, class OpTuple>
|
||||||
|
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class ValTuple, class OpTuple>
|
||||||
|
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
smem[tid] = val;
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
val = smem[tid];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class Op>
|
||||||
|
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
|
||||||
|
{
|
||||||
|
T reg = smem[tid + delta];
|
||||||
|
smem[tid] = val = op(val, reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class Op>
|
||||||
|
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
|
||||||
|
{
|
||||||
|
T reg = shfl_down(val, delta, width);
|
||||||
|
val = op(val, reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (CUDART_VERSION < 12040) // details: https://github.com/opencv/opencv_contrib/issues/3690
|
||||||
|
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||||
|
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||||
|
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||||
|
unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||||
|
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||||
|
unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||||
|
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||||
|
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||||
|
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||||
|
unsigned int tid,
|
||||||
|
unsigned int delta,
|
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
|
||||||
|
}
|
||||||
|
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||||
|
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||||
|
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||||
|
unsigned int delta,
|
||||||
|
unsigned int width,
|
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <typename... P, typename... R>
|
||||||
|
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P...>& smem, const thrust::tuple<R...>& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P...> >::value>::loadToSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... P, typename... R>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P...>& smem, const thrust::tuple<R...>& val, unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P...> >::value>::loadFromSmem(smem, val, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... P, typename... R, class... Op>
|
||||||
|
__device__ __forceinline__ void merge(const thrust::tuple<P...>& smem, const thrust::tuple<R...>& val, unsigned int tid, unsigned int delta, const thrust::tuple<Op...>& op)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<P...> >::value>::merge(smem, val, tid, delta, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... R, class... Op>
|
||||||
|
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R...>& val, unsigned int delta, unsigned int width, const thrust::tuple<Op...>& op)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<R...> >::value>::mergeShfl(val, delta, width, op);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
template <unsigned int N> struct Generic
|
||||||
|
{
|
||||||
|
template <typename Pointer, typename Reference, class Op>
|
||||||
|
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||||
|
{
|
||||||
|
loadToSmem(smem, val, tid);
|
||||||
|
if (N >= 32)
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
if (N >= 2048)
|
||||||
|
{
|
||||||
|
if (tid < 1024)
|
||||||
|
merge(smem, val, tid, 1024, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
if (N >= 1024)
|
||||||
|
{
|
||||||
|
if (tid < 512)
|
||||||
|
merge(smem, val, tid, 512, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
if (N >= 512)
|
||||||
|
{
|
||||||
|
if (tid < 256)
|
||||||
|
merge(smem, val, tid, 256, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
if (N >= 256)
|
||||||
|
{
|
||||||
|
if (tid < 128)
|
||||||
|
merge(smem, val, tid, 128, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
if (N >= 128)
|
||||||
|
{
|
||||||
|
if (tid < 64)
|
||||||
|
merge(smem, val, tid, 64, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
if (N >= 64)
|
||||||
|
{
|
||||||
|
if (tid < 32)
|
||||||
|
merge(smem, val, tid, 32, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tid < 16)
|
||||||
|
{
|
||||||
|
merge(smem, val, tid, 16, op);
|
||||||
|
merge(smem, val, tid, 8, op);
|
||||||
|
merge(smem, val, tid, 4, op);
|
||||||
|
merge(smem, val, tid, 2, op);
|
||||||
|
merge(smem, val, tid, 1, op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int I, typename Pointer, typename Reference, class Op>
|
||||||
|
struct Unroll
|
||||||
|
{
|
||||||
|
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
|
||||||
|
{
|
||||||
|
mergeShfl(val, I, N, op);
|
||||||
|
Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||||
|
}
|
||||||
|
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||||
|
{
|
||||||
|
merge(smem, val, tid, I, op);
|
||||||
|
Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Pointer, typename Reference, class Op>
|
||||||
|
struct Unroll<0, Pointer, Reference, Op>
|
||||||
|
{
|
||||||
|
static __device__ void loopShfl(Reference, Op, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
static __device__ void loop(Pointer, Reference, unsigned int, Op)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int N> struct WarpOptimized
|
||||||
|
{
|
||||||
|
template <typename Pointer, typename Reference, class Op>
|
||||||
|
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||||
|
{
|
||||||
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
|
CV_UNUSED(smem);
|
||||||
|
CV_UNUSED(tid);
|
||||||
|
|
||||||
|
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
|
||||||
|
#else
|
||||||
|
loadToSmem(smem, val, tid);
|
||||||
|
|
||||||
|
if (tid < N / 2)
|
||||||
|
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int N> struct GenericOptimized32
|
||||||
|
{
|
||||||
|
enum { M = N / 32 };
|
||||||
|
|
||||||
|
template <typename Pointer, typename Reference, class Op>
|
||||||
|
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
|
||||||
|
{
|
||||||
|
const unsigned int laneId = Warp::laneId();
|
||||||
|
|
||||||
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
|
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
|
||||||
|
|
||||||
|
if (laneId == 0)
|
||||||
|
loadToSmem(smem, val, tid / 32);
|
||||||
|
#else
|
||||||
|
loadToSmem(smem, val, tid);
|
||||||
|
|
||||||
|
if (laneId < 16)
|
||||||
|
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
if (laneId == 0)
|
||||||
|
loadToSmem(smem, val, tid / 32);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
loadFromSmem(smem, val, tid);
|
||||||
|
|
||||||
|
if (tid < 32)
|
||||||
|
{
|
||||||
|
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
|
||||||
|
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
|
||||||
|
#else
|
||||||
|
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <bool val, class T1, class T2> struct StaticIf;
|
||||||
|
template <class T1, class T2> struct StaticIf<true, T1, T2>
|
||||||
|
{
|
||||||
|
typedef T1 type;
|
||||||
|
};
|
||||||
|
template <class T1, class T2> struct StaticIf<false, T1, T2>
|
||||||
|
{
|
||||||
|
typedef T2 type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int N> struct IsPowerOf2
|
||||||
|
{
|
||||||
|
enum { value = ((N != 0) && !(N & (N - 1))) };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int N> struct Dispatcher
|
||||||
|
{
|
||||||
|
typedef typename StaticIf<
|
||||||
|
(N <= 32) && IsPowerOf2<N>::value,
|
||||||
|
WarpOptimized<N>,
|
||||||
|
typename StaticIf<
|
||||||
|
(N <= 1024) && IsPowerOf2<N>::value,
|
||||||
|
GenericOptimized32<N>,
|
||||||
|
Generic<N>
|
||||||
|
>::type
|
||||||
|
>::type reductor;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
|
||||||
@@ -0,0 +1,567 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||||
|
#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||||
|
|
||||||
|
#include <thrust/tuple.h>
|
||||||
|
#include "../warp.hpp"
|
||||||
|
#include "../warp_shuffle.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
namespace reduce_key_val_detail
|
||||||
|
{
|
||||||
|
template <typename T> struct GetType;
|
||||||
|
template <typename T> struct GetType<T*>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
template <typename T> struct GetType<volatile T*>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
template <typename T> struct GetType<T&>
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <unsigned int I, unsigned int N>
|
||||||
|
struct For
|
||||||
|
{
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
thrust::get<I>(smem)[tid] = thrust::get<I>(data);
|
||||||
|
|
||||||
|
For<I + 1, N>::loadToSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
thrust::get<I>(data) = thrust::get<I>(smem)[tid];
|
||||||
|
|
||||||
|
For<I + 1, N>::loadFromSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class ReferenceTuple>
|
||||||
|
static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
|
||||||
|
{
|
||||||
|
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||||
|
|
||||||
|
For<I + 1, N>::copyShfl(val, delta, width);
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
|
||||||
|
{
|
||||||
|
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||||
|
|
||||||
|
For<I + 1, N>::copy(svals, val, tid, delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||||
|
static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
|
||||||
|
{
|
||||||
|
typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
|
||||||
|
|
||||||
|
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||||
|
{
|
||||||
|
thrust::get<I>(key) = reg;
|
||||||
|
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
|
||||||
|
}
|
||||||
|
|
||||||
|
For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
|
||||||
|
}
|
||||||
|
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||||
|
static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
|
||||||
|
const ValPointerTuple& svals, const ValReferenceTuple& val,
|
||||||
|
const CmpTuple& cmp,
|
||||||
|
unsigned int tid, unsigned int delta)
|
||||||
|
{
|
||||||
|
typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
|
||||||
|
|
||||||
|
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
|
||||||
|
{
|
||||||
|
thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
|
||||||
|
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
|
||||||
|
}
|
||||||
|
|
||||||
|
For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <unsigned int N>
|
||||||
|
struct For<N, N>
|
||||||
|
{
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class ReferenceTuple>
|
||||||
|
static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class PointerTuple, class ReferenceTuple>
|
||||||
|
static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
|
||||||
|
static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
|
||||||
|
static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
|
||||||
|
const ValPointerTuple&, const ValReferenceTuple&,
|
||||||
|
const CmpTuple&,
|
||||||
|
unsigned int, unsigned int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
// loadToSmem
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
smem[tid] = data;
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
data = smem[tid];
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (CUDART_VERSION < 12040)
|
||||||
|
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||||
|
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||||
|
unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
|
||||||
|
unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <typename... VP, typename... VR>
|
||||||
|
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP...>& smem, const thrust::tuple<VR...>& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<VP...> >::value>::loadToSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
template <typename... VP, typename... VR>
|
||||||
|
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP...>& smem, const thrust::tuple<VR...>& data, unsigned int tid)
|
||||||
|
{
|
||||||
|
For<0, thrust::tuple_size<thrust::tuple<VP...> >::value>::loadFromSmem(smem, data, tid);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename V>
|
||||||
|
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
|
||||||
|
{
|
||||||
|
val = shfl_down(val, delta, width);
|
||||||
|
}
|
||||||
|
template <typename V>
|
||||||
|
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
|
||||||
|
{
|
||||||
|
svals[tid] = val = svals[tid + delta];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename K, typename V, class Cmp>
|
||||||
|
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
|
||||||
|
{
|
||||||
|
K reg = shfl_down(key, delta, width);
|
||||||
|
|
||||||
|
if (cmp(reg, key))
|
||||||
|
{
|
||||||
|
key = reg;
|
||||||
|
copyValsShfl(val, delta, width);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template <typename K, typename V, class Cmp>
|
||||||
|
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
|
||||||
|
{
|
||||||
|
K reg = skeys[tid + delta];
|
||||||
|
|
||||||
|
if (cmp(reg, key))
|
||||||
|
{
|
||||||
|
skeys[tid] = key = reg;
|
||||||
|
copyVals(svals, val, tid, delta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if (CUDART_VERSION < 12040) // details: https://github.com/opencv/opencv_contrib/issues/3690
    // Fixed-arity (10-slot thrust::tuple) overloads of the copy/merge helpers
    // above, for toolkits whose thrust::tuple is not truly variadic.

    // Shuffle every element of the value tuple down by 'delta' lanes.
    template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
    __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int delta,
                                                 int width)
    {
        For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
    }

    // Element-wise copyVals over tuples of storage pointers / register values.
    template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
    __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                             const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                             unsigned int tid, unsigned int delta)
    {
        For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
    }

    // Scalar key, tuple of values: shuffle-based merge step.
    template <typename K,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp>
    __device__ __forceinline__ void mergeShfl(K& key,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const Cmp& cmp,
                                              unsigned int delta, int width)
    {
        K reg = shfl_down(key, delta, width);

        if (cmp(reg, key))
        {
            key = reg;
            copyValsShfl(val, delta, width);
        }
    }

    // Scalar key, tuple of values: memory-based merge step.
    template <typename K,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp>
    __device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                          const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                          const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                          const Cmp& cmp, unsigned int tid, unsigned int delta)
    {
        K reg = skeys[tid + delta];

        if (cmp(reg, key))
        {
            skeys[tid] = key = reg;
            copyVals(svals, val, tid, delta);
        }
    }

    // Tuple of keys, tuple of values, one comparator per key: delegate the
    // whole merge decision element-wise to the For<> loop.
    template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
    __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                              unsigned int delta, int width)
    {
        For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
    }

    // Tuple of keys, tuple of values, memory-based variant.
    template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
    __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                          const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                          const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                          const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                          const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                          unsigned int tid, unsigned int delta)
    {
        For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
    }
#else
    // Variadic equivalents of the overloads above (CUDA >= 12.4).

    template <typename... VR>
    __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR...>& val, unsigned int delta, int width)
    {
        For<0, thrust::tuple_size<thrust::tuple<VR...> >::value>::copyShfl(val, delta, width);
    }

    template <typename... VP, typename... VR>
    __device__ __forceinline__ void copyVals(const thrust::tuple<VP...>& svals, const thrust::tuple<VR...>& val, unsigned int tid, unsigned int delta)
    {
        For<0, thrust::tuple_size<thrust::tuple<VP...> >::value>::copy(svals, val, tid, delta);
    }

    // Scalar key, tuple of values: shuffle-based merge step.
    template <typename K, typename... VR, class Cmp>
    __device__ __forceinline__ void mergeShfl(K& key, const thrust::tuple<VR...>& val, const Cmp& cmp, unsigned int delta, int width)
    {
        K reg = shfl_down(key, delta, width);

        if (cmp(reg, key))
        {
            key = reg;
            copyValsShfl(val, delta, width);
        }
    }

    // Scalar key, tuple of values: memory-based merge step.
    template <typename K, typename... VP, typename... VR, class Cmp>
    __device__ __forceinline__ void merge(volatile K* skeys, K& key, const thrust::tuple<VP...>& svals,
                                          const thrust::tuple<VR...>& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
    {
        K reg = skeys[tid + delta];

        if (cmp(reg, key))
        {
            skeys[tid] = key = reg;
            copyVals(svals, val, tid, delta);
        }
    }

    // Tuple of keys / values / comparators: element-wise merge via For<>.
    template <typename... KR, typename... VR, class... Cmp>
    __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR...>& key,
                                              const thrust::tuple<VR...>& val,
                                              const thrust::tuple<Cmp...>& cmp,
                                              unsigned int delta, int width)
    {
        For<0, thrust::tuple_size<thrust::tuple<KR...> >::value>::mergeShfl(key, val, cmp, delta, width);
    }

    template <typename... KP, typename... KR, typename... VP, typename... VR, class... Cmp>
    __device__ __forceinline__ void merge(const thrust::tuple<KP...>& skeys,
                                          const thrust::tuple<KR...>& key,
                                          const thrust::tuple<VP...>& svals,
                                          const thrust::tuple<VR...>& val,
                                          const thrust::tuple<Cmp...>& cmp,
                                          unsigned int tid, unsigned int delta)
    {
        For<0, thrust::tuple_size<thrust::tuple<VP...> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
    }
#endif
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
// Generic
|
||||||
|
|
||||||
|
    //////////////////////////////////////////////////////
    // Generic

    // Generic tree reduction over N threads: stage keys/values into storage,
    // then halve the active thread count each step, merging the "upper" half
    // into the "lower" half under 'cmp'.
    template <unsigned int N> struct Generic
    {
        template <class KP, class KR, class VP, class VR, class Cmp>
        static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
        {
            loadToSmem(skeys, key, tid);
            loadValsToSmem(svals, val, tid);
            // Only synchronize when the reduction spans more than one warp.
            if (N >= 32)
                __syncthreads();

            // Each guarded step merges element (tid + step) into tid, then
            // synchronizes before the next, smaller step.
            if (N >= 2048)
            {
                if (tid < 1024)
                    merge(skeys, key, svals, val, cmp, tid, 1024);

                __syncthreads();
            }
            if (N >= 1024)
            {
                if (tid < 512)
                    merge(skeys, key, svals, val, cmp, tid, 512);

                __syncthreads();
            }
            if (N >= 512)
            {
                if (tid < 256)
                    merge(skeys, key, svals, val, cmp, tid, 256);

                __syncthreads();
            }
            if (N >= 256)
            {
                if (tid < 128)
                    merge(skeys, key, svals, val, cmp, tid, 128);

                __syncthreads();
            }
            if (N >= 128)
            {
                if (tid < 64)
                    merge(skeys, key, svals, val, cmp, tid, 64);

                __syncthreads();
            }
            if (N >= 64)
            {
                if (tid < 32)
                    merge(skeys, key, svals, val, cmp, tid, 32);
            }

            // Final intra-warp tail: no __syncthreads/__syncwarp between steps.
            // NOTE(review): this relies on implicit warp-synchronous execution
            // plus volatile storage — a pre-Volta assumption; confirm it is
            // still valid for the target architectures.
            if (tid < 16)
            {
                merge(skeys, key, svals, val, cmp, tid, 16);
                merge(skeys, key, svals, val, cmp, tid, 8);
                merge(skeys, key, svals, val, cmp, tid, 4);
                merge(skeys, key, svals, val, cmp, tid, 2);
                merge(skeys, key, svals, val, cmp, tid, 1);
            }
        }
    };
|
||||||
|
|
||||||
|
    // Compile-time-unrolled reduction ladder: each level merges with offset I
    // and recurses with I/2; the I == 0 specialization terminates the recursion.
    template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
    struct Unroll
    {
        // Shuffle-based ladder (no staging storage needed).
        static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
        {
            mergeShfl(key, val, cmp, I, N);
            Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
        }
        // Memory-based ladder over skeys/svals.
        static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
        {
            merge(skeys, key, svals, val, cmp, tid, I);
            Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
        }
    };
    // Base case: offset 0 — nothing left to merge.
    template <class KP, class KR, class VP, class VR, class Cmp>
    struct Unroll<0, KP, KR, VP, VR, Cmp>
    {
        static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
        {
        }
        static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
        {
        }
    };
|
||||||
|
|
||||||
|
    // Reduction for N <= warp size (N a power of two): a single unrolled
    // ladder, no inter-warp synchronization required.
    template <unsigned int N> struct WarpOptimized
    {
        template <class KP, class KR, class VP, class VR, class Cmp>
        static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
        {
            // The shuffle path is deliberately disabled ("#if 0"); the
            // memory-based path below is always taken.
#if 0 // __CUDA_ARCH__ >= 300
            CV_UNUSED(skeys);
            CV_UNUSED(svals);
            CV_UNUSED(tid);

            Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
#else
            loadToSmem(skeys, key, tid);
            loadToSmem(svals, val, tid);

            // Lower half of the threads walk the ladder N/2, N/4, ..., 1.
            if (tid < N / 2)
                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
        }
    };
|
||||||
|
|
||||||
|
    // Two-stage reduction for 32 < N <= 1024 (N a power of two):
    // stage 1 reduces within each warp; stage 2 reduces the M = N/32
    // per-warp winners inside the first warp.
    template <unsigned int N> struct GenericOptimized32
    {
        enum { M = N / 32 };  // number of warps, i.e. per-warp partial results

        template <class KP, class KR, class VP, class VR, class Cmp>
        static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
        {
            const unsigned int laneId = Warp::laneId();

            // Shuffle path disabled ("#if 0"); memory-based path is taken.
#if 0 // __CUDA_ARCH__ >= 300
            Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);

            if (laneId == 0)
            {
                loadToSmem(skeys, key, tid / 32);
                loadToSmem(svals, val, tid / 32);
            }
#else
            loadToSmem(skeys, key, tid);
            loadToSmem(svals, val, tid);

            // Stage 1: intra-warp ladder run by the lower 16 lanes of each warp.
            if (laneId < 16)
                Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);

            __syncthreads();

            // Lane 0 of each warp publishes its winner into slot (warp index).
            if (laneId == 0)
            {
                loadToSmem(skeys, key, tid / 32);
                loadToSmem(svals, val, tid / 32);
            }
#endif

            __syncthreads();

            loadFromSmem(skeys, key, tid);

            // Stage 2: first warp reduces the M per-warp winners.
            if (tid < 32)
            {
#if 0 // __CUDA_ARCH__ >= 300
                loadFromSmem(svals, val, tid);

                Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
#else
                Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
            }
        }
    };
|
||||||
|
|
||||||
|
    // Minimal compile-time if: selects T1 when the condition is true, else T2
    // (pre-C++11 stand-in for std::conditional).
    template <bool val, class T1, class T2> struct StaticIf;
    template <class T1, class T2> struct StaticIf<true, T1, T2>
    {
        typedef T1 type;
    };
    template <class T1, class T2> struct StaticIf<false, T1, T2>
    {
        typedef T2 type;
    };

    // True iff N is a non-zero power of two (standard N & (N - 1) trick).
    template <unsigned int N> struct IsPowerOf2
    {
        enum { value = ((N != 0) && !(N & (N - 1))) };
    };

    // Picks the reduction strategy for N threads:
    //   - power of two and <= 32   -> WarpOptimized
    //   - power of two and <= 1024 -> GenericOptimized32
    //   - anything else            -> Generic
    template <unsigned int N> struct Dispatcher
    {
        typedef typename StaticIf<
            (N <= 32) && IsPowerOf2<N>::value,
            WarpOptimized<N>,
            typename StaticIf<
                (N <= 1024) && IsPowerOf2<N>::value,
                GenericOptimized32<N>,
                Generic<N>
            >::type
        >::type reductor;
    };
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
|
||||||
@@ -0,0 +1,392 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||||
|
#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||||
|
|
||||||
|
#include "../common.hpp"
|
||||||
|
#include "../vec_traits.hpp"
|
||||||
|
#include "../functional.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
namespace transform_detail
|
||||||
|
{
|
||||||
|
//! Read Write Traits
|
||||||
|
|
||||||
|
        //! Read Write Traits

        // Maps a scalar element type to the vector type used for coalesced
        // reads/writes of 'shift' elements at a time (via TypeVec).
        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
        {
            typedef typename TypeVec<T, shift>::vec_type read_type;   // vectorized source load
            typedef typename TypeVec<D, shift>::vec_type write_type;  // vectorized destination store
        };

        // Same as above for binary operations with two (possibly different)
        // source element types.
        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
        {
            typedef typename TypeVec<T1, shift>::vec_type read_type1;
            typedef typename TypeVec<T2, shift>::vec_type read_type2;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };
|
||||||
|
|
||||||
|
//! Transform kernels
|
||||||
|
|
||||||
|
template <int shift> struct OpUnroller;
|
||||||
|
template <> struct OpUnroller<1>
|
||||||
|
{
|
||||||
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
|
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||||
|
{
|
||||||
|
if (mask(y, x_shifted))
|
||||||
|
dst.x = op(src.x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
|
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||||
|
{
|
||||||
|
if (mask(y, x_shifted))
|
||||||
|
dst.x = op(src1.x, src2.x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <> struct OpUnroller<2>
|
||||||
|
{
|
||||||
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
|
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
|
||||||
|
{
|
||||||
|
if (mask(y, x_shifted))
|
||||||
|
dst.x = op(src.x);
|
||||||
|
if (mask(y, x_shifted + 1))
|
||||||
|
dst.y = op(src.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
|
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
|
||||||
|
{
|
||||||
|
if (mask(y, x_shifted))
|
||||||
|
dst.x = op(src1.x, src2.x);
|
||||||
|
if (mask(y, x_shifted + 1))
|
||||||
|
dst.y = op(src1.y, src2.y);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
        template <> struct OpUnroller<3>
        {
            // Unary: process lanes .x/.y/.z, each under its own mask test.
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
            }

            // Binary counterpart over lanes .x/.y/.z.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
            }
        };
|
||||||
|
        template <> struct OpUnroller<4>
        {
            // Unary: process lanes .x/.y/.z/.w, each under its own mask test.
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src.w);
            }

            // Binary counterpart over lanes .x/.y/.z/.w.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src1.w, src2.w);
            }
        };
|
||||||
|
        template <> struct OpUnroller<8>
        {
            // Unary: 8-wide vectors expose lanes as .a0 .. .a7 (not .x/.y/.z/.w).
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.a0 = op(src.a0);
                if (mask(y, x_shifted + 1))
                    dst.a1 = op(src.a1);
                if (mask(y, x_shifted + 2))
                    dst.a2 = op(src.a2);
                if (mask(y, x_shifted + 3))
                    dst.a3 = op(src.a3);
                if (mask(y, x_shifted + 4))
                    dst.a4 = op(src.a4);
                if (mask(y, x_shifted + 5))
                    dst.a5 = op(src.a5);
                if (mask(y, x_shifted + 6))
                    dst.a6 = op(src.a6);
                if (mask(y, x_shifted + 7))
                    dst.a7 = op(src.a7);
            }

            // Binary counterpart over lanes .a0 .. .a7.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.a0 = op(src1.a0, src2.a0);
                if (mask(y, x_shifted + 1))
                    dst.a1 = op(src1.a1, src2.a1);
                if (mask(y, x_shifted + 2))
                    dst.a2 = op(src1.a2, src2.a2);
                if (mask(y, x_shifted + 3))
                    dst.a3 = op(src1.a3, src2.a3);
                if (mask(y, x_shifted + 4))
                    dst.a4 = op(src1.a4, src2.a4);
                if (mask(y, x_shifted + 5))
                    dst.a5 = op(src1.a5, src2.a5);
                if (mask(y, x_shifted + 6))
                    dst.a6 = op(src1.a6, src2.a6);
                if (mask(y, x_shifted + 7))
                    dst.a7 = op(src1.a7, src2.a7);
            }
        };
|
||||||
|
|
||||||
|
        // Vectorized unary transform kernel: each thread handles ft::smart_shift
        // consecutive columns of one row, loading/storing them as a single
        // vector when fully in-bounds, falling back to scalar loads at the
        // row's ragged right edge.
        template <typename T, typename D, typename UnOp, typename Mask>
        static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
        {
            typedef TransformFunctorTraits<UnOp> ft;
            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * ft::smart_shift;  // first column owned by this thread

            if (y < src_.rows)
            {
                const T* src = src_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + ft::smart_shift - 1 < src_.cols)
                {
                    // Full vector in bounds: one wide load, unrolled op, one wide store.
                    // (The dispatcher has already verified data/step alignment.)
                    const read_type src_n_el = ((const read_type*)src)[x];
                    OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
                }
                else
                {
                    // Tail: element-by-element with the same mask test.
                    for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src[real_x]);
                    }
                }
            }
        }
|
||||||
|
|
||||||
|
        // Simple unary transform kernel: one thread per element, no
        // vectorization — used when alignment forbids the "smart" path.
        template <typename T, typename D, typename UnOp, typename Mask>
        __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < src.cols && y < src.rows && mask(y, x))
            {
                dst.ptr(y)[x] = op(src.ptr(y)[x]);
            }
        }
|
||||||
|
|
||||||
|
        // Vectorized binary transform kernel: same scheme as the unary
        // transformSmart, reading ft::smart_shift elements from each of the
        // two sources per thread.
        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
            const Mask mask, const BinOp op)
        {
            typedef TransformFunctorTraits<BinOp> ft;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * ft::smart_shift;  // first column owned by this thread

            if (y < src1_.rows)
            {
                const T1* src1 = src1_.ptr(y);
                const T2* src2 = src2_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + ft::smart_shift - 1 < src1_.cols)
                {
                    // Full vector in bounds: wide loads from both sources,
                    // unrolled op, one wide store.
                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];

                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);
                }
                else
                {
                    // Tail: element-by-element with the same mask test.
                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src1[real_x], src2[real_x]);
                    }
                }
            }
        }
|
||||||
|
|
||||||
|
        // Simple binary transform kernel: one thread per element, no
        // vectorization — fallback when alignment forbids the "smart" path.
        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
            const Mask mask, const BinOp op)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < src1.cols && y < src1.rows && mask(y, x))
            {
                const T1 src1_data = src1.ptr(y)[x];
                const T2 src2_data = src2.ptr(y)[x];
                dst.ptr(y)[x] = op(src1_data, src2_data);
            }
        }
|
||||||
|
|
||||||
|
        // Host-side launcher. UseSmart selects between the one-thread-per-element
        // kernels (false) and the vectorized kernels (true).
        template <bool UseSmart> struct TransformDispatcher;
        template<> struct TransformDispatcher<false>
        {
            // Launch the simple unary kernel; synchronizes when launched on
            // the default (NULL) stream.
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<UnOp> ft;

                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
                const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

                transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            // Launch the simple binary kernel; same synchronization rule.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<BinOp> ft;

                const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
                const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

                transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
|
||||||
|
        template<> struct TransformDispatcher<true>
        {
            // Launch the vectorized unary kernel. Falls back to the simple
            // dispatcher when data pointers or row strides are not aligned to
            // the smart_shift-wide vector type (required for the wide loads).
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<UnOp> ft;

                // smart_shift == 1 would make the "smart" path pointless.
                CV_StaticAssert(ft::smart_shift != 1, "");

                if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
                {
                    TransformDispatcher<false>::call(src, dst, op, mask, stream);
                    return;
                }

                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
                // Each thread covers smart_shift columns, so shrink the grid in x.
                const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

                transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            // Launch the vectorized binary kernel; same alignment fallback,
            // checked on both sources and the destination.
            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
            {
                typedef TransformFunctorTraits<BinOp> ft;

                CV_StaticAssert(ft::smart_shift != 1, "");

                if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
                    !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
                    !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
                {
                    TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
                    return;
                }

                const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
                const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

                transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
|
||||||
|
} // namespace transform_detail
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||||
|
#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||||
|
|
||||||
|
#include "../common.hpp"
|
||||||
|
#include "../vec_traits.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
namespace type_traits_detail
|
||||||
|
{
|
||||||
|
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
|
||||||
|
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
|
||||||
|
|
||||||
|
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
|
||||||
|
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
|
||||||
|
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
|
||||||
|
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
|
||||||
|
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
|
||||||
|
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
|
||||||
|
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
|
||||||
|
|
||||||
|
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
|
||||||
|
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
|
||||||
|
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
|
||||||
|
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
|
||||||
|
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
|
||||||
|
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
|
||||||
|
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
|
||||||
|
|
||||||
|
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
|
||||||
|
template <> struct IsIntegral<char> { enum {value = 1}; };
|
||||||
|
template <> struct IsIntegral<bool> { enum {value = 1}; };
|
||||||
|
|
||||||
|
template <typename T> struct IsFloat { enum {value = 0}; };
|
||||||
|
template <> struct IsFloat<float> { enum {value = 1}; };
|
||||||
|
template <> struct IsFloat<double> { enum {value = 1}; };
|
||||||
|
|
||||||
|
template <typename T> struct IsVec { enum {value = 0}; };
|
||||||
|
template <> struct IsVec<uchar1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uchar2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uchar3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uchar4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uchar8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<char1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<char2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<char3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<char4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<char8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<ushort1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<ushort2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<ushort3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<ushort4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<ushort8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<short1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<short2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<short3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<short4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<short8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uint1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uint2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uint3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uint4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<uint8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<int1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<int2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<int3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<int4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<int8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<float1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<float2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<float3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<float4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<float8> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<double1> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<double2> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<double3> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<double4> { enum {value = 1}; };
|
||||||
|
template <> struct IsVec<double8> { enum {value = 1}; };
|
||||||
|
|
||||||
|
template <class U> struct AddParameterType { typedef const U& type; };
|
||||||
|
template <class U> struct AddParameterType<U&> { typedef U& type; };
|
||||||
|
template <> struct AddParameterType<void> { typedef void type; };
|
||||||
|
|
||||||
|
template <class U> struct ReferenceTraits
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
typedef U type;
|
||||||
|
};
|
||||||
|
template <class U> struct ReferenceTraits<U&>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
typedef U type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class U> struct PointerTraits
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
typedef void type;
|
||||||
|
};
|
||||||
|
template <class U> struct PointerTraits<U*>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
typedef U type;
|
||||||
|
};
|
||||||
|
template <class U> struct PointerTraits<U*&>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
typedef U type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class U> struct UnConst
|
||||||
|
{
|
||||||
|
typedef U type;
|
||||||
|
enum { value = 0 };
|
||||||
|
};
|
||||||
|
template <class U> struct UnConst<const U>
|
||||||
|
{
|
||||||
|
typedef U type;
|
||||||
|
enum { value = 1 };
|
||||||
|
};
|
||||||
|
template <class U> struct UnConst<const U&>
|
||||||
|
{
|
||||||
|
typedef U& type;
|
||||||
|
enum { value = 1 };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class U> struct UnVolatile
|
||||||
|
{
|
||||||
|
typedef U type;
|
||||||
|
enum { value = 0 };
|
||||||
|
};
|
||||||
|
template <class U> struct UnVolatile<volatile U>
|
||||||
|
{
|
||||||
|
typedef U type;
|
||||||
|
enum { value = 1 };
|
||||||
|
};
|
||||||
|
template <class U> struct UnVolatile<volatile U&>
|
||||||
|
{
|
||||||
|
typedef U& type;
|
||||||
|
enum { value = 1 };
|
||||||
|
};
|
||||||
|
} // namespace type_traits_detail
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
|
||||||
@@ -0,0 +1,121 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||||
|
#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||||
|
|
||||||
|
#include "../datamov_utils.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
namespace vec_distance_detail
|
||||||
|
{
|
||||||
|
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
|
||||||
|
{
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
|
||||||
|
{
|
||||||
|
if (ind < len)
|
||||||
|
{
|
||||||
|
T1 val1 = *vecCached++;
|
||||||
|
|
||||||
|
T2 val2;
|
||||||
|
ForceGlob<T2>::Load(vecGlob, ind, val2);
|
||||||
|
|
||||||
|
dist.reduceIter(val1, val2);
|
||||||
|
|
||||||
|
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
|
||||||
|
{
|
||||||
|
T1 val1 = *vecCached++;
|
||||||
|
|
||||||
|
T2 val2;
|
||||||
|
ForceGlob<T2>::Load(vecGlob, 0, val2);
|
||||||
|
vecGlob += THREAD_DIM;
|
||||||
|
|
||||||
|
dist.reduceIter(val1, val2);
|
||||||
|
|
||||||
|
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
|
||||||
|
{
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
|
||||||
|
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
|
||||||
|
{
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||||
|
{
|
||||||
|
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
|
||||||
|
{
|
||||||
|
template <typename Dist, typename T1, typename T2>
|
||||||
|
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
|
||||||
|
{
|
||||||
|
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace vec_distance_detail
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
|
||||||
88
3rdpart/OpenCV/include/opencv2/core/cuda/dynamic_smem.hpp
Normal file
88
3rdpart/OpenCV/include/opencv2/core/cuda/dynamic_smem.hpp
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||||
|
#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template<class T> struct DynamicSharedMem
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ operator T*()
|
||||||
|
{
|
||||||
|
extern __shared__ int __smem[];
|
||||||
|
return (T*)__smem;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator const T*() const
|
||||||
|
{
|
||||||
|
extern __shared__ int __smem[];
|
||||||
|
return (T*)__smem;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// specialize for double to avoid unaligned memory access compile errors
|
||||||
|
template<> struct DynamicSharedMem<double>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ operator double*()
|
||||||
|
{
|
||||||
|
extern __shared__ double __smem_d[];
|
||||||
|
return (double*)__smem_d;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator const double*() const
|
||||||
|
{
|
||||||
|
extern __shared__ double __smem_d[];
|
||||||
|
return (double*)__smem_d;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
|
||||||
269
3rdpart/OpenCV/include/opencv2/core/cuda/emulation.hpp
Normal file
269
3rdpart/OpenCV/include/opencv2/core/cuda/emulation.hpp
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_EMULATION_HPP_
|
||||||
|
#define OPENCV_CUDA_EMULATION_HPP_
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "warp_reduce.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
struct Emulation
|
||||||
|
{
|
||||||
|
|
||||||
|
static __device__ __forceinline__ int syncthreadsOr(int pred)
|
||||||
|
{
|
||||||
|
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
|
||||||
|
// just campilation stab
|
||||||
|
return 0;
|
||||||
|
#else
|
||||||
|
return __syncthreads_or(pred);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int CTA_SIZE>
|
||||||
|
static __forceinline__ __device__ int Ballot(int predicate)
|
||||||
|
{
|
||||||
|
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||||
|
return __ballot(predicate);
|
||||||
|
#else
|
||||||
|
__shared__ volatile int cta_buffer[CTA_SIZE];
|
||||||
|
|
||||||
|
int tid = threadIdx.x;
|
||||||
|
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
|
||||||
|
return warp_reduce(cta_buffer);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
struct smem
|
||||||
|
{
|
||||||
|
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static __device__ __forceinline__ T atomicInc(T* address, T val)
|
||||||
|
{
|
||||||
|
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||||
|
T count;
|
||||||
|
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
count = *address & TAG_MASK;
|
||||||
|
count = tag | (count + 1);
|
||||||
|
*address = count;
|
||||||
|
} while (*address != count);
|
||||||
|
|
||||||
|
return (count & TAG_MASK) - 1;
|
||||||
|
#else
|
||||||
|
return ::atomicInc(address, val);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static __device__ __forceinline__ T atomicAdd(T* address, T val)
|
||||||
|
{
|
||||||
|
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||||
|
T count;
|
||||||
|
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
count = *address & TAG_MASK;
|
||||||
|
count = tag | (count + val);
|
||||||
|
*address = count;
|
||||||
|
} while (*address != count);
|
||||||
|
|
||||||
|
return (count & TAG_MASK) - val;
|
||||||
|
#else
|
||||||
|
return ::atomicAdd(address, val);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static __device__ __forceinline__ T atomicMin(T* address, T val)
|
||||||
|
{
|
||||||
|
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||||
|
T count = ::min(*address, val);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*address = count;
|
||||||
|
} while (*address > count);
|
||||||
|
|
||||||
|
return count;
|
||||||
|
#else
|
||||||
|
return ::atomicMin(address, val);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}; // struct cmem
|
||||||
|
|
||||||
|
struct glob
|
||||||
|
{
|
||||||
|
static __device__ __forceinline__ int atomicAdd(int* address, int val)
|
||||||
|
{
|
||||||
|
return ::atomicAdd(address, val);
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
|
||||||
|
{
|
||||||
|
return ::atomicAdd(address, val);
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ float atomicAdd(float* address, float val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 200
|
||||||
|
return ::atomicAdd(address, val);
|
||||||
|
#else
|
||||||
|
int* address_as_i = (int*) address;
|
||||||
|
int old = *address_as_i, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_i, assumed,
|
||||||
|
__float_as_int(val + __int_as_float(assumed)));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __int_as_float(old);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ double atomicAdd(double* address, double val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 130
|
||||||
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_ull, assumed,
|
||||||
|
__double_as_longlong(val + __longlong_as_double(assumed)));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(address);
|
||||||
|
CV_UNUSED(val);
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ int atomicMin(int* address, int val)
|
||||||
|
{
|
||||||
|
return ::atomicMin(address, val);
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ float atomicMin(float* address, float val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 120
|
||||||
|
int* address_as_i = (int*) address;
|
||||||
|
int old = *address_as_i, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_i, assumed,
|
||||||
|
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __int_as_float(old);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(address);
|
||||||
|
CV_UNUSED(val);
|
||||||
|
return 0.0f;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ double atomicMin(double* address, double val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 130
|
||||||
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_ull, assumed,
|
||||||
|
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(address);
|
||||||
|
CV_UNUSED(val);
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ int atomicMax(int* address, int val)
|
||||||
|
{
|
||||||
|
return ::atomicMax(address, val);
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ float atomicMax(float* address, float val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 120
|
||||||
|
int* address_as_i = (int*) address;
|
||||||
|
int old = *address_as_i, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_i, assumed,
|
||||||
|
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __int_as_float(old);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(address);
|
||||||
|
CV_UNUSED(val);
|
||||||
|
return 0.0f;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
static __device__ __forceinline__ double atomicMax(double* address, double val)
|
||||||
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 130
|
||||||
|
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||||
|
unsigned long long int old = *address_as_ull, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
old = ::atomicCAS(address_as_ull, assumed,
|
||||||
|
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
||||||
|
} while (assumed != old);
|
||||||
|
return __longlong_as_double(old);
|
||||||
|
#else
|
||||||
|
CV_UNUSED(address);
|
||||||
|
CV_UNUSED(val);
|
||||||
|
return 0.0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}; //struct Emulation
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_EMULATION_HPP_ */
|
||||||
293
3rdpart/OpenCV/include/opencv2/core/cuda/filters.hpp
Normal file
293
3rdpart/OpenCV/include/opencv2/core/cuda/filters.hpp
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_FILTERS_HPP
|
||||||
|
#define OPENCV_CUDA_FILTERS_HPP
|
||||||
|
|
||||||
|
#include "saturate_cast.hpp"
|
||||||
|
#include "vec_traits.hpp"
|
||||||
|
#include "vec_math.hpp"
|
||||||
|
#include "type_traits.hpp"
|
||||||
|
#include "nppdefs.h"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <typename Ptr2D> struct PointFilter
|
||||||
|
{
|
||||||
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
|
typedef float index_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||||
|
: src(src_)
|
||||||
|
{
|
||||||
|
CV_UNUSED(fx);
|
||||||
|
CV_UNUSED(fy);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||||
|
{
|
||||||
|
return src(__float2int_rz(y), __float2int_rz(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr2D src;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Ptr2D> struct LinearFilter
|
||||||
|
{
|
||||||
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
|
typedef float index_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||||
|
: src(src_)
|
||||||
|
{
|
||||||
|
CV_UNUSED(fx);
|
||||||
|
CV_UNUSED(fy);
|
||||||
|
}
|
||||||
|
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||||
|
{
|
||||||
|
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||||
|
|
||||||
|
work_type out = VecTraits<work_type>::all(0);
|
||||||
|
|
||||||
|
const int x1 = __float2int_rd(x);
|
||||||
|
const int y1 = __float2int_rd(y);
|
||||||
|
if (x1 <= NPP_MIN_32S || x1 >= NPP_MAX_32S || y1 <= NPP_MIN_32S || y1 >= NPP_MAX_32S)
|
||||||
|
{
|
||||||
|
elem_type src_reg = src(y1, x1);
|
||||||
|
out = out + src_reg * 1.0f;
|
||||||
|
return saturate_cast<elem_type>(out);
|
||||||
|
}
|
||||||
|
const int x2 = x1 + 1;
|
||||||
|
const int y2 = y1 + 1;
|
||||||
|
|
||||||
|
elem_type src_reg = src(y1, x1);
|
||||||
|
out = out + src_reg * ((x2 - x) * (y2 - y));
|
||||||
|
|
||||||
|
src_reg = src(y1, x2);
|
||||||
|
out = out + src_reg * ((x - x1) * (y2 - y));
|
||||||
|
|
||||||
|
src_reg = src(y2, x1);
|
||||||
|
out = out + src_reg * ((x2 - x) * (y - y1));
|
||||||
|
|
||||||
|
src_reg = src(y2, x2);
|
||||||
|
out = out + src_reg * ((x - x1) * (y - y1));
|
||||||
|
|
||||||
|
return saturate_cast<elem_type>(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr2D src;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Ptr2D> struct CubicFilter
|
||||||
|
{
|
||||||
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
|
typedef float index_type;
|
||||||
|
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
|
||||||
|
: src(src_)
|
||||||
|
{
|
||||||
|
CV_UNUSED(fx);
|
||||||
|
CV_UNUSED(fy);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ float bicubicCoeff(float x_)
|
||||||
|
{
|
||||||
|
float x = fabsf(x_);
|
||||||
|
if (x <= 1.0f)
|
||||||
|
{
|
||||||
|
return x * x * (1.5f * x - 2.5f) + 1.0f;
|
||||||
|
}
|
||||||
|
else if (x < 2.0f)
|
||||||
|
{
|
||||||
|
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 0.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ elem_type operator ()(float y, float x) const
|
||||||
|
{
|
||||||
|
const float xmin = ::ceilf(x - 2.0f);
|
||||||
|
const float xmax = ::floorf(x + 2.0f);
|
||||||
|
|
||||||
|
const float ymin = ::ceilf(y - 2.0f);
|
||||||
|
const float ymax = ::floorf(y + 2.0f);
|
||||||
|
|
||||||
|
work_type sum = VecTraits<work_type>::all(0);
|
||||||
|
float wsum = 0.0f;
|
||||||
|
|
||||||
|
for (float cy = ymin; cy <= ymax; cy += 1.0f)
|
||||||
|
{
|
||||||
|
for (float cx = xmin; cx <= xmax; cx += 1.0f)
|
||||||
|
{
|
||||||
|
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
|
||||||
|
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
|
||||||
|
wsum += w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
|
||||||
|
|
||||||
|
return saturate_cast<elem_type>(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr2D src;
|
||||||
|
};
|
||||||
|
// for integer scaling
|
||||||
|
template <typename Ptr2D> struct IntegerAreaFilter
|
||||||
|
{
|
||||||
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
|
typedef float index_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||||
|
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||||
|
{
|
||||||
|
float fsx1 = x * scale_x;
|
||||||
|
float fsx2 = fsx1 + scale_x;
|
||||||
|
|
||||||
|
int sx1 = __float2int_ru(fsx1);
|
||||||
|
int sx2 = __float2int_rd(fsx2);
|
||||||
|
|
||||||
|
float fsy1 = y * scale_y;
|
||||||
|
float fsy2 = fsy1 + scale_y;
|
||||||
|
|
||||||
|
int sy1 = __float2int_ru(fsy1);
|
||||||
|
int sy2 = __float2int_rd(fsy2);
|
||||||
|
|
||||||
|
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||||
|
work_type out = VecTraits<work_type>::all(0.f);
|
||||||
|
|
||||||
|
for(int dy = sy1; dy < sy2; ++dy)
|
||||||
|
for(int dx = sx1; dx < sx2; ++dx)
|
||||||
|
{
|
||||||
|
out = out + src(dy, dx) * scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
return saturate_cast<elem_type>(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr2D src;
|
||||||
|
float scale_x, scale_y ,scale;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Ptr2D> struct AreaFilter
|
||||||
|
{
|
||||||
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
|
typedef float index_type;
|
||||||
|
|
||||||
|
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||||
|
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
|
||||||
|
|
||||||
|
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||||
|
{
|
||||||
|
float fsx1 = x * scale_x;
|
||||||
|
float fsx2 = fsx1 + scale_x;
|
||||||
|
|
||||||
|
int sx1 = __float2int_ru(fsx1);
|
||||||
|
int sx2 = __float2int_rd(fsx2);
|
||||||
|
|
||||||
|
float fsy1 = y * scale_y;
|
||||||
|
float fsy2 = fsy1 + scale_y;
|
||||||
|
|
||||||
|
int sy1 = __float2int_ru(fsy1);
|
||||||
|
int sy2 = __float2int_rd(fsy2);
|
||||||
|
|
||||||
|
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
|
||||||
|
|
||||||
|
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||||
|
work_type out = VecTraits<work_type>::all(0.f);
|
||||||
|
|
||||||
|
for (int dy = sy1; dy < sy2; ++dy)
|
||||||
|
{
|
||||||
|
for (int dx = sx1; dx < sx2; ++dx)
|
||||||
|
out = out + src(dy, dx) * scale;
|
||||||
|
|
||||||
|
if (sx1 > fsx1)
|
||||||
|
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
|
||||||
|
|
||||||
|
if (sx2 < fsx2)
|
||||||
|
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sy1 > fsy1)
|
||||||
|
for (int dx = sx1; dx < sx2; ++dx)
|
||||||
|
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
|
||||||
|
|
||||||
|
if (sy2 < fsy2)
|
||||||
|
for (int dx = sx1; dx < sx2; ++dx)
|
||||||
|
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
|
||||||
|
|
||||||
|
if ((sy1 > fsy1) && (sx1 > fsx1))
|
||||||
|
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
|
||||||
|
|
||||||
|
if ((sy1 > fsy1) && (sx2 < fsx2))
|
||||||
|
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
|
||||||
|
|
||||||
|
if ((sy2 < fsy2) && (sx2 < fsx2))
|
||||||
|
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
|
||||||
|
|
||||||
|
if ((sy2 < fsy2) && (sx1 > fsx1))
|
||||||
|
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
|
||||||
|
|
||||||
|
return saturate_cast<elem_type>(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr2D src;
|
||||||
|
float scale_x, scale_y;
|
||||||
|
int width, haight;
|
||||||
|
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_FILTERS_HPP
|
||||||
79
3rdpart/OpenCV/include/opencv2/core/cuda/funcattrib.hpp
Normal file
79
3rdpart/OpenCV/include/opencv2/core/cuda/funcattrib.hpp
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
|
||||||
|
#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template<class Func>
|
||||||
|
void printFuncAttrib(Func& func)
|
||||||
|
{
|
||||||
|
|
||||||
|
cudaFuncAttributes attrs;
|
||||||
|
cudaFuncGetAttributes(&attrs, func);
|
||||||
|
|
||||||
|
printf("=== Function stats ===\n");
|
||||||
|
printf("Name: \n");
|
||||||
|
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
|
||||||
|
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
|
||||||
|
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
|
||||||
|
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
|
||||||
|
printf("numRegs = %d\n", attrs.numRegs);
|
||||||
|
printf("ptxVersion = %d\n", attrs.ptxVersion);
|
||||||
|
printf("binaryVersion = %d\n", attrs.binaryVersion);
|
||||||
|
printf("\n");
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
|
||||||
805
3rdpart/OpenCV/include/opencv2/core/cuda/functional.hpp
Normal file
805
3rdpart/OpenCV/include/opencv2/core/cuda/functional.hpp
Normal file
@@ -0,0 +1,805 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
|
||||||
|
#define OPENCV_CUDA_FUNCTIONAL_HPP
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include "saturate_cast.hpp"
|
||||||
|
#include "vec_traits.hpp"
|
||||||
|
#include "type_traits.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
// Function Objects
// Minimal stand-ins for std::unary_function / std::binary_function (removed
// in C++17): they only publish the argument/result typedefs that the
// functors below rely on.
template<typename Argument, typename Result> struct unary_function
{
    typedef Argument argument_type;
    typedef Result   result_type;
};

template<typename Argument1, typename Argument2, typename Result> struct binary_function
{
    typedef Argument1 first_argument_type;
    typedef Argument2 second_argument_type;
    typedef Result    result_type;
};
|
||||||
|
|
||||||
|
// Arithmetic Operations
|
||||||
|
template <typename T> struct plus : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a + b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ plus() {}
|
||||||
|
__host__ __device__ __forceinline__ plus(const plus&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct minus : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a - b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ minus() {}
|
||||||
|
__host__ __device__ __forceinline__ minus(const minus&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct multiplies : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a * b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ multiplies() {}
|
||||||
|
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct divides : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a / b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ divides() {}
|
||||||
|
__host__ __device__ __forceinline__ divides(const divides&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct modulus : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a % b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ modulus() {}
|
||||||
|
__host__ __device__ __forceinline__ modulus(const modulus&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct negate : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||||
|
{
|
||||||
|
return -a;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ negate() {}
|
||||||
|
__host__ __device__ __forceinline__ negate(const negate&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Comparison Operations
|
||||||
|
template <typename T> struct equal_to : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a == b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ equal_to() {}
|
||||||
|
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct not_equal_to : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a != b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ not_equal_to() {}
|
||||||
|
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct greater : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a > b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ greater() {}
|
||||||
|
__host__ __device__ __forceinline__ greater(const greater&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct less : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a < b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ less() {}
|
||||||
|
__host__ __device__ __forceinline__ less(const less&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct greater_equal : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a >= b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ greater_equal() {}
|
||||||
|
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct less_equal : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a <= b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ less_equal() {}
|
||||||
|
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Logical Operations
|
||||||
|
template <typename T> struct logical_and : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a && b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ logical_and() {}
|
||||||
|
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct logical_or : binary_function<T, T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a || b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ logical_or() {}
|
||||||
|
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct logical_not : unary_function<T, bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
|
||||||
|
{
|
||||||
|
return !a;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ logical_not() {}
|
||||||
|
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Bitwise Operations
|
||||||
|
template <typename T> struct bit_and : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a & b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ bit_and() {}
|
||||||
|
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct bit_or : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a | b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ bit_or() {}
|
||||||
|
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct bit_xor : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
|
||||||
|
typename TypeTraits<T>::ParameterType b) const
|
||||||
|
{
|
||||||
|
return a ^ b;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ bit_xor() {}
|
||||||
|
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct bit_not : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||||
|
{
|
||||||
|
return ~v;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ bit_not() {}
|
||||||
|
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Generalized Identity Operations
|
||||||
|
template <typename T> struct identity : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ identity() {}
|
||||||
|
__host__ __device__ __forceinline__ identity(const identity&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||||
|
{
|
||||||
|
return lhs;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ project1st() {}
|
||||||
|
__host__ __device__ __forceinline__ project1st(const project1st&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
|
||||||
|
{
|
||||||
|
return rhs;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ project2nd() {}
|
||||||
|
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Min/Max Operations
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
|
||||||
|
template <> struct name<type> : binary_function<type, type, type> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
|
||||||
|
__host__ __device__ __forceinline__ name() {}\
|
||||||
|
__host__ __device__ __forceinline__ name(const name&) {}\
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct maximum : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||||
|
{
|
||||||
|
return max(lhs, rhs);
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ maximum() {}
|
||||||
|
__host__ __device__ __forceinline__ maximum(const maximum&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
|
||||||
|
|
||||||
|
template <typename T> struct minimum : binary_function<T, T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
|
||||||
|
{
|
||||||
|
return min(lhs, rhs);
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ minimum() {}
|
||||||
|
__host__ __device__ __forceinline__ minimum(const minimum&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
|
||||||
|
|
||||||
|
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
|
||||||
|
|
||||||
|
// Math functions
|
||||||
|
|
||||||
|
template <typename T> struct abs_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
|
||||||
|
{
|
||||||
|
return abs(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ unsigned char operator ()(unsigned char x) const
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ signed char operator ()(signed char x) const
|
||||||
|
{
|
||||||
|
return ::abs((int)x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<char> : unary_function<char, char>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ char operator ()(char x) const
|
||||||
|
{
|
||||||
|
return ::abs((int)x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ unsigned short operator ()(unsigned short x) const
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<short> : unary_function<short, short>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ short operator ()(short x) const
|
||||||
|
{
|
||||||
|
return ::abs((int)x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ unsigned int operator ()(unsigned int x) const
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<int> : unary_function<int, int>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ int operator ()(int x) const
|
||||||
|
{
|
||||||
|
return ::abs(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<float> : unary_function<float, float>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ float operator ()(float x) const
|
||||||
|
{
|
||||||
|
return ::fabsf(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
template <> struct abs_func<double> : unary_function<double, double>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ double operator ()(double x) const
|
||||||
|
{
|
||||||
|
return ::fabs(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ abs_func() {}
|
||||||
|
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
|
||||||
|
template <typename T> struct name ## _func : unary_function<T, float> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
|
||||||
|
{ \
|
||||||
|
return func ## f(v); \
|
||||||
|
} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||||
|
}; \
|
||||||
|
template <> struct name ## _func<double> : unary_function<double, double> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ double operator ()(double v) const \
|
||||||
|
{ \
|
||||||
|
return func(v); \
|
||||||
|
} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||||
|
};
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
|
||||||
|
template <typename T> struct name ## _func : binary_function<T, T, float> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
|
||||||
|
{ \
|
||||||
|
return func ## f(v1, v2); \
|
||||||
|
} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||||
|
}; \
|
||||||
|
template <> struct name ## _func<double> : binary_function<double, double, double> \
|
||||||
|
{ \
|
||||||
|
__device__ __forceinline__ double operator ()(double v1, double v2) const \
|
||||||
|
{ \
|
||||||
|
return func(v1, v2); \
|
||||||
|
} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||||
|
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||||
|
};
|
||||||
|
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
|
||||||
|
|
||||||
|
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
|
||||||
|
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
|
||||||
|
|
||||||
|
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
|
||||||
|
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
|
||||||
|
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
|
||||||
|
|
||||||
|
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
|
||||||
|
{
|
||||||
|
return src1 * src1 + src2 * src2;
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ hypot_sqr_func() {}
|
||||||
|
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Saturate Cast Functor
|
||||||
|
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
|
||||||
|
{
|
||||||
|
return saturate_cast<D>(v);
|
||||||
|
}
|
||||||
|
__host__ __device__ __forceinline__ saturate_cast_func() {}
|
||||||
|
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Threshold Functors
|
||||||
|
template <typename T> struct thresh_binary_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||||
|
{
|
||||||
|
return (src > thresh) * maxVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_func() {}
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
|
||||||
|
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||||
|
|
||||||
|
T thresh;
|
||||||
|
T maxVal;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||||
|
{
|
||||||
|
return (src <= thresh) * maxVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
|
||||||
|
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
|
||||||
|
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||||
|
|
||||||
|
T thresh;
|
||||||
|
T maxVal;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct thresh_trunc_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||||
|
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||||
|
{
|
||||||
|
return minimum<T>()(src, thresh);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ thresh_trunc_func() {}
|
||||||
|
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
|
||||||
|
: thresh(other.thresh) {}
|
||||||
|
|
||||||
|
T thresh;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||||
|
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||||
|
{
|
||||||
|
return (src > thresh) * src;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
|
||||||
|
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
|
||||||
|
: thresh(other.thresh) {}
|
||||||
|
|
||||||
|
T thresh;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {CV_UNUSED(maxVal_);}
|
||||||
|
|
||||||
|
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
|
||||||
|
{
|
||||||
|
return (src <= thresh) * src;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
|
||||||
|
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
|
||||||
|
: thresh(other.thresh) {}
|
||||||
|
|
||||||
|
T thresh;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Function Object Adaptors
|
||||||
|
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
|
||||||
|
{
|
||||||
|
return !pred(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ unary_negate() {}
|
||||||
|
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
|
||||||
|
|
||||||
|
Predicate pred;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
|
||||||
|
{
|
||||||
|
return unary_negate<Predicate>(pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
|
||||||
|
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
|
||||||
|
{
|
||||||
|
return !pred(x,y);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ binary_negate() {}
|
||||||
|
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
|
||||||
|
|
||||||
|
Predicate pred;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
|
||||||
|
{
|
||||||
|
return binary_negate<BinaryPredicate>(pred);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
|
||||||
|
{
|
||||||
|
return op(arg1, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ binder1st() {}
|
||||||
|
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
|
||||||
|
|
||||||
|
Op op;
|
||||||
|
typename Op::first_argument_type arg1;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
|
||||||
|
{
|
||||||
|
return binder1st<Op>(op, typename Op::first_argument_type(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
|
||||||
|
|
||||||
|
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
|
||||||
|
{
|
||||||
|
return op(a, arg2);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ __forceinline__ binder2nd() {}
|
||||||
|
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
|
||||||
|
|
||||||
|
Op op;
|
||||||
|
typename Op::second_argument_type arg2;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
|
||||||
|
{
|
||||||
|
return binder2nd<Op>(op, typename Op::second_argument_type(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Functor Traits
|
||||||
|
template <typename F> struct IsUnaryFunction
|
||||||
|
{
|
||||||
|
typedef char Yes;
|
||||||
|
struct No {Yes a[2];};
|
||||||
|
|
||||||
|
template <typename T, typename D> static Yes check(unary_function<T, D>);
|
||||||
|
static No check(...);
|
||||||
|
|
||||||
|
static F makeF();
|
||||||
|
|
||||||
|
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename F> struct IsBinaryFunction
|
||||||
|
{
|
||||||
|
typedef char Yes;
|
||||||
|
struct No {Yes a[2];};
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
|
||||||
|
static No check(...);
|
||||||
|
|
||||||
|
static F makeF();
|
||||||
|
|
||||||
|
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace functional_detail
|
||||||
|
{
|
||||||
|
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
|
||||||
|
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
|
||||||
|
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
|
||||||
|
|
||||||
|
template <typename T, typename D> struct DefaultUnaryShift
|
||||||
|
{
|
||||||
|
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
|
||||||
|
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
|
||||||
|
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
|
||||||
|
{
|
||||||
|
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
|
||||||
|
template <typename Func> struct ShiftDispatcher<Func, true>
|
||||||
|
{
|
||||||
|
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
|
||||||
|
};
|
||||||
|
template <typename Func> struct ShiftDispatcher<Func, false>
|
||||||
|
{
|
||||||
|
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func> struct DefaultTransformShift
|
||||||
|
{
|
||||||
|
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Func> struct DefaultTransformFunctorTraits
|
||||||
|
{
|
||||||
|
enum { simple_block_dim_x = 16 };
|
||||||
|
enum { simple_block_dim_y = 16 };
|
||||||
|
|
||||||
|
enum { smart_block_dim_x = 16 };
|
||||||
|
enum { smart_block_dim_y = 16 };
|
||||||
|
enum { smart_shift = DefaultTransformShift<Func>::shift };
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
|
||||||
|
|
||||||
|
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
|
||||||
|
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_FUNCTIONAL_HPP
|
||||||
128
3rdpart/OpenCV/include/opencv2/core/cuda/limits.hpp
Normal file
128
3rdpart/OpenCV/include/opencv2/core/cuda/limits.hpp
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_LIMITS_HPP
|
||||||
|
#define OPENCV_CUDA_LIMITS_HPP
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <class T> struct numeric_limits;
|
||||||
|
|
||||||
|
template <> struct numeric_limits<bool>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static bool min() { return false; }
|
||||||
|
__device__ __forceinline__ static bool max() { return true; }
|
||||||
|
static const bool is_signed = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<signed char>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
|
||||||
|
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
|
||||||
|
static const bool is_signed = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<unsigned char>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static unsigned char min() { return 0; }
|
||||||
|
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
|
||||||
|
static const bool is_signed = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<short>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static short min() { return SHRT_MIN; }
|
||||||
|
__device__ __forceinline__ static short max() { return SHRT_MAX; }
|
||||||
|
static const bool is_signed = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<unsigned short>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static unsigned short min() { return 0; }
|
||||||
|
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
|
||||||
|
static const bool is_signed = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<int>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static int min() { return INT_MIN; }
|
||||||
|
__device__ __forceinline__ static int max() { return INT_MAX; }
|
||||||
|
static const bool is_signed = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<unsigned int>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static unsigned int min() { return 0; }
|
||||||
|
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
|
||||||
|
static const bool is_signed = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<float>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static float min() { return FLT_MIN; }
|
||||||
|
__device__ __forceinline__ static float max() { return FLT_MAX; }
|
||||||
|
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
|
||||||
|
static const bool is_signed = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct numeric_limits<double>
|
||||||
|
{
|
||||||
|
__device__ __forceinline__ static double min() { return DBL_MIN; }
|
||||||
|
__device__ __forceinline__ static double max() { return DBL_MAX; }
|
||||||
|
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
|
||||||
|
static const bool is_signed = true;
|
||||||
|
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_LIMITS_HPP
|
||||||
230
3rdpart/OpenCV/include/opencv2/core/cuda/reduce.hpp
Normal file
230
3rdpart/OpenCV/include/opencv2/core/cuda/reduce.hpp
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_REDUCE_HPP
|
||||||
|
#define OPENCV_CUDA_REDUCE_HPP
|
||||||
|
|
||||||
|
#ifndef THRUST_DEBUG // eliminate -Wundef warning
|
||||||
|
#define THRUST_DEBUG 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <thrust/tuple.h>
|
||||||
|
#include "detail/reduce.hpp"
|
||||||
|
#include "detail/reduce_key_val.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <int N, typename T, class Op>
|
||||||
|
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
|
||||||
|
{
|
||||||
|
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
|
||||||
|
}
|
||||||
|
template <unsigned int N, typename K, typename V, class Cmp>
|
||||||
|
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
|
||||||
|
{
|
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||||
|
}
|
||||||
|
#if (CUDART_VERSION < 12040) // details: https://github.com/opencv/opencv_contrib/issues/3690
|
||||||
|
template <int N,
|
||||||
|
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
|
||||||
|
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
|
||||||
|
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
|
||||||
|
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
|
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
|
||||||
|
unsigned int tid,
|
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
|
||||||
|
{
|
||||||
|
reduce_detail::Dispatcher<N>::reductor::template reduce<
|
||||||
|
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
|
||||||
|
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
|
||||||
|
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int N,
|
||||||
|
typename K,
|
||||||
|
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||||
|
class Cmp>
|
||||||
|
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
|
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||||
|
unsigned int tid, const Cmp& cmp)
|
||||||
|
{
|
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
|
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||||
|
const Cmp&>(skeys, key, svals, val, tid, cmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int N,
|
||||||
|
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
|
||||||
|
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
|
||||||
|
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
|
||||||
|
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
|
||||||
|
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
|
||||||
|
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
|
||||||
|
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
|
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
|
||||||
|
unsigned int tid,
|
||||||
|
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
|
||||||
|
{
|
||||||
|
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
|
||||||
|
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
|
||||||
|
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
|
||||||
|
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
|
||||||
|
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
|
||||||
|
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
|
||||||
|
>(skeys, key, svals, val, tid, cmp);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <int N, typename... P, typename... R, class... Op>
|
||||||
|
__device__ __forceinline__ void reduce(const thrust::tuple<P...>& smem, const thrust::tuple<R...>& val, unsigned int tid, const thrust::tuple<Op...>& op)
|
||||||
|
{
|
||||||
|
reduce_detail::Dispatcher<N>::reductor::template reduce<const thrust::tuple<P...>&, const thrust::tuple<R...>&, const thrust::tuple<Op...>&>(smem, val, tid, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Key/value reduction, single key type: reduces the value tuple(s) `val`
// across N threads while keeping the key `key` (staged in shared memory
// `skeys`) in step, using comparator `cmp` — presumably to select the
// winning key/value pair (confirm in reduce_key_val_detail).
// Thin forwarder into reduce_key_val_detail::Dispatcher<N>.
template <unsigned int N, typename K, typename... VP, typename... VR, class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, const thrust::tuple<VP...>& svals, const thrust::tuple<VR...>& val, unsigned int tid, const Cmp& cmp)
{
    // Explicit template arguments preserve reference/volatile qualification
    // of every argument through the dispatch.
    reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, const thrust::tuple<VP...>&, const thrust::tuple<VR...>&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
|
||||||
|
|
||||||
|
// Key/value reduction, tuple-of-keys overload: same contract as the single-key
// overload above, but with multiple key components (shared-memory staging
// tuple `skeys`, per-thread keys `key`) and one comparator per key component
// in `cmp`. Thin forwarder into reduce_key_val_detail::Dispatcher<N>.
template <unsigned int N, typename... KP, typename... KR, typename... VP, typename... VR, class... Cmp>
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP...>& skeys, const thrust::tuple<KR...>& key, const thrust::tuple<VP...>& svals, const thrust::tuple<VR...>& val, unsigned int tid, const thrust::tuple<Cmp...>& cmp)
{
    // Explicit template arguments keep every tuple argument as a reference
    // through the dispatcher call.
    reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<const thrust::tuple<KP...>&, const thrust::tuple<KR...>&, const thrust::tuple<VP...>&, const thrust::tuple<VR...>&, const thrust::tuple<Cmp...>&>(skeys, key, svals, val, tid, cmp);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// smem_tuple
|
||||||
|
|
||||||
|
template <typename T0>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*>
|
||||||
|
smem_tuple(T0* t0)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*>
|
||||||
|
smem_tuple(T0* t0, T1* t1)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
|
||||||
|
__device__ __forceinline__
|
||||||
|
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
|
||||||
|
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
|
||||||
|
{
|
||||||
|
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_REDUCE_HPP
|
||||||
292
3rdpart/OpenCV/include/opencv2/core/cuda/saturate_cast.hpp
Normal file
292
3rdpart/OpenCV/include/opencv2/core/cuda/saturate_cast.hpp
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
|
||||||
|
#define OPENCV_CUDA_SATURATE_CAST_HPP
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
{
    // saturate_cast<D>(v): convert v to destination type D, clamping to D's
    // representable range instead of wrapping/truncating.
    //
    // The generic templates below are plain conversions (no saturation);
    // saturation happens only in the full specializations that follow, which
    // use a single PTX `cvt.sat` instruction (or `cvt.rni.sat` for float /
    // double sources — `rni` rounds to the nearest integer first).
    // Conversions from `double` are guarded by `__CUDA_ARCH__ >= 130`; on
    // older architectures they degrade through a float intermediate.

    // Generic fall-throughs: widening or same-range conversions where no
    // clamping is needed (or where the specialization set does not cover the
    // pair) — just invoke D's conversion.
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

    // ---- to uchar (clamp to [0, 255]) ----
    // 8-bit sources are widened to int/uint locals first because the "r"
    // asm constraint requires a 32-bit register operand.
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
    {
        uint res = 0;
        int vi = v;
        asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
    {
        uint res = 0;
        asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
#else
        // No native f64 path on arch < 1.3: round-trip through float.
        return saturate_cast<uchar>((float)v);
#endif
    }

    // ---- to schar (clamp to [-128, 127]) ----
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
    {
        uint res = 0;
        uint vi = v;
        asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
    {
        uint res = 0;
        asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
    {
        uint res = 0;
        asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
    {
        uint res = 0;
        asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        uint res = 0;
        asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
        return res;
#else
        // No native f64 path on arch < 1.3: round-trip through float.
        return saturate_cast<schar>((float)v);
#endif
    }

    // ---- to ushort (clamp to [0, 65535]); 16-bit result uses the "h"
    // (half-register) constraint ----
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
    {
        ushort res = 0;
        int vi = v;
        asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
    {
        ushort res = 0;
        asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
    {
        ushort res = 0;
        asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        ushort res = 0;
        asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
#else
        // No native f64 path on arch < 1.3: round-trip through float.
        return saturate_cast<ushort>((float)v);
#endif
    }

    // ---- to short (clamp to [-32768, 32767]) ----
    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
    {
        short res = 0;
        asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
    {
        short res = 0;
        asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
    {
        short res = 0;
        asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
    {
        short res = 0;
        asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
        return res;
    }
    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        short res = 0;
        asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
        return res;
#else
        // No native f64 path on arch < 1.3: round-trip through float.
        return saturate_cast<short>((float)v);
#endif
    }

    // ---- to int: only uint needs clamping; float/double use the CUDA
    // round-to-nearest intrinsics (which saturate per the CUDA math rules —
    // confirm against the intrinsics documentation) ----
    template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
    {
        int res = 0;
        asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
    {
        return __float2int_rn(v);
    }
    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2int_rn(v);
#else
        return saturate_cast<int>((float)v);
#endif
    }

    // ---- to uint: clamp negative signed sources to 0 ----
    template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
    {
        uint res = 0;
        int vi = v;
        asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
    {
        uint res = 0;
        asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
        return res;
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
    {
        return __float2uint_rn(v);
    }
    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
    {
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
        return __double2uint_rn(v);
#else
        // No native f64 path on arch < 1.3: round-trip through float.
        return saturate_cast<uint>((float)v);
#endif
    }
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
|
||||||
258
3rdpart/OpenCV/include/opencv2/core/cuda/scan.hpp
Normal file
258
3rdpart/OpenCV/include/opencv2/core/cuda/scan.hpp
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_SCAN_HPP
|
||||||
|
#define OPENCV_CUDA_SCAN_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cuda/common.hpp"
|
||||||
|
#include "opencv2/core/cuda/utility.hpp"
|
||||||
|
#include "opencv2/core/cuda/warp.hpp"
|
||||||
|
#include "opencv2/core/cuda/warp_shuffle.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
{
    // Prefix-scan primitives over volatile shared memory. EXCLUSIVE scans
    // return the sum of all *preceding* elements; INCLUSIVE scans include the
    // element itself.
    enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };

    // Intra-warp scan (Hillis–Steele style): each lane repeatedly combines
    // with the element 1, 2, 4, 8, 16 positions back. The `lane >= k` guards
    // prevent reads below the warp's base, so no padding is required.
    // Relies on warp-synchronous execution over `volatile` shared memory —
    // statement order is load-bearing; do not reorder.
    template <ScanKind Kind, typename T, typename F> struct WarpScan
    {
        __device__ __forceinline__ WarpScan() {}
        __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }

        // Scan the warp-worth of data containing ptr[idx]; returns this
        // lane's scan result (combined with functor F).
        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = idx & 31;
            F op;

            if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
            if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
            if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
            if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
            if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);

            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                // Exclusive result = neighbor's inclusive result; lane 0
                // has no predecessor and gets the identity 0.
                return (lane > 0) ? ptr [idx - 1] : 0;
        }

        // Identity index mapping: this variant uses the raw thread id.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return tid;
        }

        // No guard region to initialize for the guarded variant.
        __device__ __forceinline__ void init(volatile T *ptr){}

        static const int warp_offset = 0;

        // Scan type used by BlockScan to combine per-warp totals.
        typedef WarpScan<INCLUSIVE, T, F> merge;
    };

    // Branch-free variant of WarpScan: drops the `lane >= k` comparisons and
    // instead reads into a 16-element zero guard region that init() clears
    // and index() skips over (+16). Trades shared memory (stride 32+16+1;
    // the +1 is presumably bank-conflict padding — confirm) for fewer
    // branches.
    // NOTE(review): `lane` here comes from threadIdx.x, not idx — assumes
    // operator() is always called with idx = index(threadIdx.x).
    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
    {
        __device__ __forceinline__ WarpScanNoComp() {}
        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }

        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
        {
            const unsigned int lane = threadIdx.x & 31;
            F op;

            ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
            ptr [idx ] = op(ptr [idx - 16], ptr [idx]);

            if( Kind == INCLUSIVE )
                return ptr [idx];
            else
                return (lane > 0) ? ptr [idx - 1] : 0;
        }

        // Map tid into the padded layout: one warp_smem_stride-sized row per
        // warp, first 16 slots reserved as the zero guard region.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
        }

        // Zero the shared buffer so guard-region reads combine with the
        // additive identity.
        __device__ __forceinline__ void init(volatile T *ptr)
        {
            ptr[threadIdx.x] = 0;
        }

        static const int warp_smem_stride = 32 + 16 + 1;
        static const int warp_offset = 16;
        static const int warp_log = 5;
        static const int warp_mask = 31;

        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
    };

    // Block-level scan built from a warp-scan policy Sc: (1) each warp scans
    // its segment, (2) warp totals are staged at ptr[warp_offset + warp],
    // (3) warp 0 scans the totals with Sc::merge, (4) every warp adds its
    // predecessor-warps' total back in. The __syncthreads() calls separate
    // the phases — do not remove or reorder.
    template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
    {
        __device__ __forceinline__ BlockScan() {}
        __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }

        __device__ __forceinline__ T operator()(volatile T *ptr)
        {
            const unsigned int tid  = threadIdx.x;
            const unsigned int lane = tid & warp_mask;
            const unsigned int warp = tid >> warp_log;

            Sc scan;
            typename Sc::merge merge_scan;
            const unsigned int idx = scan.index(tid);

            T val = scan(ptr, idx);
            __syncthreads ();

            if( warp == 0)
                scan.init(ptr);
            __syncthreads ();

            // Last lane of each warp publishes the warp's total.
            if( lane == 31 )
                ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
            __syncthreads ();

            if( warp == 0 )
                merge_scan(ptr, idx);
            __syncthreads();

            // NOTE(review): the cross-warp combine uses `+` directly rather
            // than the functor F — only correct for additive scans.
            if ( warp > 0)
                val = ptr [scan.warp_offset + warp - 1] + val;
            __syncthreads ();

            ptr[idx] = val;
            __syncthreads ();

            return val ;
        }

        static const int warp_log  = 5;
        static const int warp_mask = 31;
    };

    // Warp-level inclusive (+) scan of one value per thread. On sm_30+ this
    // uses shfl_up and never touches s_Data; otherwise it uses the classic
    // zero-padded shared-memory ladder (s_Data must hold 2 elements per
    // thread of the warp's region).
    template <typename T>
    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
#if __CUDA_ARCH__ >= 300
        const unsigned int laneId = cv::cuda::device::Warp::laneId();

        // Shuffle-based scan: pull the value from `i` lanes below and
        // accumulate, doubling the distance each step.
        #pragma unroll
        for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
        {
            const T n = cv::cuda::device::shfl_up(idata, i);
            if (laneId >= i)
                  idata += n;
        }

        return idata;
#else
        // pos addresses the upper half of a 2x-warp-size region; the lower
        // half is zeroed so out-of-range reads hit the additive identity.
        unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
        s_Data[pos] = 0;
        pos += OPENCV_CUDA_WARP_SIZE;
        s_Data[pos] = idata;

        s_Data[pos] += s_Data[pos - 1];
        s_Data[pos] += s_Data[pos - 2];
        s_Data[pos] += s_Data[pos - 4];
        s_Data[pos] += s_Data[pos - 8];
        s_Data[pos] += s_Data[pos - 16];

        return s_Data[pos];
#endif
    }

    // Warp-level exclusive scan, derived from the inclusive one by removing
    // the thread's own contribution.
    template <typename T>
    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        return warpScanInclusive(idata, s_Data, tid) - idata;
    }

    // Block-level inclusive (+) scan for tiNumScanThreads threads
    // (compile-time constant). For a multi-warp block: per-warp inclusive
    // scans, exclusive scan of the warp totals, then add the warp base back.
    // For a single-warp block it degenerates to warpScanInclusive.
    template <int tiNumScanThreads, typename T>
    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
        {
            //Bottom-level inclusive warp scan
            T warpResult = warpScanInclusive(idata, s_Data, tid);

            //Save top elements of each warp for exclusive warp scan
            //sync to wait for warp scans to complete (because s_Data is being overwritten)
            __syncthreads();
            if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
            {
                s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
            }

            //wait for warp scans to complete
            __syncthreads();

            if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
            {
                //grab top warp elements
                T val = s_Data[tid];
                //calculate exclusive scan and write back to shared memory
                s_Data[tid] = warpScanExclusive(val, s_Data, tid);
            }

            //return updated warp scans with exclusive scan results
            __syncthreads();

            return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
        }
        else
        {
            return warpScanInclusive(idata, s_Data, tid);
        }
    }
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_SCAN_HPP
|
||||||
869
3rdpart/OpenCV/include/opencv2/core/cuda/simd_functions.hpp
Normal file
869
3rdpart/OpenCV/include/opencv2/core/cuda/simd_functions.hpp
Normal file
@@ -0,0 +1,869 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of NVIDIA Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||||
|
#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
// 2
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s;
|
||||||
|
s = a ^ b; // sum bits
|
||||||
|
r = a + b; // actual sum
|
||||||
|
s = s ^ r; // determine carry-ins for each bit position
|
||||||
|
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
|
||||||
|
r = r - s; // subtract out carry-out from low word
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s;
|
||||||
|
s = a ^ b; // sum bits
|
||||||
|
r = a - b; // actual sum
|
||||||
|
s = s ^ r; // determine carry-ins for each bit position
|
||||||
|
s = s & 0x00010000; // borrow to high word
|
||||||
|
r = r + s; // compensate for borrow from low word
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s, t, u, v;
|
||||||
|
s = a & 0x0000ffff; // extract low halfword
|
||||||
|
r = b & 0x0000ffff; // extract low halfword
|
||||||
|
u = ::max(r, s); // maximum of low halfwords
|
||||||
|
v = ::min(r, s); // minimum of low halfwords
|
||||||
|
s = a & 0xffff0000; // extract high halfword
|
||||||
|
r = b & 0xffff0000; // extract high halfword
|
||||||
|
t = ::max(r, s); // maximum of high halfwords
|
||||||
|
s = ::min(r, s); // minimum of high halfwords
|
||||||
|
r = u | t; // maximum of both halfwords
|
||||||
|
s = v | s; // minimum of both halfwords
|
||||||
|
r = r - s; // |a - b| = max(a,b) - min(a,b);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, s;
|
||||||
|
|
||||||
|
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||||
|
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||||
|
s = a ^ b;
|
||||||
|
r = a & b;
|
||||||
|
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
|
||||||
|
s = s >> 1;
|
||||||
|
s = r + s;
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||||
|
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||||
|
unsigned int s;
|
||||||
|
s = a ^ b;
|
||||||
|
r = a | b;
|
||||||
|
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
|
||||||
|
s = s >> 1;
|
||||||
|
r = r - s;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
unsigned int c;
|
||||||
|
r = a ^ b; // 0x0000 if a == b
|
||||||
|
c = r | 0x80008000; // set msbs, to catch carry out
|
||||||
|
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||||
|
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||||
|
c = r & ~c; // msb = 1, if r was 0x0000
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vseteq2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
r = a ^ b; // 0x0000 if a == b
|
||||||
|
c = r | 0x80008000; // set msbs, to catch carry out
|
||||||
|
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
|
||||||
|
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||||
|
c = r & ~c; // msb = 1, if r was 0x0000
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetge2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||||
|
c = c & 0x80008000; // msbs = carry-outs
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetgt2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||||
|
c = c & 0x80008000; // msbs = carry-outs
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetle2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetlt2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||||
|
c = c & 0x80008000; // msb = carry-outs
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
unsigned int c;
|
||||||
|
r = a ^ b; // 0x0000 if a == b
|
||||||
|
c = r | 0x80008000; // set msbs, to catch carry out
|
||||||
|
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||||
|
c = r | c; // msb = 1, if r was not 0x0000
|
||||||
|
c = c & 0x80008000; // extract msbs
|
||||||
|
r = c >> 15; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetne2(a, b);
|
||||||
|
c = r << 16; // convert bool
|
||||||
|
r = c - r; // into mask
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
r = a ^ b; // 0x0000 if a == b
|
||||||
|
c = r | 0x80008000; // set msbs, to catch carry out
|
||||||
|
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
|
||||||
|
c = r | c; // msb = 1, if r was not 0x0000
|
||||||
|
c = c & 0x80008000; // extract msbs
|
||||||
|
r = c >> 15; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s, t, u;
|
||||||
|
r = a & 0x0000ffff; // extract low halfword
|
||||||
|
s = b & 0x0000ffff; // extract low halfword
|
||||||
|
t = ::max(r, s); // maximum of low halfwords
|
||||||
|
r = a & 0xffff0000; // extract high halfword
|
||||||
|
s = b & 0xffff0000; // extract high halfword
|
||||||
|
u = ::max(r, s); // maximum of high halfwords
|
||||||
|
r = t | u; // combine halfword maximums
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s, t, u;
|
||||||
|
r = a & 0x0000ffff; // extract low halfword
|
||||||
|
s = b & 0x0000ffff; // extract low halfword
|
||||||
|
t = ::min(r, s); // minimum of low halfwords
|
||||||
|
r = a & 0xffff0000; // extract high halfword
|
||||||
|
s = b & 0xffff0000; // extract high halfword
|
||||||
|
u = ::min(r, s); // minimum of high halfwords
|
||||||
|
r = t | u; // combine halfword minimums
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s, t;
|
||||||
|
s = a ^ b; // sum bits
|
||||||
|
r = a & 0x7f7f7f7f; // clear msbs
|
||||||
|
t = b & 0x7f7f7f7f; // clear msbs
|
||||||
|
s = s & 0x80808080; // msb sum bits
|
||||||
|
r = r + t; // add without msbs, record carry-out in msbs
|
||||||
|
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
|
||||||
|
#endif /* __CUDA_ARCH__ >= 300 */
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s, t;
|
||||||
|
s = a ^ ~b; // inverted sum bits
|
||||||
|
r = a | 0x80808080; // set msbs
|
||||||
|
t = b & 0x7f7f7f7f; // clear msbs
|
||||||
|
s = s & 0x80808080; // inverted msb sum bits
|
||||||
|
r = r - t; // subtract w/o msbs, record inverted borrows in msb
|
||||||
|
r = r ^ s; // combine inverted msb sum bits and borrows
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, s;
|
||||||
|
|
||||||
|
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
|
||||||
|
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
|
||||||
|
s = a ^ b;
|
||||||
|
r = a & b;
|
||||||
|
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||||
|
s = s >> 1;
|
||||||
|
s = r + s;
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
|
||||||
|
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
|
||||||
|
unsigned int c;
|
||||||
|
c = a ^ b;
|
||||||
|
r = a | b;
|
||||||
|
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
|
||||||
|
c = c >> 1;
|
||||||
|
r = r - c;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
unsigned int c;
|
||||||
|
r = a ^ b; // 0x00 if a == b
|
||||||
|
c = r | 0x80808080; // set msbs, to catch carry out
|
||||||
|
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
|
||||||
|
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||||
|
c = r & ~c; // msb = 1, if r was 0x00
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, t;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vseteq4(a, b);
|
||||||
|
t = r << 8; // convert bool
|
||||||
|
r = t - r; // to mask
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
t = a ^ b; // 0x00 if a == b
|
||||||
|
r = t | 0x80808080; // set msbs, to catch carry out
|
||||||
|
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
|
||||||
|
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
|
||||||
|
r = t & ~r; // msb = 1, if t was 0x00
|
||||||
|
t = r >> 7; // build mask
|
||||||
|
t = r - t; // from
|
||||||
|
r = t | r; // msbs
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||||
|
c = c & 0x80808080; // msb = carry-outs
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetle4(a, b);
|
||||||
|
c = r << 8; // convert bool
|
||||||
|
r = c - r; // to mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
|
||||||
|
c = c & 0x80808080; // msbs = carry-outs
|
||||||
|
r = c >> 7; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||||
|
c = c & 0x80808080; // msb = carry-outs
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetlt4(a, b);
|
||||||
|
c = r << 8; // convert bool
|
||||||
|
r = c - r; // to mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(a));
|
||||||
|
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
|
||||||
|
c = c & 0x80808080; // msbs = carry-outs
|
||||||
|
r = c >> 7; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||||
|
c = c & 0x80808080; // msb = carry-outs
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, s;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetge4(a, b);
|
||||||
|
s = r << 8; // convert bool
|
||||||
|
r = s - r; // to mask
|
||||||
|
#else
|
||||||
|
asm ("not.b32 %0,%0;" : "+r"(b));
|
||||||
|
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
|
||||||
|
r = r & 0x80808080; // msb = carry-outs
|
||||||
|
s = r >> 7; // build mask
|
||||||
|
s = r - s; // from
|
||||||
|
r = s | r; // msbs
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int c;
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||||
|
c = c & 0x80808080; // msb = carry-outs
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetgt4(a, b);
|
||||||
|
c = r << 8; // convert bool
|
||||||
|
r = c - r; // to mask
|
||||||
|
#else
|
||||||
|
asm("not.b32 %0, %0;" : "+r"(b));
|
||||||
|
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
|
||||||
|
c = c & 0x80808080; // msb = carry-outs
|
||||||
|
r = c >> 7; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
unsigned int c;
|
||||||
|
r = a ^ b; // 0x00 if a == b
|
||||||
|
c = r | 0x80808080; // set msbs, to catch carry out
|
||||||
|
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||||
|
c = r | c; // msb = 1, if r was not 0x00
|
||||||
|
c = c & 0x80808080; // extract msbs
|
||||||
|
r = c >> 7; // convert to bool
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r, c;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
r = vsetne4(a, b);
|
||||||
|
c = r << 8; // convert bool
|
||||||
|
r = c - r; // to mask
|
||||||
|
#else
|
||||||
|
// inspired by Alan Mycroft's null-byte detection algorithm:
|
||||||
|
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
|
||||||
|
r = a ^ b; // 0x00 if a == b
|
||||||
|
c = r | 0x80808080; // set msbs, to catch carry out
|
||||||
|
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
|
||||||
|
c = r | c; // msb = 1, if r was not 0x00
|
||||||
|
c = c & 0x80808080; // extract msbs
|
||||||
|
r = c >> 7; // convert
|
||||||
|
r = c - r; // msbs to
|
||||||
|
r = c | r; // mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s;
|
||||||
|
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||||
|
r = a ^ b; //
|
||||||
|
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
|
||||||
|
r = s ^ r; // select a when b >= a, else select b => min(a,b)
|
||||||
|
r = s - r; // |a - b| = max(a,b) - min(a,b);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s;
|
||||||
|
s = vcmpge4(a, b); // mask = 0xff if a >= b
|
||||||
|
r = a & s; // select a when b >= a
|
||||||
|
s = b & ~s; // select b when b < a
|
||||||
|
r = r | s; // combine byte selections
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r; // byte-wise unsigned maximum
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
unsigned int r = 0;
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ >= 300
|
||||||
|
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#elif __CUDA_ARCH__ >= 200
|
||||||
|
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
|
||||||
|
#else
|
||||||
|
unsigned int s;
|
||||||
|
s = vcmpge4(b, a); // mask = 0xff if a >= b
|
||||||
|
r = a & s; // select a when b >= a
|
||||||
|
s = b & ~s; // select b when b < a
|
||||||
|
r = r | s; // combine byte selections
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
|
||||||
75
3rdpart/OpenCV/include/opencv2/core/cuda/transform.hpp
Normal file
75
3rdpart/OpenCV/include/opencv2/core/cuda/transform.hpp
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_TRANSFORM_HPP
|
||||||
|
#define OPENCV_CUDA_TRANSFORM_HPP
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "utility.hpp"
|
||||||
|
#include "detail/transform_detail.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
|
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
|
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
|
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
|
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||||
|
}
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_TRANSFORM_HPP
|
||||||
90
3rdpart/OpenCV/include/opencv2/core/cuda/type_traits.hpp
Normal file
90
3rdpart/OpenCV/include/opencv2/core/cuda/type_traits.hpp
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||||
|
#define OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||||
|
|
||||||
|
#include "detail/type_traits_detail.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <typename T> struct IsSimpleParameter
|
||||||
|
{
|
||||||
|
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
|
||||||
|
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct TypeTraits
|
||||||
|
{
|
||||||
|
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
|
||||||
|
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
|
||||||
|
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
|
||||||
|
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
|
||||||
|
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
|
||||||
|
|
||||||
|
enum { isConst = type_traits_detail::UnConst<T>::value };
|
||||||
|
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
|
||||||
|
|
||||||
|
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
|
||||||
|
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
|
||||||
|
|
||||||
|
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
|
||||||
|
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
|
||||||
|
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
|
||||||
|
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
|
||||||
|
enum { isArith = isIntegral || isFloat };
|
||||||
|
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
|
||||||
|
|
||||||
|
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
|
||||||
|
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
|
||||||
|
};
|
||||||
|
}}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
|
||||||
230
3rdpart/OpenCV/include/opencv2/core/cuda/utility.hpp
Normal file
230
3rdpart/OpenCV/include/opencv2/core/cuda/utility.hpp
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_UTILITY_HPP
|
||||||
|
#define OPENCV_CUDA_UTILITY_HPP
|
||||||
|
|
||||||
|
#include "saturate_cast.hpp"
|
||||||
|
#include "datamov_utils.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
struct CV_EXPORTS ThrustAllocator
|
||||||
|
{
|
||||||
|
typedef uchar value_type;
|
||||||
|
virtual ~ThrustAllocator();
|
||||||
|
virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
|
||||||
|
virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
|
||||||
|
static ThrustAllocator& getAllocator();
|
||||||
|
static void setAllocator(ThrustAllocator* allocator);
|
||||||
|
};
|
||||||
|
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
|
||||||
|
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
|
||||||
|
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
|
||||||
|
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// swap
|
||||||
|
|
||||||
|
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
|
||||||
|
{
|
||||||
|
const T temp = a;
|
||||||
|
a = b;
|
||||||
|
b = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Mask Reader
|
||||||
|
|
||||||
|
struct SingleMask
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
|
||||||
|
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||||
|
{
|
||||||
|
return mask.ptr(y)[x] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PtrStepb mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SingleMaskChannels
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
|
||||||
|
: mask(mask_), channels(channels_) {}
|
||||||
|
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
|
||||||
|
:mask(mask_.mask), channels(mask_.channels){}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||||
|
{
|
||||||
|
return mask.ptr(y)[x / channels] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PtrStepb mask;
|
||||||
|
int channels;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MaskCollection
|
||||||
|
{
|
||||||
|
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
|
||||||
|
: maskCollection(maskCollection_) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
|
||||||
|
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void next()
|
||||||
|
{
|
||||||
|
curMask = *maskCollection++;
|
||||||
|
}
|
||||||
|
__device__ __forceinline__ void setMask(int z)
|
||||||
|
{
|
||||||
|
curMask = maskCollection[z];
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||||
|
{
|
||||||
|
uchar val;
|
||||||
|
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
const PtrStepb* maskCollection;
|
||||||
|
PtrStepb curMask;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WithOutMask
|
||||||
|
{
|
||||||
|
__host__ __device__ __forceinline__ WithOutMask(){}
|
||||||
|
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void next() const
|
||||||
|
{
|
||||||
|
}
|
||||||
|
__device__ __forceinline__ void setMask(int) const
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int, int) const
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int, int, int) const
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ bool check(int, int)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ bool check(int, int, int)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Solve linear system
|
||||||
|
|
||||||
|
// solve 2x2 linear system Ax=b
|
||||||
|
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
|
||||||
|
{
|
||||||
|
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
|
||||||
|
|
||||||
|
if (det != 0)
|
||||||
|
{
|
||||||
|
double invdet = 1.0 / det;
|
||||||
|
|
||||||
|
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
|
||||||
|
|
||||||
|
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// solve 3x3 linear system Ax=b
|
||||||
|
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
|
||||||
|
{
|
||||||
|
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
|
||||||
|
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
|
||||||
|
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
|
||||||
|
|
||||||
|
if (det != 0)
|
||||||
|
{
|
||||||
|
double invdet = 1.0 / det;
|
||||||
|
|
||||||
|
x[0] = saturate_cast<T>(invdet *
|
||||||
|
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
|
||||||
|
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
|
||||||
|
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
|
||||||
|
|
||||||
|
x[1] = saturate_cast<T>(invdet *
|
||||||
|
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
|
||||||
|
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
|
||||||
|
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
|
||||||
|
|
||||||
|
x[2] = saturate_cast<T>(invdet *
|
||||||
|
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
|
||||||
|
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
|
||||||
|
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_UTILITY_HPP
|
||||||
232
3rdpart/OpenCV/include/opencv2/core/cuda/vec_distance.hpp
Normal file
232
3rdpart/OpenCV/include/opencv2/core/cuda/vec_distance.hpp
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||||
|
#define OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||||
|
|
||||||
|
#include "reduce.hpp"
|
||||||
|
#include "functional.hpp"
|
||||||
|
#include "detail/vec_distance_detail.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
template <typename T> struct L1Dist
|
||||||
|
{
|
||||||
|
typedef int value_type;
|
||||||
|
typedef int result_type;
|
||||||
|
|
||||||
|
__device__ __forceinline__ L1Dist() : mySum(0) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||||
|
{
|
||||||
|
mySum = __sad(val1, val2, mySum);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||||
|
{
|
||||||
|
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator int() const
|
||||||
|
{
|
||||||
|
return mySum;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mySum;
|
||||||
|
};
|
||||||
|
template <> struct L1Dist<float>
|
||||||
|
{
|
||||||
|
typedef float value_type;
|
||||||
|
typedef float result_type;
|
||||||
|
|
||||||
|
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||||
|
{
|
||||||
|
mySum += ::fabs(val1 - val2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||||
|
{
|
||||||
|
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator float() const
|
||||||
|
{
|
||||||
|
return mySum;
|
||||||
|
}
|
||||||
|
|
||||||
|
float mySum;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct L2Dist
|
||||||
|
{
|
||||||
|
typedef float value_type;
|
||||||
|
typedef float result_type;
|
||||||
|
|
||||||
|
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void reduceIter(float val1, float val2)
|
||||||
|
{
|
||||||
|
float reg = val1 - val2;
|
||||||
|
mySum += reg * reg;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
|
||||||
|
{
|
||||||
|
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator float() const
|
||||||
|
{
|
||||||
|
return sqrtf(mySum);
|
||||||
|
}
|
||||||
|
|
||||||
|
float mySum;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct HammingDist
|
||||||
|
{
|
||||||
|
typedef int value_type;
|
||||||
|
typedef int result_type;
|
||||||
|
|
||||||
|
__device__ __forceinline__ HammingDist() : mySum(0) {}
|
||||||
|
|
||||||
|
__device__ __forceinline__ void reduceIter(int val1, int val2)
|
||||||
|
{
|
||||||
|
mySum += __popc(val1 ^ val2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
|
||||||
|
{
|
||||||
|
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ operator int() const
|
||||||
|
{
|
||||||
|
return mySum;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mySum;
|
||||||
|
};
|
||||||
|
|
||||||
|
// calc distance between two vectors in global memory
|
||||||
|
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
|
||||||
|
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||||
|
{
|
||||||
|
for (int i = tid; i < len; i += THREAD_DIM)
|
||||||
|
{
|
||||||
|
T1 val1;
|
||||||
|
ForceGlob<T1>::Load(vec1, i, val1);
|
||||||
|
|
||||||
|
T2 val2;
|
||||||
|
ForceGlob<T2>::Load(vec2, i, val2);
|
||||||
|
|
||||||
|
dist.reduceIter(val1, val2);
|
||||||
|
}
|
||||||
|
|
||||||
|
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
|
||||||
|
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
|
||||||
|
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
|
||||||
|
{
|
||||||
|
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
|
||||||
|
|
||||||
|
dist.reduceAll<THREAD_DIM>(smem, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calc distance between two vectors in global memory
|
||||||
|
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
|
||||||
|
{
|
||||||
|
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
|
||||||
|
{
|
||||||
|
vec1 = vec1_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T2, typename Dist>
|
||||||
|
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||||
|
{
|
||||||
|
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
const T1* vec1;
|
||||||
|
};
|
||||||
|
|
||||||
|
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
|
||||||
|
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
|
||||||
|
{
|
||||||
|
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
|
||||||
|
{
|
||||||
|
if (glob_tid < len)
|
||||||
|
smem[glob_tid] = vec1[glob_tid];
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
U* vec1ValsPtr = vec1Vals;
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
|
||||||
|
*vec1ValsPtr++ = smem[i];
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T2, typename Dist>
|
||||||
|
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
|
||||||
|
{
|
||||||
|
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
|
||||||
|
}
|
||||||
|
|
||||||
|
U vec1Vals[MAX_LEN / THREAD_DIM];
|
||||||
|
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
|
||||||
926
3rdpart/OpenCV/include/opencv2/core/cuda/vec_math.hpp
Normal file
926
3rdpart/OpenCV/include/opencv2/core/cuda/vec_math.hpp
Normal file
@@ -0,0 +1,926 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_VECMATH_HPP
|
||||||
|
#define OPENCV_CUDA_VECMATH_HPP
|
||||||
|
|
||||||
|
#include "vec_traits.hpp"
|
||||||
|
#include "saturate_cast.hpp"
|
||||||
|
#include "cuda_compat.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
using cv::cuda::device::compat::double4;
|
||||||
|
using cv::cuda::device::compat::make_double4;
|
||||||
|
|
||||||
|
// saturate_cast
|
||||||
|
|
||||||
|
namespace vec_math_detail
|
||||||
|
{
|
||||||
|
template <int cn, typename VecD> struct SatCastHelper;
|
||||||
|
template <typename VecD> struct SatCastHelper<1, VecD>
|
||||||
|
{
|
||||||
|
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||||
|
{
|
||||||
|
typedef typename VecTraits<VecD>::elem_type D;
|
||||||
|
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename VecD> struct SatCastHelper<2, VecD>
|
||||||
|
{
|
||||||
|
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||||
|
{
|
||||||
|
typedef typename VecTraits<VecD>::elem_type D;
|
||||||
|
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename VecD> struct SatCastHelper<3, VecD>
|
||||||
|
{
|
||||||
|
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||||
|
{
|
||||||
|
typedef typename VecTraits<VecD>::elem_type D;
|
||||||
|
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename VecD> struct SatCastHelper<4, VecD>
|
||||||
|
{
|
||||||
|
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
|
||||||
|
{
|
||||||
|
typedef typename VecTraits<VecD>::elem_type D;
|
||||||
|
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
|
||||||
|
{
|
||||||
|
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
|
||||||
|
|
||||||
|
// unary operators
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(op (a.x)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
|
||||||
|
|
||||||
|
// unary functions
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(func (a.x)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
|
||||||
|
|
||||||
|
// binary operators (vec & vec)
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(a.x op b.x); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
|
||||||
|
|
||||||
|
// binary operators (vec & scalar)
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(a.x op s); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(s op b.x); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
|
||||||
|
|
||||||
|
// binary function (vec & vec)
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
|
||||||
|
|
||||||
|
// binary function (vec & scalar)
|
||||||
|
|
||||||
|
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
|
||||||
|
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
|
||||||
|
} \
|
||||||
|
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
|
||||||
|
{ \
|
||||||
|
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
|
||||||
|
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
|
||||||
|
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
|
||||||
|
|
||||||
|
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
|
||||||
|
|
||||||
|
}}} // namespace cv { namespace cuda { namespace device
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_VECMATH_HPP
|
||||||
292
3rdpart/OpenCV/include/opencv2/core/cuda/vec_traits.hpp
Normal file
292
3rdpart/OpenCV/include/opencv2/core/cuda/vec_traits.hpp
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
|
||||||
|
#define OPENCV_CUDA_VEC_TRAITS_HPP
|
||||||
|
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "cuda_compat.hpp"
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
using cv::cuda::device::compat::double4;
|
||||||
|
using cv::cuda::device::compat::make_double4;
|
||||||
|
|
||||||
|
template<typename T, int N> struct TypeVec;
|
||||||
|
|
||||||
|
struct __align__(8) uchar8
|
||||||
|
{
|
||||||
|
uchar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
|
||||||
|
{
|
||||||
|
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
struct __align__(8) char8
|
||||||
|
{
|
||||||
|
schar a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
|
||||||
|
{
|
||||||
|
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
struct __align__(16) ushort8
|
||||||
|
{
|
||||||
|
ushort a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
|
||||||
|
{
|
||||||
|
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
struct __align__(16) short8
|
||||||
|
{
|
||||||
|
short a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
|
||||||
|
{
|
||||||
|
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
struct __align__(32) uint8
|
||||||
|
{
|
||||||
|
uint a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
|
||||||
|
{
|
||||||
|
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
struct __align__(32) int8
|
||||||
|
{
|
||||||
|
int a0, a1, a2, a3, a4, a5, a6, a7;
|
||||||
|
};
|
||||||
|
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
|
||||||
|
{
|
||||||
|
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
// 8-channel float vector (no CUDA built-in beyond 4 channels).
// __align__(32) matches the 32-byte aggregate size for vectorized access.
struct __align__(32) float8
{
    float a0, a1, a2, a3, a4, a5, a6, a7;
};
|
||||||
|
// Builds a float8 from its eight components — the analogue of CUDA's
// built-in make_floatN factories for the custom 8-channel type.
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
    float8 v;
    v.a0 = a0; v.a1 = a1; v.a2 = a2; v.a3 = a3;
    v.a4 = a4; v.a5 = a5; v.a6 = a6; v.a7 = a7;
    return v;
}
|
||||||
|
// 8-channel double vector. Unlike the smaller element types, no explicit
// __align__ is requested here (the 64-byte aggregate keeps double's natural
// alignment only).
struct double8
{
    double a0, a1, a2, a3, a4, a5, a6, a7;
};
|
||||||
|
// Builds a double8 from its eight components — the analogue of CUDA's
// built-in make_doubleN factories for the custom 8-channel type.
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
    double8 v;
    v.a0 = a0; v.a1 = a1; v.a2 = a2; v.a3 = a3;
    v.a4 = a4; v.a5 = a5; v.a6 = a6; v.a7 = a7;
    return v;
}
|
||||||
|
|
||||||
|
// Specializes TypeVec so that TypeVec<T, cn>::vec_type yields the cn-channel
// vector type for element type T (e.g. TypeVec<uchar, 3>::vec_type == uchar3).
// cn = 1..4 map to the CUDA built-in vector types; cn = 8 maps to the custom
// *8 structs defined in this header. The already-vectorized spelling
// TypeVec<T##cn, cn> maps to itself, so callers may pass either form.
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };

OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)

#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC  // generator macro is local to this header
|
||||||
|
|
||||||
|
// schar must be specialized by hand: the CUDA vector types are named
// char2/char3/..., so the macro's token pasting ("schar ## 2") could not
// produce them from the "schar" spelling.
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };

// bool has no CUDA vector types; boolean channels are stored as uchar.
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
|
||||||
|
|
||||||
|
// VecTraits<V> describes a (possibly vectorized) type V:
//   elem_type  - the per-channel scalar type
//   cn         - number of channels
//   all(v)     - vector with every channel set to v
//   make(...)  - vector built from individual channel values
//   make(ptr)  - vector loaded element-wise from an array of elem_type
template<typename T> struct VecTraits;

// Generates the VecTraits specializations for one element type: the bare
// scalar (treated as a 1-channel vector) plus the 1/2/3/4/8-channel vector
// types, each forwarding to the corresponding make_<type>N factory.
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
    template<> struct VecTraits<type> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
    }; \
    template<> struct VecTraits<type ## 1> \
    { \
        typedef type elem_type; \
        enum {cn=1}; \
        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
    }; \
    template<> struct VecTraits<type ## 2> \
    { \
        typedef type elem_type; \
        enum {cn=2}; \
        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
    }; \
    template<> struct VecTraits<type ## 3> \
    { \
        typedef type elem_type; \
        enum {cn=3}; \
        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
    }; \
    template<> struct VecTraits<type ## 4> \
    { \
        typedef type elem_type; \
        enum {cn=4}; \
        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
    }; \
    template<> struct VecTraits<type ## 8> \
    { \
        typedef type elem_type; \
        enum {cn=8}; \
        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
    };

// Note: plain "char" and "schar" are NOT generated here; they are written out
// by hand further down because the charN vector types carry schar elements.
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)

#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS  // generator macro is local to this header
|
||||||
|
|
||||||
|
// Hand-written traits for the char family. Plain "char" (whose signedness is
// implementation-defined) and "schar" both act as 1-channel scalars, while
// the char1..char8 vector types expose schar elements and forward to the
// make_charN factories.
template<> struct VecTraits<char>
{
    typedef char elem_type;
    enum {cn=1};
    static __device__ __host__ __forceinline__ char all(char v) {return v;}
    static __device__ __host__ __forceinline__ char make(char x) {return x;}
    static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
    typedef schar elem_type;
    enum {cn=1};
    static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
    static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
    static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
    typedef schar elem_type;
    enum {cn=1};
    static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
    static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
    static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
    typedef schar elem_type;
    enum {cn=2};
    static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
    static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
    static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
    typedef schar elem_type;
    enum {cn=3};
    static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
    static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
    static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
    typedef schar elem_type;
    enum {cn=4};
    static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
    static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
    static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
    typedef schar elem_type;
    enum {cn=8};
    static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
    static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
    static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_VEC_TRAITS_HPP
|
||||||
139
3rdpart/OpenCV/include/opencv2/core/cuda/warp.hpp
Normal file
139
3rdpart/OpenCV/include/opencv2/core/cuda/warp.hpp
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
|
||||||
|
#define OPENCV_CUDA_DEVICE_WARP_HPP
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
// Warp-cooperative algorithms: the 32 threads of a warp jointly process a
// range, each lane handling elements lane, lane+32, lane+64, ...
struct Warp
{
    enum
    {
        LOG_WARP_SIZE = 5,
        WARP_SIZE = 1 << LOG_WARP_SIZE,  // 32 threads per warp
        STRIDE = WARP_SIZE               // per-lane step through a range
    };

    /** \brief Returns the warp lane ID of the calling thread. */
    static __device__ __forceinline__ unsigned int laneId()
    {
        unsigned int ret;
        // Read the %laneid special register directly via inline PTX.
        asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
        return ret;
    }

    // Warp-cooperative std::fill: writes value to every element of [beg, end).
    template<typename It, typename T>
    static __device__ __forceinline__ void fill(It beg, It end, const T& value)
    {
        for(It t = beg + laneId(); t < end; t += STRIDE)
            *t = value;
    }

    // Warp-cooperative std::copy of [beg, end) to out.
    // Returns this lane's final output position (NOT a common end iterator).
    template<typename InIt, typename OutIt>
    static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
    {
        for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
            *out = *t;
        return out;
    }

    // Warp-cooperative unary std::transform: out[i] = op(in[i]).
    template<typename InIt, typename OutIt, class UnOp>
    static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
    {
        for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
            *out = op(*t);
        return out;
    }

    // Warp-cooperative binary std::transform: out[i] = op(a[i], b[i]).
    template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
    static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
    {
        unsigned int lane = laneId();

        InIt1 t1 = beg1 + lane;
        InIt2 t2 = beg2 + lane;
        for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
            *out = op(*t1, *t2);
        return out;
    }

    // In-place tree reduction of the 32 elements at ptr[0..31] with op;
    // the result ends up in ptr[0], which is returned.
    // The volatile qualifier and fixed statement order stand in for explicit
    // synchronization. NOTE(review): this relies on implicit warp-synchronous
    // execution (pre-Volta assumption) — confirm __syncwarp needs on newer
    // architectures before reuse.
    template <class T, class BinOp>
    static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
    {
        const unsigned int lane = laneId();

        if (lane < 16)
        {
            T partial = ptr[lane];

            // Active half shrinks each step: 16, 8, 4, 2, 1.
            ptr[lane] = partial = op(partial, ptr[lane + 16]);
            ptr[lane] = partial = op(partial, ptr[lane + 8]);
            ptr[lane] = partial = op(partial, ptr[lane + 4]);
            ptr[lane] = partial = op(partial, ptr[lane + 2]);
            ptr[lane] = partial = op(partial, ptr[lane + 1]);
        }

        return *ptr;
    }

    // Warp-cooperative iota ("yota" sic, kept for API compatibility):
    // writes beg[i] = value + i for every element of [beg, end).
    template<typename OutIt, typename T>
    static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
    {
        unsigned int lane = laneId();
        value += lane;

        for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
            *t = value;
    }
};
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
|
||||||
76
3rdpart/OpenCV/include/opencv2/core/cuda/warp_reduce.hpp
Normal file
76
3rdpart/OpenCV/include/opencv2/core/cuda/warp_reduce.hpp
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||||
|
#define OPENCV_CUDA_WARP_REDUCE_HPP__
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
// Intra-warp sum reduction over a shared buffer using the classic
// warp-synchronous tree (volatile accesses, no explicit barriers).
// ptr must be addressable for this warp's full 32-lane window; on return
// the warp's lane-0 slot holds the sum and is returned to every lane.
// NOTE(review): relies on implicit warp-synchronous execution (pre-Volta
// model) — confirm __syncwarp needs on independent-thread-scheduling GPUs.
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
    const unsigned int lane = tid & 31; // index of thread in warp (0..31)

    if (lane < 16)
    {
        T partial = ptr[tid];

        // Active half shrinks each step: 16, 8, 4, 2, 1.
        ptr[tid] = partial = partial + ptr[tid + 16];
        ptr[tid] = partial = partial + ptr[tid + 8];
        ptr[tid] = partial = partial + ptr[tid + 4];
        ptr[tid] = partial = partial + ptr[tid + 2];
        ptr[tid] = partial = partial + ptr[tid + 1];
    }

    return ptr[tid - lane]; // this warp's lane-0 slot
}
|
||||||
|
}}} // namespace cv { namespace cuda { namespace cudev {
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
|
||||||
162
3rdpart/OpenCV/include/opencv2/core/cuda/warp_shuffle.hpp
Normal file
162
3rdpart/OpenCV/include/opencv2/core/cuda/warp_shuffle.hpp
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||||
|
#define OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device
|
||||||
|
{
|
||||||
|
// CUDA 9 removed the legacy (non-_sync) warp shuffles; map the old names onto
// the *_sync variants with a full-warp mask so the overloads below compile on
// both old and new toolkits.
#if __CUDACC_VER_MAJOR__ >= 9
#  define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
#  define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif

    // Broadcast val from srcLane to the calling lane (within width-sized
    // subgroups). Shuffles require compute capability >= 3.0; on older
    // targets these overloads compile to a default-constructed fallback.
    template <typename T>
    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl(val, srcLane, width);
    #else
        return T();
    #endif
    }
    // unsigned overload: routed through the int intrinsic via value casts.
    __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl((int) val, srcLane, width);
    #else
        return 0;
    #endif
    }
    // double overload: shuffled as two 32-bit halves and reassembled.
    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl(lo, srcLane, width);
        hi = __shfl(hi, srcLane, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }

    // Fetch val from the lane delta positions higher than the caller.
    template <typename T>
    __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl_down(val, delta, width);
    #else
        return T();
    #endif
    }
    __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl_down((int) val, delta, width);
    #else
        return 0;
    #endif
    }
    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl_down(lo, delta, width);
        hi = __shfl_down(hi, delta, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }

    // Fetch val from the lane delta positions lower than the caller.
    template <typename T>
    __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return __shfl_up(val, delta, width);
    #else
        return T();
    #endif
    }
    __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        return (unsigned int) __shfl_up((int) val, delta, width);
    #else
        return 0;
    #endif
    }
    __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
    {
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
        int lo = __double2loint(val);
        int hi = __double2hiint(val);

        lo = __shfl_up(lo, delta, width);
        hi = __shfl_up(hi, delta, width);

        return __hiloint2double(hi, lo);
    #else
        return 0.0;
    #endif
    }
}}}

// Remove the compatibility remappings so they cannot leak into other
// translation units (#undef of an undefined name is harmless pre-CUDA 9).
# undef __shfl
# undef __shfl_up
# undef __shfl_down
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
|
||||||
86
3rdpart/OpenCV/include/opencv2/core/cuda_stream_accessor.hpp
Normal file
86
3rdpart/OpenCV/include/opencv2/core/cuda_stream_accessor.hpp
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
|
||||||
|
#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
# error cuda_stream_accessor.hpp header must be compiled as C++
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @file cuda_stream_accessor.hpp
|
||||||
|
* This is only header file that depends on CUDA Runtime API. All other headers are independent.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "opencv2/core/cuda.hpp"
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace cuda
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @addtogroup cudacore_struct
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
/** @brief Class that enables getting cudaStream_t from cuda::Stream
 *
 * cuda::Stream hides the raw CUDA handle; this accessor is the bridge for
 * code that must call the CUDA Runtime API directly.
 */
struct StreamAccessor
{
    //! Extracts the raw cudaStream_t handle from a cuda::Stream.
    CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
    //! Wraps an existing cudaStream_t in a cuda::Stream.
    //! NOTE(review): ownership/lifetime of the wrapped handle is not visible
    //! from this declaration — confirm against the implementation.
    CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
};
|
||||||
|
|
||||||
|
/** @brief Class that enables getting cudaEvent_t from cuda::Event
 *
 * Counterpart of StreamAccessor for events: exposes the raw CUDA event handle
 * to code that must call the CUDA Runtime API directly.
 */
struct EventAccessor
{
    //! Extracts the raw cudaEvent_t handle from a cuda::Event.
    CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
    //! Wraps an existing cudaEvent_t in a cuda::Event.
    //! NOTE(review): ownership/lifetime of the wrapped handle is not visible
    //! from this declaration — confirm against the implementation.
    CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
};
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */
|
||||||
152
3rdpart/OpenCV/include/opencv2/core/cuda_types.hpp
Normal file
152
3rdpart/OpenCV/include/opencv2/core/cuda_types.hpp
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CUDA_TYPES_HPP
|
||||||
|
#define OPENCV_CORE_CUDA_TYPES_HPP
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
# error cuda_types.hpp header must be compiled as C++
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__OPENCV_BUILD) && defined(__clang__)
|
||||||
|
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
|
||||||
|
#endif
|
||||||
|
#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
|
||||||
|
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @file
|
||||||
|
* @deprecated Use @ref cudev instead.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
#define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
|
||||||
|
#else
|
||||||
|
#define __CV_CUDA_HOST_DEVICE__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core.hpp"
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace cuda
|
||||||
|
{
|
||||||
|
|
||||||
|
// Simple lightweight structures that encapsulates information about an image on device.
|
||||||
|
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
||||||
|
|
||||||
|
template <typename T> struct DevPtr
|
||||||
|
{
|
||||||
|
typedef T elem_type;
|
||||||
|
typedef int index_type;
|
||||||
|
|
||||||
|
enum { elem_size = sizeof(elem_type) };
|
||||||
|
|
||||||
|
T* data;
|
||||||
|
|
||||||
|
__CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
|
||||||
|
__CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
|
||||||
|
|
||||||
|
__CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
||||||
|
__CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
|
||||||
|
__CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct PtrSz : public DevPtr<T>
|
||||||
|
{
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
|
||||||
|
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct PtrStep : public DevPtr<T>
|
||||||
|
{
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
|
||||||
|
|
||||||
|
size_t step;
|
||||||
|
|
||||||
|
__CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)(((DevPtr<T>*)this)->data) + y * step); }
|
||||||
|
__CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)(((DevPtr<T>*)this)->data) + y * step); }
|
||||||
|
|
||||||
|
__CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
||||||
|
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T> struct PtrStepSz : public PtrStep<T>
|
||||||
|
{
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
|
||||||
|
__CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
|
||||||
|
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
|
||||||
|
|
||||||
|
template <typename U>
|
||||||
|
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
|
||||||
|
|
||||||
|
int cols;
|
||||||
|
int rows;
|
||||||
|
|
||||||
|
CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ Size size() const { return {cols, rows}; }
|
||||||
|
CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ T& operator ()(const Point &pos) { return (*this)(pos.y, pos.x); }
|
||||||
|
CV_NODISCARD_STD __CV_CUDA_HOST_DEVICE__ const T& operator ()(const Point &pos) const { return (*this)(pos.y, pos.x); }
|
||||||
|
using PtrStep<T>::operator();
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef PtrStepSz<unsigned char> PtrStepSzb;
|
||||||
|
typedef PtrStepSz<unsigned short> PtrStepSzus;
|
||||||
|
typedef PtrStepSz<float> PtrStepSzf;
|
||||||
|
typedef PtrStepSz<int> PtrStepSzi;
|
||||||
|
|
||||||
|
typedef PtrStep<unsigned char> PtrStepb;
|
||||||
|
typedef PtrStep<unsigned short> PtrStepus;
|
||||||
|
typedef PtrStep<float> PtrStepf;
|
||||||
|
typedef PtrStep<int> PtrStepi;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif /* OPENCV_CORE_CUDA_TYPES_HPP */
|
||||||
403
3rdpart/OpenCV/include/opencv2/core/cv_cpu_dispatch.h
Normal file
403
3rdpart/OpenCV/include/opencv2/core/cv_cpu_dispatch.h
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#if defined __OPENCV_BUILD \
|
||||||
|
|
||||||
|
#include "cv_cpu_config.h"
|
||||||
|
#include "cv_cpu_helper.h"
|
||||||
|
|
||||||
|
#ifdef CV_CPU_DISPATCH_MODE
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||||
|
#else
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
|
||||||
|
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||||
|
#define CV_CPU_BASELINE_MODE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
|
||||||
|
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
|
||||||
|
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
|
||||||
|
|
||||||
|
|
||||||
|
#if defined CV_ENABLE_INTRINSICS \
|
||||||
|
&& !defined CV_DISABLE_OPTIMIZATION \
|
||||||
|
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_SSE2
|
||||||
|
# include <emmintrin.h>
|
||||||
|
# define CV_MMX 1
|
||||||
|
# define CV_SSE 1
|
||||||
|
# define CV_SSE2 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_SSE3
|
||||||
|
# include <pmmintrin.h>
|
||||||
|
# define CV_SSE3 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_SSSE3
|
||||||
|
# include <tmmintrin.h>
|
||||||
|
# define CV_SSSE3 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_SSE4_1
|
||||||
|
# include <smmintrin.h>
|
||||||
|
# define CV_SSE4_1 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_SSE4_2
|
||||||
|
# include <nmmintrin.h>
|
||||||
|
# define CV_SSE4_2 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_POPCNT
|
||||||
|
# ifdef _MSC_VER
|
||||||
|
# include <nmmintrin.h>
|
||||||
|
# if defined(_M_X64)
|
||||||
|
# define CV_POPCNT_U64 (int)_mm_popcnt_u64
|
||||||
|
# endif
|
||||||
|
# define CV_POPCNT_U32 _mm_popcnt_u32
|
||||||
|
# else
|
||||||
|
# include <popcntintrin.h>
|
||||||
|
# if defined(__x86_64__)
|
||||||
|
# define CV_POPCNT_U64 __builtin_popcountll
|
||||||
|
# endif
|
||||||
|
# define CV_POPCNT_U32 __builtin_popcount
|
||||||
|
# endif
|
||||||
|
# define CV_POPCNT 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX
|
||||||
|
# include <immintrin.h>
|
||||||
|
# define CV_AVX 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_FP16
|
||||||
|
# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# else
|
||||||
|
# include <immintrin.h>
|
||||||
|
# endif
|
||||||
|
# define CV_FP16 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_NEON_DOTPROD
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# define CV_NEON_DOT 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX2
|
||||||
|
# include <immintrin.h>
|
||||||
|
# define CV_AVX2 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX_512F
|
||||||
|
# include <immintrin.h>
|
||||||
|
# define CV_AVX_512F 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_COMMON
|
||||||
|
# define CV_AVX512_COMMON 1
|
||||||
|
# define CV_AVX_512CD 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_KNL
|
||||||
|
# define CV_AVX512_KNL 1
|
||||||
|
# define CV_AVX_512ER 1
|
||||||
|
# define CV_AVX_512PF 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_KNM
|
||||||
|
# define CV_AVX512_KNM 1
|
||||||
|
# define CV_AVX_5124FMAPS 1
|
||||||
|
# define CV_AVX_5124VNNIW 1
|
||||||
|
# define CV_AVX_512VPOPCNTDQ 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_SKX
|
||||||
|
# define CV_AVX512_SKX 1
|
||||||
|
# define CV_AVX_512VL 1
|
||||||
|
# define CV_AVX_512BW 1
|
||||||
|
# define CV_AVX_512DQ 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_CNL
|
||||||
|
# define CV_AVX512_CNL 1
|
||||||
|
# define CV_AVX_512IFMA 1
|
||||||
|
# define CV_AVX_512VBMI 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_CLX
|
||||||
|
# define CV_AVX512_CLX 1
|
||||||
|
# define CV_AVX_512VNNI 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_AVX512_ICL
|
||||||
|
# define CV_AVX512_ICL 1
|
||||||
|
# undef CV_AVX_512IFMA
|
||||||
|
# define CV_AVX_512IFMA 1
|
||||||
|
# undef CV_AVX_512VBMI
|
||||||
|
# define CV_AVX_512VBMI 1
|
||||||
|
# undef CV_AVX_512VNNI
|
||||||
|
# define CV_AVX_512VNNI 1
|
||||||
|
# define CV_AVX_512VBMI2 1
|
||||||
|
# define CV_AVX_512BITALG 1
|
||||||
|
# define CV_AVX_512VPOPCNTDQ 1
|
||||||
|
#endif
|
||||||
|
#ifdef CV_CPU_COMPILE_FMA3
|
||||||
|
# define CV_FMA3 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
|
||||||
|
# include <Intrin.h>
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# define CV_NEON 1
|
||||||
|
#elif defined(__ARM_NEON)
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# define CV_NEON 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* RVV-related macro states with different compiler
|
||||||
|
// +--------------------+----------+----------+
|
||||||
|
// | Macro | Upstream | XuanTie |
|
||||||
|
// +--------------------+----------+----------+
|
||||||
|
// | CV_CPU_COMPILE_RVV | defined | defined |
|
||||||
|
// | CV_RVV | 1 | 0 |
|
||||||
|
// | CV_RVV071 | 0 | 1 |
|
||||||
|
// | CV_TRY_RVV | 1 | 1 |
|
||||||
|
// +--------------------+----------+----------+
|
||||||
|
*/
|
||||||
|
#ifdef CV_CPU_COMPILE_RVV
|
||||||
|
# ifdef __riscv_vector_071
|
||||||
|
# define CV_RVV071 1
|
||||||
|
# else
|
||||||
|
# define CV_RVV 1
|
||||||
|
# endif
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_VSX
|
||||||
|
# include <altivec.h>
|
||||||
|
# undef vector
|
||||||
|
# undef pixel
|
||||||
|
# undef bool
|
||||||
|
# define CV_VSX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_VSX3
|
||||||
|
# define CV_VSX3 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_MSA
|
||||||
|
# include "hal/msa_macros.h"
|
||||||
|
# define CV_MSA 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_LSX
|
||||||
|
# include <lsxintrin.h>
|
||||||
|
# define CV_LSX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_CPU_COMPILE_LASX
|
||||||
|
# include <lasxintrin.h>
|
||||||
|
# define CV_LASX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __EMSCRIPTEN__
|
||||||
|
# define CV_WASM_SIMD 1
|
||||||
|
# include <wasm_simd128.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||||
|
|
||||||
|
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
|
||||||
|
struct VZeroUpperGuard {
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__attribute__((always_inline))
|
||||||
|
#endif
|
||||||
|
inline VZeroUpperGuard() { _mm256_zeroupper(); }
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__attribute__((always_inline))
|
||||||
|
#endif
|
||||||
|
inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
|
||||||
|
};
|
||||||
|
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __CV_AVX_GUARD
|
||||||
|
#define CV_AVX_GUARD __CV_AVX_GUARD
|
||||||
|
#else
|
||||||
|
#define CV_AVX_GUARD
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // __OPENCV_BUILD
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if !defined __OPENCV_BUILD /* Compatibility code */ \
|
||||||
|
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
|
||||||
|
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
|
||||||
|
# include <emmintrin.h>
|
||||||
|
# define CV_MMX 1
|
||||||
|
# define CV_SSE 1
|
||||||
|
# define CV_SSE2 1
|
||||||
|
#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
|
||||||
|
# include <Intrin.h>
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# define CV_NEON 1
|
||||||
|
#elif defined(__ARM_NEON)
|
||||||
|
# include <arm_neon.h>
|
||||||
|
# define CV_NEON 1
|
||||||
|
#ifdef __ARM_FEATURE_SVE
|
||||||
|
# include<arm_sve.h>
|
||||||
|
# define CV_SVE 1
|
||||||
|
#endif
|
||||||
|
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||||
|
# include <altivec.h>
|
||||||
|
# undef vector
|
||||||
|
# undef pixel
|
||||||
|
# undef bool
|
||||||
|
# define CV_VSX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __F16C__
|
||||||
|
# include <immintrin.h>
|
||||||
|
# define CV_FP16 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef CV_MMX
|
||||||
|
# define CV_MMX 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSE
|
||||||
|
# define CV_SSE 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSE2
|
||||||
|
# define CV_SSE2 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSE3
|
||||||
|
# define CV_SSE3 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSSE3
|
||||||
|
# define CV_SSSE3 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSE4_1
|
||||||
|
# define CV_SSE4_1 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_SSE4_2
|
||||||
|
# define CV_SSE4_2 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_POPCNT
|
||||||
|
# define CV_POPCNT 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX
|
||||||
|
# define CV_AVX 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_FP16
|
||||||
|
# define CV_FP16 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX2
|
||||||
|
# define CV_AVX2 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_FMA3
|
||||||
|
# define CV_FMA3 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512F
|
||||||
|
# define CV_AVX_512F 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512BW
|
||||||
|
# define CV_AVX_512BW 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512CD
|
||||||
|
# define CV_AVX_512CD 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512DQ
|
||||||
|
# define CV_AVX_512DQ 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512ER
|
||||||
|
# define CV_AVX_512ER 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512IFMA
|
||||||
|
# define CV_AVX_512IFMA 0
|
||||||
|
#endif
|
||||||
|
#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
|
||||||
|
#ifndef CV_AVX_512PF
|
||||||
|
# define CV_AVX_512PF 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512VBMI
|
||||||
|
# define CV_AVX_512VBMI 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512VL
|
||||||
|
# define CV_AVX_512VL 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_5124FMAPS
|
||||||
|
# define CV_AVX_5124FMAPS 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_5124VNNIW
|
||||||
|
# define CV_AVX_5124VNNIW 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512VPOPCNTDQ
|
||||||
|
# define CV_AVX_512VPOPCNTDQ 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512VNNI
|
||||||
|
# define CV_AVX_512VNNI 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512VBMI2
|
||||||
|
# define CV_AVX_512VBMI2 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX_512BITALG
|
||||||
|
# define CV_AVX_512BITALG 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_COMMON
|
||||||
|
# define CV_AVX512_COMMON 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_KNL
|
||||||
|
# define CV_AVX512_KNL 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_KNM
|
||||||
|
# define CV_AVX512_KNM 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_SKX
|
||||||
|
# define CV_AVX512_SKX 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_CNL
|
||||||
|
# define CV_AVX512_CNL 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_CLX
|
||||||
|
# define CV_AVX512_CLX 0
|
||||||
|
#endif
|
||||||
|
#ifndef CV_AVX512_ICL
|
||||||
|
# define CV_AVX512_ICL 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_NEON
|
||||||
|
# define CV_NEON 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SVE
|
||||||
|
# define CV_SVE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_RVV071
|
||||||
|
# define CV_RVV071 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_VSX
|
||||||
|
# define CV_VSX 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_VSX3
|
||||||
|
# define CV_VSX3 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_MSA
|
||||||
|
# define CV_MSA 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_WASM_SIMD
|
||||||
|
# define CV_WASM_SIMD 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_RVV
|
||||||
|
# define CV_RVV 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_LSX
|
||||||
|
# define CV_LSX 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_LASX
|
||||||
|
# define CV_LASX 0
|
||||||
|
#endif
|
||||||
634
3rdpart/OpenCV/include/opencv2/core/cv_cpu_helper.h
Normal file
634
3rdpart/OpenCV/include/opencv2/core/cv_cpu_helper.h
Normal file
@@ -0,0 +1,634 @@
|
|||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
|
||||||
|
# define CV_TRY_SSE 1
|
||||||
|
# define CV_CPU_FORCE_SSE 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE 1
|
||||||
|
# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
|
||||||
|
# define CV_TRY_SSE 1
|
||||||
|
# define CV_CPU_FORCE_SSE 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
|
||||||
|
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSE 0
|
||||||
|
# define CV_CPU_FORCE_SSE 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE 0
|
||||||
|
# define CV_CPU_CALL_SSE(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSE_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
|
||||||
|
# define CV_TRY_SSE2 1
|
||||||
|
# define CV_CPU_FORCE_SSE2 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE2 1
|
||||||
|
# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
|
||||||
|
# define CV_TRY_SSE2 1
|
||||||
|
# define CV_CPU_FORCE_SSE2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
|
||||||
|
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSE2 0
|
||||||
|
# define CV_CPU_FORCE_SSE2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE2 0
|
||||||
|
# define CV_CPU_CALL_SSE2(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSE2_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
|
||||||
|
# define CV_TRY_SSE3 1
|
||||||
|
# define CV_CPU_FORCE_SSE3 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE3 1
|
||||||
|
# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
|
||||||
|
# define CV_TRY_SSE3 1
|
||||||
|
# define CV_CPU_FORCE_SSE3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
|
||||||
|
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSE3 0
|
||||||
|
# define CV_CPU_FORCE_SSE3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE3 0
|
||||||
|
# define CV_CPU_CALL_SSE3(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSE3_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
|
||||||
|
# define CV_TRY_SSSE3 1
|
||||||
|
# define CV_CPU_FORCE_SSSE3 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSSE3 1
|
||||||
|
# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
|
||||||
|
# define CV_TRY_SSSE3 1
|
||||||
|
# define CV_CPU_FORCE_SSSE3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
|
||||||
|
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||||
|
# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSSE3 0
|
||||||
|
# define CV_CPU_FORCE_SSSE3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSSE3 0
|
||||||
|
# define CV_CPU_CALL_SSSE3(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSSE3_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
|
||||||
|
# define CV_TRY_SSE4_1 1
|
||||||
|
# define CV_CPU_FORCE_SSE4_1 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
|
||||||
|
# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
|
||||||
|
# define CV_TRY_SSE4_1 1
|
||||||
|
# define CV_CPU_FORCE_SSE4_1 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
|
||||||
|
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSE4_1 0
|
||||||
|
# define CV_CPU_FORCE_SSE4_1 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
|
||||||
|
# define CV_CPU_CALL_SSE4_1(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSE4_1_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
|
||||||
|
# define CV_TRY_SSE4_2 1
|
||||||
|
# define CV_CPU_FORCE_SSE4_2 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
|
||||||
|
# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
|
||||||
|
# define CV_TRY_SSE4_2 1
|
||||||
|
# define CV_CPU_FORCE_SSE4_2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
|
||||||
|
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||||
|
# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SSE4_2 0
|
||||||
|
# define CV_CPU_FORCE_SSE4_2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
|
||||||
|
# define CV_CPU_CALL_SSE4_2(fn, args)
|
||||||
|
# define CV_CPU_CALL_SSE4_2_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
|
||||||
|
# define CV_TRY_POPCNT 1
|
||||||
|
# define CV_CPU_FORCE_POPCNT 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_POPCNT 1
|
||||||
|
# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
|
||||||
|
# define CV_TRY_POPCNT 1
|
||||||
|
# define CV_CPU_FORCE_POPCNT 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
|
||||||
|
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||||
|
# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_POPCNT 0
|
||||||
|
# define CV_CPU_FORCE_POPCNT 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_POPCNT 0
|
||||||
|
# define CV_CPU_CALL_POPCNT(fn, args)
|
||||||
|
# define CV_CPU_CALL_POPCNT_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
|
||||||
|
# define CV_TRY_AVX 1
|
||||||
|
# define CV_CPU_FORCE_AVX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX 1
|
||||||
|
# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||||
|
# define CV_TRY_AVX 1
|
||||||
|
# define CV_CPU_FORCE_AVX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
|
||||||
|
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX 0
|
||||||
|
# define CV_CPU_FORCE_AVX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX 0
|
||||||
|
# define CV_CPU_CALL_AVX(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
|
||||||
|
# define CV_TRY_FP16 1
|
||||||
|
# define CV_CPU_FORCE_FP16 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FP16 1
|
||||||
|
# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
|
||||||
|
# define CV_TRY_FP16 1
|
||||||
|
# define CV_CPU_FORCE_FP16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
|
||||||
|
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||||
|
# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_FP16 0
|
||||||
|
# define CV_CPU_FORCE_FP16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FP16 0
|
||||||
|
# define CV_CPU_CALL_FP16(fn, args)
|
||||||
|
# define CV_CPU_CALL_FP16_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
|
||||||
|
# define CV_TRY_AVX2 1
|
||||||
|
# define CV_CPU_FORCE_AVX2 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX2 1
|
||||||
|
# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
|
||||||
|
# define CV_TRY_AVX2 1
|
||||||
|
# define CV_CPU_FORCE_AVX2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
|
||||||
|
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX2 0
|
||||||
|
# define CV_CPU_FORCE_AVX2 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX2 0
|
||||||
|
# define CV_CPU_CALL_AVX2(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX2_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
|
||||||
|
# define CV_TRY_FMA3 1
|
||||||
|
# define CV_CPU_FORCE_FMA3 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FMA3 1
|
||||||
|
# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
|
||||||
|
# define CV_TRY_FMA3 1
|
||||||
|
# define CV_CPU_FORCE_FMA3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
|
||||||
|
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||||
|
# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_FMA3 0
|
||||||
|
# define CV_CPU_FORCE_FMA3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_FMA3 0
|
||||||
|
# define CV_CPU_CALL_FMA3(fn, args)
|
||||||
|
# define CV_CPU_CALL_FMA3_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
|
||||||
|
# define CV_TRY_AVX_512F 1
|
||||||
|
# define CV_CPU_FORCE_AVX_512F 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX_512F 1
|
||||||
|
# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
|
||||||
|
# define CV_TRY_AVX_512F 1
|
||||||
|
# define CV_CPU_FORCE_AVX_512F 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
|
||||||
|
# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX_512F 0
|
||||||
|
# define CV_CPU_FORCE_AVX_512F 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX_512F 0
|
||||||
|
# define CV_CPU_CALL_AVX_512F(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX_512F_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON
|
||||||
|
# define CV_TRY_AVX512_COMMON 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_COMMON 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON
|
||||||
|
# define CV_TRY_AVX512_COMMON 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_COMMON 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON))
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_COMMON 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_COMMON 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_COMMON_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL
|
||||||
|
# define CV_TRY_AVX512_KNL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNL 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL
|
||||||
|
# define CV_TRY_AVX512_KNL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL))
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_KNL 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNL_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM
|
||||||
|
# define CV_TRY_AVX512_KNM 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNM 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM
|
||||||
|
# define CV_TRY_AVX512_KNM 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNM 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM))
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_KNM 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_KNM 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_KNM_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
|
||||||
|
# define CV_TRY_AVX512_SKX 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_SKX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
|
||||||
|
# define CV_TRY_AVX512_SKX 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_SKX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_SKX 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_SKX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_SKX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL
|
||||||
|
# define CV_TRY_AVX512_CNL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_CNL 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL
|
||||||
|
# define CV_TRY_AVX512_CNL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_CNL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL))
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_CNL 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_CNL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CNL_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX
|
||||||
|
# define CV_TRY_AVX512_CLX 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_CLX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CLX 1
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX
|
||||||
|
# define CV_TRY_AVX512_CLX 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_CLX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX))
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_CLX 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_CLX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_CLX 0
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_CLX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...) CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL
|
||||||
|
# define CV_TRY_AVX512_ICL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_ICL 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL
|
||||||
|
# define CV_TRY_AVX512_ICL 1
|
||||||
|
# define CV_CPU_FORCE_AVX512_ICL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL))
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_AVX512_ICL 0
|
||||||
|
# define CV_CPU_FORCE_AVX512_ICL 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL(fn, args)
|
||||||
|
# define CV_CPU_CALL_AVX512_ICL_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SVE
|
||||||
|
# define CV_TRY_SVE 1
|
||||||
|
# define CV_CPU_FORCE_SVE 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SVE 1
|
||||||
|
# define CV_CPU_CALL_SVE(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_SVE_(fn, args) return (opt_SVE::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SVE
|
||||||
|
# define CV_TRY_SVE 1
|
||||||
|
# define CV_CPU_FORCE_SVE 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SVE (cv::checkHardwareSupport(CV_CPU_SVE))
|
||||||
|
# define CV_CPU_CALL_SVE(fn, args) if (CV_CPU_HAS_SUPPORT_SVE) return (opt_SVE::fn args)
|
||||||
|
# define CV_CPU_CALL_SVE_(fn, args) if (CV_CPU_HAS_SUPPORT_SVE) return (opt_SVE::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_SVE 0
|
||||||
|
# define CV_CPU_FORCE_SVE 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_SVE 0
|
||||||
|
# define CV_CPU_CALL_SVE(fn, args)
|
||||||
|
# define CV_CPU_CALL_SVE_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_SVE(fn, args, mode, ...) CV_CPU_CALL_SVE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||||
|
# define CV_TRY_NEON 1
|
||||||
|
# define CV_CPU_FORCE_NEON 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||||
|
# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
|
||||||
|
# define CV_TRY_NEON 1
|
||||||
|
# define CV_CPU_FORCE_NEON 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
|
||||||
|
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_NEON 0
|
||||||
|
# define CV_CPU_FORCE_NEON 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON 0
|
||||||
|
# define CV_CPU_CALL_NEON(fn, args)
|
||||||
|
# define CV_CPU_CALL_NEON_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON_DOTPROD
|
||||||
|
# define CV_TRY_NEON_DOTPROD 1
|
||||||
|
# define CV_CPU_FORCE_NEON_DOTPROD 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 1
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) return (opt_NEON_DOTPROD::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON_DOTPROD
|
||||||
|
# define CV_TRY_NEON_DOTPROD 1
|
||||||
|
# define CV_CPU_FORCE_NEON_DOTPROD 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD (cv::checkHardwareSupport(CV_CPU_NEON_DOTPROD))
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_DOTPROD) return (opt_NEON_DOTPROD::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_NEON_DOTPROD 0
|
||||||
|
# define CV_CPU_FORCE_NEON_DOTPROD 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_DOTPROD 0
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD(fn, args)
|
||||||
|
# define CV_CPU_CALL_NEON_DOTPROD_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_NEON_DOTPROD(fn, args, mode, ...) CV_CPU_CALL_NEON_DOTPROD(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON_FP16
|
||||||
|
# define CV_TRY_NEON_FP16 1
|
||||||
|
# define CV_CPU_FORCE_NEON_FP16 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_FP16 1
|
||||||
|
# define CV_CPU_CALL_NEON_FP16(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_FP16_(fn, args) return (opt_NEON_FP16::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON_FP16
|
||||||
|
# define CV_TRY_NEON_FP16 1
|
||||||
|
# define CV_CPU_FORCE_NEON_FP16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_FP16 (cv::checkHardwareSupport(CV_CPU_NEON_FP16))
|
||||||
|
# define CV_CPU_CALL_NEON_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_FP16) return (opt_NEON_FP16::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_FP16) return (opt_NEON_FP16::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_NEON_FP16 0
|
||||||
|
# define CV_CPU_FORCE_NEON_FP16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_FP16 0
|
||||||
|
# define CV_CPU_CALL_NEON_FP16(fn, args)
|
||||||
|
# define CV_CPU_CALL_NEON_FP16_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_NEON_FP16(fn, args, mode, ...) CV_CPU_CALL_NEON_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON_BF16
|
||||||
|
# define CV_TRY_NEON_BF16 1
|
||||||
|
# define CV_CPU_FORCE_NEON_BF16 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_BF16 1
|
||||||
|
# define CV_CPU_CALL_NEON_BF16(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_BF16_(fn, args) return (opt_NEON_BF16::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON_BF16
|
||||||
|
# define CV_TRY_NEON_BF16 1
|
||||||
|
# define CV_CPU_FORCE_NEON_BF16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_BF16 (cv::checkHardwareSupport(CV_CPU_NEON_BF16))
|
||||||
|
# define CV_CPU_CALL_NEON_BF16(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_BF16) return (opt_NEON_BF16::fn args)
|
||||||
|
# define CV_CPU_CALL_NEON_BF16_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON_BF16) return (opt_NEON_BF16::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_NEON_BF16 0
|
||||||
|
# define CV_CPU_FORCE_NEON_BF16 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_NEON_BF16 0
|
||||||
|
# define CV_CPU_CALL_NEON_BF16(fn, args)
|
||||||
|
# define CV_CPU_CALL_NEON_BF16_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_NEON_BF16(fn, args, mode, ...) CV_CPU_CALL_NEON_BF16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA
|
||||||
|
# define CV_TRY_MSA 1
|
||||||
|
# define CV_CPU_FORCE_MSA 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_MSA 1
|
||||||
|
# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA
|
||||||
|
# define CV_TRY_MSA 1
|
||||||
|
# define CV_CPU_FORCE_MSA 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA))
|
||||||
|
# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
|
||||||
|
# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_MSA 0
|
||||||
|
# define CV_CPU_FORCE_MSA 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_MSA 0
|
||||||
|
# define CV_CPU_CALL_MSA(fn, args)
|
||||||
|
# define CV_CPU_CALL_MSA_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
|
||||||
|
# define CV_TRY_VSX 1
|
||||||
|
# define CV_CPU_FORCE_VSX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX 1
|
||||||
|
# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
|
||||||
|
# define CV_TRY_VSX 1
|
||||||
|
# define CV_CPU_FORCE_VSX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
|
||||||
|
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
|
||||||
|
# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_VSX 0
|
||||||
|
# define CV_CPU_FORCE_VSX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX 0
|
||||||
|
# define CV_CPU_CALL_VSX(fn, args)
|
||||||
|
# define CV_CPU_CALL_VSX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
|
||||||
|
# define CV_TRY_VSX3 1
|
||||||
|
# define CV_CPU_FORCE_VSX3 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX3 1
|
||||||
|
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
|
||||||
|
# define CV_TRY_VSX3 1
|
||||||
|
# define CV_CPU_FORCE_VSX3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
|
||||||
|
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||||
|
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_VSX3 0
|
||||||
|
# define CV_CPU_FORCE_VSX3 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_VSX3 0
|
||||||
|
# define CV_CPU_CALL_VSX3(fn, args)
|
||||||
|
# define CV_CPU_CALL_VSX3_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_RVV
|
||||||
|
# define CV_TRY_RVV 1
|
||||||
|
# define CV_CPU_FORCE_RVV 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_RVV 1
|
||||||
|
# define CV_CPU_CALL_RVV(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_RVV_(fn, args) return (opt_RVV::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_RVV
|
||||||
|
# define CV_TRY_RVV 1
|
||||||
|
# define CV_CPU_FORCE_RVV 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_RVV (cv::checkHardwareSupport(CV_CPU_RVV))
|
||||||
|
# define CV_CPU_CALL_RVV(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args)
|
||||||
|
# define CV_CPU_CALL_RVV_(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_RVV 0
|
||||||
|
# define CV_CPU_FORCE_RVV 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_RVV 0
|
||||||
|
# define CV_CPU_CALL_RVV(fn, args)
|
||||||
|
# define CV_CPU_CALL_RVV_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...) CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_LSX
|
||||||
|
# define CV_TRY_LSX 1
|
||||||
|
# define CV_CPU_FORCE_LSX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LSX 1
|
||||||
|
# define CV_CPU_CALL_LSX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_LSX_(fn, args) return (opt_LSX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_LSX
|
||||||
|
# define CV_TRY_LSX 1
|
||||||
|
# define CV_CPU_FORCE_LSX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LSX (cv::checkHardwareSupport(CV_CPU_LSX))
|
||||||
|
# define CV_CPU_CALL_LSX(fn, args) if (CV_CPU_HAS_SUPPORT_LSX) return (opt_LSX::fn args)
|
||||||
|
# define CV_CPU_CALL_LSX_(fn, args) if (CV_CPU_HAS_SUPPORT_LSX) return (opt_LSX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_LSX 0
|
||||||
|
# define CV_CPU_FORCE_LSX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LSX 0
|
||||||
|
# define CV_CPU_CALL_LSX(fn, args)
|
||||||
|
# define CV_CPU_CALL_LSX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_LSX(fn, args, mode, ...) CV_CPU_CALL_LSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_LASX
|
||||||
|
# define CV_TRY_LASX 1
|
||||||
|
# define CV_CPU_FORCE_LASX 1
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LASX 1
|
||||||
|
# define CV_CPU_CALL_LASX(fn, args) return (cpu_baseline::fn args)
|
||||||
|
# define CV_CPU_CALL_LASX_(fn, args) return (opt_LASX::fn args)
|
||||||
|
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_LASX
|
||||||
|
# define CV_TRY_LASX 1
|
||||||
|
# define CV_CPU_FORCE_LASX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LASX (cv::checkHardwareSupport(CV_CPU_LASX))
|
||||||
|
# define CV_CPU_CALL_LASX(fn, args) if (CV_CPU_HAS_SUPPORT_LASX) return (opt_LASX::fn args)
|
||||||
|
# define CV_CPU_CALL_LASX_(fn, args) if (CV_CPU_HAS_SUPPORT_LASX) return (opt_LASX::fn args)
|
||||||
|
#else
|
||||||
|
# define CV_TRY_LASX 0
|
||||||
|
# define CV_CPU_FORCE_LASX 0
|
||||||
|
# define CV_CPU_HAS_SUPPORT_LASX 0
|
||||||
|
# define CV_CPU_CALL_LASX(fn, args)
|
||||||
|
# define CV_CPU_CALL_LASX_(fn, args)
|
||||||
|
#endif
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_LASX(fn, args, mode, ...) CV_CPU_CALL_LASX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||||
|
|
||||||
|
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||||
|
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||||
950
3rdpart/OpenCV/include/opencv2/core/cvdef.h
Normal file
950
3rdpart/OpenCV/include/opencv2/core/cvdef.h
Normal file
@@ -0,0 +1,950 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CVDEF_H
#define OPENCV_CORE_CVDEF_H

#include "opencv2/core/version.hpp"

//! @addtogroup core_utils
//! @{

#ifdef OPENCV_INCLUDE_PORT_FILE  // User-provided header file with custom platform configuration
#include OPENCV_INCLUDE_PORT_FILE
#endif

#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
    (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
// Guard to prevent using of binary incompatible binaries / runtimes
// https://github.com/opencv/opencv/pull/9161
#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
#define CV__DEBUG_NS_END }
namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
#endif
#endif

// Fallback: outside debug builds the debug-namespace markers expand to nothing.
#ifndef CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_END
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
#include "cvconfig.h"
#endif

// Forces one extra macro-expansion pass (needed by MSVC's preprocessor).
#ifndef __CV_EXPAND
#define __CV_EXPAND(x) x
#endif

// Token-pasting helper; the two-level indirection expands arguments first.
#ifndef __CV_CAT
#define __CV_CAT__(x, y) x ## y
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
#define __CV_CAT(x, y) __CV_CAT_(x, y)
#endif

// Counts variadic macro arguments (1..10) by shifting a descending number list.
#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
|
||||||
|
|
||||||
|
// CV_Func: name of the enclosing function, for diagnostics/error messages.
#ifdef CV_Func
// keep current value (through OpenCV port file)
#elif defined __GNUC__ || (defined(__cplusplus) && (__cplusplus >= 201103))
// fixed: was "__cpluscplus" (typo), so the C++11 test could never fire
#define CV_Func __func__
#elif defined __clang__ && (__clang_major__ * 100 + __clang_minor__ >= 305)
// fixed: major/minor were swapped in the version computation
#define CV_Func __func__
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)
// fixed: was "__STDC_VERSION" (missing trailing underscores)
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 600)
// fixed: was "_INTEL_COMPILER" (single leading underscore), always 0
#define CV_Func __FUNCTION__
#elif defined __IBMCPP__ && __IBMCPP__ >=500
#define CV_Func __FUNCTION__
#elif defined __BORLAND__ && (__BORLANDC__ >= 0x550)
#define CV_Func __FUNC__
#else
#define CV_Func "<unknown>"
#endif
|
||||||
|
|
||||||
|
//! @cond IGNORED

//////////////// static assert /////////////////
// Name concatenation used to build unique per-line typedef names below.
#define CVAUX_CONCAT_EXP(a, b) a##b
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)

// Prefer the compiler's native static_assert / _Static_assert when available.
#if defined(__clang__)
#  ifndef __has_extension
#    define __has_extension __has_feature /* compatibility, for older versions of clang */
#  endif
#  if __has_extension(cxx_static_assert)
#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
#  elif __has_extension(c_static_assert)
#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
#  endif
#elif defined(__GNUC__)
#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
#  endif
#elif defined(_MSC_VER)
#  if _MSC_VER >= 1600 /* MSVC 10 */
#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
#  endif
#endif
// Fallbacks: GCC statement-expression with an error attribute, or a template
// trick (sizeof an incomplete specialization fails for a false condition).
#ifndef CV_StaticAssert
#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
#  else
namespace cv {
  template <bool x> struct CV_StaticAssert_failed;
  template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
  template<int x> struct CV_StaticAssert_test {};
}
#    define CV_StaticAssert(condition, reason)\
       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
#  endif
#endif
|
||||||
|
|
||||||
|
// Suppress warning "-Wdeprecated-declarations" / C4996
// CV_DO_PRAGMA turns a macro argument into a _Pragma/__pragma directive.
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#else
#define CV_DO_PRAGMA(x)
#endif

// Bracket code that intentionally uses deprecated APIs with START/END.
#ifdef _MSC_VER
#define CV_SUPPRESS_DEPRECATED_START \
    CV_DO_PRAGMA(warning(push)) \
    CV_DO_PRAGMA(warning(disable: 4996))
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
#define CV_SUPPRESS_DEPRECATED_START \
    CV_DO_PRAGMA(GCC diagnostic push) \
    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
#else
#define CV_SUPPRESS_DEPRECATED_START
#define CV_SUPPRESS_DEPRECATED_END
#endif

// Silences "unused variable" warnings for a named variable.
#define CV_UNUSED(name) (void)name

//! @endcond

// undef problematic defines sometimes defined by system headers (windows.h in particular)
#undef small
#undef min
#undef max
#undef abs
#undef Complex

#if defined __cplusplus
#include <limits>
#else
#include <limits.h>
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/interface.h"
|
||||||
|
|
||||||
|
// CV_ICC: set when compiling with any flavor of the Intel compiler.
#if defined __ICL
#  define CV_ICC   __ICL
#elif defined __ICC
#  define CV_ICC   __ICC
#elif defined __ECL
#  define CV_ICC   __ECL
#elif defined __ECC
#  define CV_ICC   __ECC
#elif defined __INTEL_COMPILER
#  define CV_ICC   __INTEL_COMPILER
#endif

// Calling conventions only matter on Windows; empty elsewhere.
#if defined _WIN32
#  define CV_CDECL __cdecl
#  define CV_STDCALL __stdcall
#else
#  define CV_CDECL
#  define CV_STDCALL
#endif

#ifndef CV_INLINE
#  if defined __cplusplus
#    define CV_INLINE static inline
#  elif defined _MSC_VER
#    define CV_INLINE __inline
#  else
#    define CV_INLINE static
#  endif
#endif

// Strong inlining hint for hot helpers.
#ifndef CV_ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define CV_ALWAYS_INLINE __forceinline
#else
#define CV_ALWAYS_INLINE inline
#endif
#endif

// Manual loop unrolling is disabled for ICC (its vectorizer handles it) and
// when optimization is explicitly turned off.
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
#  define CV_ENABLE_UNROLLED 0
#else
#  define CV_ENABLE_UNROLLED 1
#endif

// Portable declaration-alignment attribute.
#ifdef __GNUC__
#  define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
#elif defined _MSC_VER
#  define CV_DECL_ALIGNED(x) __declspec(align(x))
#else
#  define CV_DECL_ALIGNED(x)
#endif
|
||||||
|
|
||||||
|
/* CPU features and intrinsics support */
// Feature IDs used by the runtime dispatcher; values must stay stable and
// must mirror the CpuFeatures enum below.
#define CV_CPU_NONE             0
#define CV_CPU_MMX              1
#define CV_CPU_SSE              2
#define CV_CPU_SSE2             3
#define CV_CPU_SSE3             4
#define CV_CPU_SSSE3            5
#define CV_CPU_SSE4_1           6
#define CV_CPU_SSE4_2           7
#define CV_CPU_POPCNT           8
#define CV_CPU_FP16             9
#define CV_CPU_AVX              10
#define CV_CPU_AVX2             11
#define CV_CPU_FMA3             12

#define CV_CPU_AVX_512F         13
#define CV_CPU_AVX_512BW        14
#define CV_CPU_AVX_512CD        15
#define CV_CPU_AVX_512DQ        16
#define CV_CPU_AVX_512ER        17
#define CV_CPU_AVX_512IFMA512   18 // deprecated
#define CV_CPU_AVX_512IFMA      18
#define CV_CPU_AVX_512PF        19
#define CV_CPU_AVX_512VBMI      20
#define CV_CPU_AVX_512VL        21
#define CV_CPU_AVX_512VBMI2     22
#define CV_CPU_AVX_512VNNI      23
#define CV_CPU_AVX_512BITALG    24
#define CV_CPU_AVX_512VPOPCNTDQ 25
#define CV_CPU_AVX_5124VNNIW    26
#define CV_CPU_AVX_5124FMAPS    27

#define CV_CPU_NEON             100
#define CV_CPU_NEON_DOTPROD     101
#define CV_CPU_NEON_FP16        102
#define CV_CPU_NEON_BF16        103
#define CV_CPU_SVE              104

#define CV_CPU_MSA              150

#define CV_CPU_RISCVV           170

#define CV_CPU_VSX              200
#define CV_CPU_VSX3             201

#define CV_CPU_RVV              210

#define CV_CPU_LSX              230
#define CV_CPU_LASX             231

// CPU features groups
#define CV_CPU_AVX512_SKX       256
#define CV_CPU_AVX512_COMMON    257
#define CV_CPU_AVX512_KNL       258
#define CV_CPU_AVX512_KNM       259
#define CV_CPU_AVX512_CNL       260
#define CV_CPU_AVX512_CLX       261
#define CV_CPU_AVX512_ICL       262

// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 512
|
||||||
|
|
||||||
|
/** @brief Available CPU features.

Mirrors the CV_CPU_* macro values above; keep the two lists in sync.
*/
enum CpuFeatures {
    CPU_MMX             = 1,
    CPU_SSE             = 2,
    CPU_SSE2            = 3,
    CPU_SSE3            = 4,
    CPU_SSSE3           = 5,
    CPU_SSE4_1          = 6,
    CPU_SSE4_2          = 7,
    CPU_POPCNT          = 8,
    CPU_FP16            = 9,
    CPU_AVX             = 10,
    CPU_AVX2            = 11,
    CPU_FMA3            = 12,

    CPU_AVX_512F        = 13,
    CPU_AVX_512BW       = 14,
    CPU_AVX_512CD       = 15,
    CPU_AVX_512DQ       = 16,
    CPU_AVX_512ER       = 17,
    CPU_AVX_512IFMA512  = 18, // deprecated
    CPU_AVX_512IFMA     = 18,
    CPU_AVX_512PF       = 19,
    CPU_AVX_512VBMI     = 20,
    CPU_AVX_512VL       = 21,
    CPU_AVX_512VBMI2    = 22,
    CPU_AVX_512VNNI     = 23,
    CPU_AVX_512BITALG   = 24,
    CPU_AVX_512VPOPCNTDQ= 25,
    CPU_AVX_5124VNNIW   = 26,
    CPU_AVX_5124FMAPS   = 27,

    CPU_NEON            = 100,
    CPU_NEON_DOTPROD    = 101,
    CPU_NEON_FP16       = 102,
    CPU_NEON_BF16       = 103,
    CPU_SVE             = 104,

    CPU_MSA             = 150,

    CPU_RISCVV          = 170,

    CPU_VSX             = 200,
    CPU_VSX3            = 201,

    CPU_RVV             = 210,

    CPU_LSX             = 230,
    CPU_LASX            = 231,

    CPU_AVX512_SKX      = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
    CPU_AVX512_COMMON   = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
    CPU_AVX512_KNL      = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
    CPU_AVX512_KNM      = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
    CPU_AVX512_CNL      = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
    CPU_AVX512_CLX      = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
    CPU_AVX512_ICL      = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ

    CPU_MAX_FEATURE     = 512  // see CV_HARDWARE_MAX_FEATURE
};
|
||||||
|
|
||||||
|
|
||||||
|
#include "cv_cpu_dispatch.h"
|
||||||
|
|
||||||
|
// CV_STRONG_ALIGNMENT == 1 on 32-bit ARM, where misaligned loads can fault.
#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC))
// int*, int64* should be properly aligned pointers on ARMv7
#define CV_STRONG_ALIGNMENT 1
#endif
#if !defined(CV_STRONG_ALIGNMENT)
#define CV_STRONG_ALIGNMENT 0
#endif

/* fundamental constants */
#define CV_PI   3.1415926535897932384626433832795
#define CV_2PI  6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818

// CV_FP16_TYPE: compiler provides a native __fp16 type (ARM IEEE half,
// excluding CUDA device compilation).
#if defined __ARM_FP16_FORMAT_IEEE \
    && !defined __CUDACC__
#  define CV_FP16_TYPE 1
#else
#  define CV_FP16_TYPE 0
#endif
|
||||||
|
|
||||||
|
typedef union Cv16suf
|
||||||
|
{
|
||||||
|
short i;
|
||||||
|
ushort u;
|
||||||
|
#if CV_FP16_TYPE
|
||||||
|
__fp16 h;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Cv16suf;
|
||||||
|
|
||||||
|
// 32-bit reinterpretation helper: int / unsigned / float views of one word.
typedef union Cv32suf
{
    int i;
    unsigned u;
    float f;
}
Cv32suf;
|
||||||
|
|
||||||
|
typedef union Cv64suf
|
||||||
|
{
|
||||||
|
int64 i;
|
||||||
|
uint64 u;
|
||||||
|
double f;
|
||||||
|
}
|
||||||
|
Cv64suf;
|
||||||
|
|
||||||
|
#ifndef OPENCV_ABI_COMPATIBILITY
#define OPENCV_ABI_COMPATIBILITY 400
#endif

#ifdef __OPENCV_BUILD
#  define DISABLE_OPENCV_3_COMPATIBILITY
#  define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
#endif

// CV_EXPORTS: symbol export/visibility attribute for the public API.
#ifndef CV_EXPORTS
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS)
#   define CV_EXPORTS __declspec(dllexport)
# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__))
#   define CV_EXPORTS __attribute__ ((visibility ("default")))
# endif
#endif

#ifndef CV_EXPORTS
# define CV_EXPORTS
#endif

// MSVC must not export explicit template instantiations the same way.
#ifdef _MSC_VER
#   define CV_EXPORTS_TEMPLATE
#else
#   define CV_EXPORTS_TEMPLATE CV_EXPORTS
#endif

#ifndef CV_DEPRECATED
#  if defined(__GNUC__)
#    define CV_DEPRECATED __attribute__ ((deprecated))
#  elif defined(_MSC_VER)
#    define CV_DEPRECATED __declspec(deprecated)
#  else
#    define CV_DEPRECATED
#  endif
#endif

// Deprecation warnings only for external users, not during the OpenCV build.
#ifndef CV_DEPRECATED_EXTERNAL
#  if defined(__OPENCV_BUILD)
#    define CV_DEPRECATED_EXTERNAL /* nothing */
#  else
#    define CV_DEPRECATED_EXTERNAL CV_DEPRECATED
#  endif
#endif


#ifndef CV_EXTERN_C
#  ifdef __cplusplus
#    define CV_EXTERN_C extern "C"
#  else
#    define CV_EXTERN_C
#  endif
#endif

/* special informative macros for wrapper generators */
// These expand to nothing (or CV_EXPORTS); they are markers parsed by the
// Python/Java binding generators.
#define CV_EXPORTS_W CV_EXPORTS
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
#define CV_EXPORTS_W_MAP CV_EXPORTS
#define CV_EXPORTS_W_PARAMS CV_EXPORTS
#define CV_IN_OUT
#define CV_OUT
#define CV_PROP
#define CV_PROP_RW
#define CV_ND // Indicates that input data should be parsed into Mat without channels
#define CV_WRAP
#define CV_WRAP_AS(synonym)
#define CV_WRAP_MAPPABLE(mappable)
#define CV_WRAP_PHANTOM(phantom_header)
#define CV_WRAP_DEFAULT(val)
/* Indicates that the function parameter has filesystem path semantic */
#define CV_WRAP_FILE_PATH
|
||||||
|
|
||||||
|
/****************************************************************************************\
*                                  Matrix type (Mat)                                     *
\****************************************************************************************/

#define CV_MAX_DIM 32
// Channel count and depth are packed into the Mat flags word
// (CV_CN_SHIFT/CV_CN_MAX/CV_DEPTH_MAX come from opencv2/core/hal/interface.h).
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
#define CV_MAT_CONT_FLAG_SHIFT 14
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
#define CV_SUBMAT_FLAG_SHIFT 15
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
// fixed: previously expanded to CV_MAT_SUBMAT_FLAG, which this header never
// defines (same value, but only available via the legacy C API headers)
#define CV_IS_SUBMAT(flags) ((flags) & CV_SUBMAT_FLAG)

/** Size of each channel item,
   0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)

#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))

#ifndef MIN
#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
#endif

#ifndef MAX
#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
#endif

/** min & max without jumps */
#define CV_IMIN(a, b)  ((a) ^ (((a)^(b)) & (((a) < (b)) - 1)))
#define CV_IMAX(a, b)  ((a) ^ (((a)^(b)) & (((a) > (b)) - 1)))
#define CV_SWAP(a,b,t) ((t) = (a), (a) = (b), (b) = (t))
#define CV_CMP(a,b)    (((a) > (b)) - ((a) < (b)))
#define CV_SIGN(a)     CV_CMP((a),0)
|
||||||
|
|
||||||
|
///////////////////////////////////////// Enum operators ///////////////////////////////////////

/**

Provides compatibility operators for both classical and C++11 enum classes,
as well as exposing the C++11 enum class members for backwards compatibility

@code
    // Provides operators required for flag enums
    CV_ENUM_FLAGS(AccessFlag)

    // Exposes the listed members of the enum class AccessFlag to the current namespace
    CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]);
@endcode
*/

// __CV_ENUM_CLASS_EXPOSE_N: recursion ladder that re-exports 1..9 enum-class
// members as namespace-level constants; dispatched via __CV_VA_NUM_ARGS.
#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST) \
static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST; \

#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__)); \

#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...) \
__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST); \
__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__)); \

// Logical / comparison operators for flag enums.
#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType) \
static inline bool operator!(const EnumType& val) \
{ \
    typedef std::underlying_type<EnumType>::type UnderlyingType; \
    return !static_cast<UnderlyingType>(val); \
} \

#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type) \
static inline bool operator!=(const Arg1Type& a, const Arg2Type& b) \
{ \
    return static_cast<int>(a) != static_cast<int>(b); \
} \

#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type) \
static inline bool operator==(const Arg1Type& a, const Arg2Type& b) \
{ \
    return static_cast<int>(a) == static_cast<int>(b); \
} \

// Bitwise operators for flag enums; all arithmetic is done on the
// underlying integer type and cast back to the enum.
#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType) \
static inline EnumType operator~(const EnumType& val) \
{ \
    typedef std::underlying_type<EnumType>::type UnderlyingType; \
    return static_cast<EnumType>(~static_cast<UnderlyingType>(val)); \
} \

#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type) \
static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b) \
{ \
    typedef std::underlying_type<EnumType>::type UnderlyingType; \
    return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b)); \
} \

#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type) \
static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b) \
{ \
    typedef std::underlying_type<EnumType>::type UnderlyingType; \
    return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b)); \
} \

#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type) \
static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b) \
{ \
    typedef std::underlying_type<EnumType>::type UnderlyingType; \
    return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b)); \
} \

#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type) \
static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val) \
{ \
    _this = static_cast<EnumType>(static_cast<int>(_this) | static_cast<int>(val)); \
    return _this; \
} \

#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type) \
static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val) \
{ \
    _this = static_cast<EnumType>(static_cast<int>(_this) & static_cast<int>(val)); \
    return _this; \
} \

#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type) \
static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val) \
{ \
    _this = static_cast<EnumType>(static_cast<int>(_this) ^ static_cast<int>(val)); \
    return _this; \
} \

#define CV_ENUM_CLASS_EXPOSE(EnumType, ...) \
__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \

#define CV_ENUM_FLAGS(EnumType) \
__CV_ENUM_FLAGS_LOGICAL_NOT      (EnumType) \
__CV_ENUM_FLAGS_LOGICAL_EQ       (EnumType, int) \
__CV_ENUM_FLAGS_LOGICAL_NOT_EQ   (EnumType, int) \
                                            \
__CV_ENUM_FLAGS_BITWISE_NOT      (EnumType) \
__CV_ENUM_FLAGS_BITWISE_OR       (EnumType, EnumType, EnumType) \
__CV_ENUM_FLAGS_BITWISE_AND      (EnumType, EnumType, EnumType) \
__CV_ENUM_FLAGS_BITWISE_XOR      (EnumType, EnumType, EnumType) \
                                            \
__CV_ENUM_FLAGS_BITWISE_OR_EQ    (EnumType, EnumType) \
__CV_ENUM_FLAGS_BITWISE_AND_EQ   (EnumType, EnumType) \
__CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType) \
|
||||||
|
/****************************************************************************************\
*                                    static analysis                                     *
\****************************************************************************************/

// In practice, some macro are not processed correctly (noreturn is not detected).
// We need to use simplified definition for them.
#ifndef CV_STATIC_ANALYSIS
# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
#   define CV_STATIC_ANALYSIS 1
# endif
#else
// Normalize a user-provided value: "CV_STATIC_ANALYSIS=0" means disabled.
# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1) // defined and not empty
#   if 0 == CV_STATIC_ANALYSIS
#     undef CV_STATIC_ANALYSIS
#   endif
# endif
#endif

/****************************************************************************************\
*                                    Thread sanitizer                                    *
\****************************************************************************************/
#ifndef CV_THREAD_SANITIZER
# if defined(__has_feature)
#   if __has_feature(thread_sanitizer)
#     define CV_THREAD_SANITIZER
#   endif
# endif
#endif
|
||||||
|
|
||||||
|
/****************************************************************************************\
*          exchange-add operation for atomic operations on reference counters            *
\****************************************************************************************/

// CV_XADD(addr, delta): atomically adds delta to *addr and returns the
// PREVIOUS value (fetch-and-add), used for reference counting.
#ifdef CV_XADD
  // allow to use user-defined macro
#elif defined __GNUC__ || defined __clang__
# if defined __clang__ && __clang_major__ >= 3 && !defined __EMSCRIPTEN__ && !defined __INTEL_COMPILER
#   ifdef __ATOMIC_ACQ_REL
#     define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
#   else
#     define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
#   endif
# else
#   if defined __ATOMIC_ACQ_REL && !defined __clang__
      // version for gcc >= 4.7
#     define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
#   else
#     define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
#   endif
# endif
#elif defined _MSC_VER && !defined RC_INVOKED
# include <intrin.h>
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
  // Non-atomic fallback must be requested explicitly; otherwise fail loudly.
  #ifdef OPENCV_FORCE_UNSAFE_XADD
  CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
  #else
  #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
  #endif
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************************\
*                                  CV_NORETURN attribute                                 *
\****************************************************************************************/

#ifndef CV_NORETURN
#  if defined(__GNUC__)
#    define CV_NORETURN __attribute__((__noreturn__))
#  elif defined(_MSC_VER) && (_MSC_VER >= 1300)
#    define CV_NORETURN __declspec(noreturn)
#  else
#    define CV_NORETURN /* nothing by default */
#  endif
#endif

/****************************************************************************************\
*                         CV_NODISCARD_STD attribute (C++17)                             *
*        encourages the compiler to issue a warning if the return value is discarded     *
\****************************************************************************************/
#ifndef CV_NODISCARD_STD
#  ifndef __has_cpp_attribute
//   workaround preprocessor non-compliance https://reviews.llvm.org/D57851
#    define __has_cpp_attribute(__x) 0
#  endif
#  if __has_cpp_attribute(nodiscard)
#    if defined(__NVCC__) && __CUDACC_VER_MAJOR__ < 12
#      define CV_NODISCARD_STD
#    else
#      define CV_NODISCARD_STD [[nodiscard]]
#    endif
#  elif __cplusplus >= 201703L
//   available when compiler is C++17 compliant
#    define CV_NODISCARD_STD [[nodiscard]]
#  elif defined(__INTEL_COMPILER)
     // see above, available when C++17 is enabled
#  elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L
//   available with VS2017 v15.3+ with /std:c++17 or higher; works on functions and classes
#    define CV_NODISCARD_STD [[nodiscard]]
#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 700) && (__cplusplus >= 201103L)
//   available with GCC 7.0+; works on functions, works or silently fails on classes
#    define CV_NODISCARD_STD [[nodiscard]]
#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 408) && (__cplusplus >= 201103L)
//   available with GCC 4.8+ but it usually does nothing and can fail noisily -- therefore not used
//   define CV_NODISCARD_STD [[gnu::warn_unused_result]]
#  endif
#endif
#ifndef CV_NODISCARD_STD
#  define CV_NODISCARD_STD /* nothing by default */
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************************\
*                                       C++ 11                                           *
\****************************************************************************************/
#ifdef __cplusplus
// MSVC was stuck at __cplusplus == 199711L for a long time, even where it supports C++11,
// so check _MSC_VER instead. See:
// <https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus>
#  if defined(_MSC_VER)
#    if _MSC_VER < 1800
#      error "OpenCV 4.x+ requires enabled C++11 support"
#    endif
#  elif __cplusplus < 201103L
#    error "OpenCV 4.x+ requires enabled C++11 support"
#  endif
#endif

#ifndef CV_CXX11
#  define CV_CXX11 1
#endif

// C++11 keyword aliases kept as macros for historical portability.
#ifndef CV_OVERRIDE
#  define CV_OVERRIDE override
#endif

#ifndef CV_FINAL
#  define CV_FINAL final
#endif

#ifndef CV_NOEXCEPT
#  define CV_NOEXCEPT noexcept
#endif

#ifndef CV_CONSTEXPR
#  define CV_CONSTEXPR constexpr
#endif

// Integer types portability
#ifdef __cplusplus
#include <cstdint>
namespace cv {
using std::int8_t;
using std::uint8_t;
using std::int16_t;
using std::uint16_t;
using std::int32_t;
using std::uint32_t;
using std::int64_t;
using std::uint64_t;
}
#else // pure C
#include <stdint.h>
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
class hfloat
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
#if CV_FP16_TYPE
|
||||||
|
|
||||||
|
hfloat() : h(0) {}
|
||||||
|
explicit hfloat(float x) { h = (__fp16)x; }
|
||||||
|
operator float() const { return (float)h; }
|
||||||
|
protected:
|
||||||
|
__fp16 h;
|
||||||
|
|
||||||
|
#else
|
||||||
|
hfloat() : w(0) {}
|
||||||
|
explicit hfloat(float x)
|
||||||
|
{
|
||||||
|
#if CV_FP16 && CV_AVX2
|
||||||
|
__m128 v = _mm_load_ss(&x);
|
||||||
|
w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
|
||||||
|
#else
|
||||||
|
Cv32suf in;
|
||||||
|
in.f = x;
|
||||||
|
unsigned sign = in.u & 0x80000000;
|
||||||
|
in.u ^= sign;
|
||||||
|
|
||||||
|
if( in.u >= 0x47800000 )
|
||||||
|
w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (in.u < 0x38800000)
|
||||||
|
{
|
||||||
|
in.f += 0.5f;
|
||||||
|
w = (ushort)(in.u - 0x3f000000);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unsigned t = in.u + 0xc8000fff;
|
||||||
|
w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
w = (ushort)(w | (sign >> 16));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
operator float() const
|
||||||
|
{
|
||||||
|
#if CV_FP16 && CV_AVX2
|
||||||
|
float f;
|
||||||
|
_mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
|
||||||
|
return f;
|
||||||
|
#else
|
||||||
|
Cv32suf out;
|
||||||
|
|
||||||
|
unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
|
||||||
|
unsigned sign = (w & 0x8000) << 16;
|
||||||
|
unsigned e = w & 0x7c00;
|
||||||
|
|
||||||
|
out.u = t + (1 << 23);
|
||||||
|
out.u = (e >= 0x7c00 ? t + 0x38000000 :
|
||||||
|
e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
|
||||||
|
return out.f;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
ushort w;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
inline hfloat hfloatFromBits(ushort w) {
|
||||||
|
#if CV_FP16_TYPE
|
||||||
|
Cv16suf u;
|
||||||
|
u.u = w;
|
||||||
|
hfloat res(float(u.h));
|
||||||
|
return res;
|
||||||
|
#else
|
||||||
|
Cv32suf out;
|
||||||
|
|
||||||
|
unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
|
||||||
|
unsigned sign = (w & 0x8000) << 16;
|
||||||
|
unsigned e = w & 0x7c00;
|
||||||
|
|
||||||
|
out.u = t + (1 << 23);
|
||||||
|
out.u = (e >= 0x7c00 ? t + 0x38000000 :
|
||||||
|
e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
|
||||||
|
hfloat res(out.f);
|
||||||
|
return res;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(__OPENCV_BUILD) && !(defined __STDCPP_FLOAT16_T__) && !(defined __ARM_NEON)
|
||||||
|
typedef hfloat float16_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @brief Constructs the 'fourcc' code, used in video codecs and many other places.
|
||||||
|
Simply call it with 4 chars like `CV_FOURCC('I', 'Y', 'U', 'V')`
|
||||||
|
*/
|
||||||
|
CV_INLINE int CV_FOURCC(char c1, char c2, char c3, char c4)
|
||||||
|
{
|
||||||
|
return (c1 & 255) + ((c2 & 255) << 8) + ((c3 & 255) << 16) + ((c4 & 255) << 24);
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Macro to construct the fourcc code of the codec. Same as CV_FOURCC()
|
||||||
|
#define CV_FOURCC_MACRO(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24))
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#include "opencv2/core/fast_math.hpp" // define cvRound(double)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_CVDEF_H
|
||||||
189
3rdpart/OpenCV/include/opencv2/core/cvstd.hpp
Normal file
189
3rdpart/OpenCV/include/opencv2/core/cvstd.hpp
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CVSTD_HPP
|
||||||
|
#define OPENCV_CORE_CVSTD_HPP
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
# error cvstd.hpp header must be compiled as C++
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cctype>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// import useful primitives from stl
|
||||||
|
# include <algorithm>
|
||||||
|
# include <utility>
|
||||||
|
# include <cstdlib> //for abs(int)
|
||||||
|
# include <cmath>
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
static inline uchar abs(uchar a) { return a; }
|
||||||
|
static inline ushort abs(ushort a) { return a; }
|
||||||
|
static inline unsigned abs(unsigned a) { return a; }
|
||||||
|
static inline uint64 abs(uint64 a) { return a; }
|
||||||
|
|
||||||
|
using std::min;
|
||||||
|
using std::max;
|
||||||
|
using std::abs;
|
||||||
|
using std::swap;
|
||||||
|
using std::sqrt;
|
||||||
|
using std::exp;
|
||||||
|
using std::pow;
|
||||||
|
using std::log;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "cvstd_wrapper.hpp"
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
//! @addtogroup core_utils
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//////////////////////////// memory management functions ////////////////////////////
|
||||||
|
|
||||||
|
/** @brief Allocates an aligned memory buffer.
|
||||||
|
|
||||||
|
The function allocates the buffer of the specified size and returns it. When the buffer size is 16
|
||||||
|
bytes or more, the returned buffer is aligned to 16 bytes.
|
||||||
|
@param bufSize Allocated buffer size.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS void* fastMalloc(size_t bufSize);
|
||||||
|
|
||||||
|
/** @brief Deallocates a memory buffer.
|
||||||
|
|
||||||
|
The function deallocates the buffer allocated with fastMalloc . If NULL pointer is passed, the
|
||||||
|
function does nothing. C version of the function clears the pointer *pptr* to avoid problems with
|
||||||
|
double memory deallocation.
|
||||||
|
@param ptr Pointer to the allocated buffer.
|
||||||
|
*/
|
||||||
|
CV_EXPORTS void fastFree(void* ptr);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
|
||||||
|
*/
|
||||||
|
template<typename _Tp> class Allocator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef _Tp value_type;
|
||||||
|
typedef value_type* pointer;
|
||||||
|
typedef const value_type* const_pointer;
|
||||||
|
typedef value_type& reference;
|
||||||
|
typedef const value_type& const_reference;
|
||||||
|
typedef size_t size_type;
|
||||||
|
typedef ptrdiff_t difference_type;
|
||||||
|
template<typename U> class rebind { typedef Allocator<U> other; };
|
||||||
|
|
||||||
|
explicit Allocator() {}
|
||||||
|
~Allocator() {}
|
||||||
|
explicit Allocator(Allocator const&) {}
|
||||||
|
template<typename U>
|
||||||
|
explicit Allocator(Allocator<U> const&) {}
|
||||||
|
|
||||||
|
// address
|
||||||
|
pointer address(reference r) { return &r; }
|
||||||
|
const_pointer address(const_reference r) { return &r; }
|
||||||
|
|
||||||
|
pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
|
||||||
|
void deallocate(pointer p, size_type) { fastFree(p); }
|
||||||
|
|
||||||
|
void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
|
||||||
|
void destroy(pointer p) { p->~_Tp(); }
|
||||||
|
|
||||||
|
size_type max_size() const { return cv::max(static_cast<_Tp>(-1)/sizeof(_Tp), 1); }
|
||||||
|
};
|
||||||
|
|
||||||
|
//! @} core_utils
|
||||||
|
|
||||||
|
|
||||||
|
//! @addtogroup core_basic
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//////////////////////////////// string class ////////////////////////////////
|
||||||
|
|
||||||
|
class CV_EXPORTS FileNode; //for string constructor from FileNode
|
||||||
|
|
||||||
|
typedef std::string String;
|
||||||
|
|
||||||
|
#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
namespace details {
|
||||||
|
// std::tolower is int->int
|
||||||
|
static inline char char_tolower(char ch)
|
||||||
|
{
|
||||||
|
return (char)std::tolower((int)ch);
|
||||||
|
}
|
||||||
|
// std::toupper is int->int
|
||||||
|
static inline char char_toupper(char ch)
|
||||||
|
{
|
||||||
|
return (char)std::toupper((int)ch);
|
||||||
|
}
|
||||||
|
} // namespace details
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
static inline std::string toLowerCase(const std::string& str)
|
||||||
|
{
|
||||||
|
std::string result(str);
|
||||||
|
std::transform(result.begin(), result.end(), result.begin(), details::char_tolower);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline std::string toUpperCase(const std::string& str)
|
||||||
|
{
|
||||||
|
std::string result(str);
|
||||||
|
std::transform(result.begin(), result.end(), result.begin(), details::char_toupper);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
|
||||||
|
|
||||||
|
//! @} core_basic
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#endif //OPENCV_CORE_CVSTD_HPP
|
||||||
197
3rdpart/OpenCV/include/opencv2/core/cvstd.inl.hpp
Normal file
197
3rdpart/OpenCV/include/opencv2/core/cvstd.inl.hpp
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CVSTDINL_HPP
|
||||||
|
#define OPENCV_CORE_CVSTDINL_HPP
|
||||||
|
|
||||||
|
#include <complex>
|
||||||
|
#include <ostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning( push )
|
||||||
|
#pragma warning( disable: 4127 )
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
template<typename _Tp> class DataType< std::complex<_Tp> >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef std::complex<_Tp> value_type;
|
||||||
|
typedef value_type work_type;
|
||||||
|
typedef _Tp channel_type;
|
||||||
|
|
||||||
|
enum { generic_type = 0,
|
||||||
|
depth = DataType<channel_type>::depth,
|
||||||
|
channels = 2,
|
||||||
|
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
|
||||||
|
type = CV_MAKETYPE(depth, channels) };
|
||||||
|
|
||||||
|
typedef Vec<channel_type, channels> vec_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
|
||||||
|
{
|
||||||
|
fmtd->reset();
|
||||||
|
for(const char* str = fmtd->next(); str; str = fmtd->next())
|
||||||
|
out << str;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Mat& mtx)
|
||||||
|
{
|
||||||
|
return out << Formatter::get()->format(mtx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const UMat& m)
|
||||||
|
{
|
||||||
|
return out << m.getMat(ACCESS_READ);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
|
||||||
|
{
|
||||||
|
return out << "(" << c.re << "," << c.im << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
|
||||||
|
{
|
||||||
|
return out << Formatter::get()->format(Mat(vec));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
|
||||||
|
{
|
||||||
|
return out << Formatter::get()->format(Mat(vec));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
|
||||||
|
{
|
||||||
|
return out << Formatter::get()->format(Mat(matx));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
|
||||||
|
{
|
||||||
|
out << "[" << p.x << ", " << p.y << "]";
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
|
||||||
|
{
|
||||||
|
out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, int n> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
|
||||||
|
{
|
||||||
|
out << "[";
|
||||||
|
if (cv::traits::Depth<_Tp>::value <= CV_32S)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n - 1; ++i) {
|
||||||
|
out << (int)vec[i] << ", ";
|
||||||
|
}
|
||||||
|
out << (int)vec[n-1] << "]";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n - 1; ++i) {
|
||||||
|
out << vec[i] << ", ";
|
||||||
|
}
|
||||||
|
out << vec[n-1] << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
|
||||||
|
{
|
||||||
|
return out << "[" << size.width << " x " << size.height << "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> static inline
|
||||||
|
std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
|
||||||
|
{
|
||||||
|
return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
|
||||||
|
{
|
||||||
|
int i, dims = msize.dims();
|
||||||
|
for( i = 0; i < dims; i++ )
|
||||||
|
{
|
||||||
|
out << msize[i];
|
||||||
|
if( i < dims-1 )
|
||||||
|
out << " x ";
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
|
||||||
|
{
|
||||||
|
return s << "[" << r.start << " : " << r.end << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning( pop )
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_CVSTDINL_HPP
|
||||||
154
3rdpart/OpenCV/include/opencv2/core/cvstd_wrapper.hpp
Normal file
154
3rdpart/OpenCV/include/opencv2/core/cvstd_wrapper.hpp
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||||
|
#define OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <memory> // std::shared_ptr
|
||||||
|
#include <type_traits> // std::enable_if
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
using std::nullptr_t;
|
||||||
|
|
||||||
|
//! @addtogroup core_basic
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
#ifdef CV_DOXYGEN
|
||||||
|
|
||||||
|
template <typename _Tp> using Ptr = std::shared_ptr<_Tp>; // In ideal world it should look like this, but we need some compatibility workarounds below
|
||||||
|
|
||||||
|
template<typename _Tp, typename ... A1> static inline
|
||||||
|
Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); }
|
||||||
|
|
||||||
|
#else // cv::Ptr with compatibility workarounds
|
||||||
|
|
||||||
|
// It should be defined for C-API types only.
|
||||||
|
// C++ types should use regular "delete" operator.
|
||||||
|
template<typename Y> struct DefaultDeleter;
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
void operator()(Y* p) const;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace sfinae {
|
||||||
|
template<typename C, typename Ret, typename... Args>
|
||||||
|
struct has_parenthesis_operator
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
template<typename T>
|
||||||
|
static CV_CONSTEXPR std::true_type has_parenthesis_operator_check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
|
||||||
|
|
||||||
|
template<typename> static CV_CONSTEXPR std::false_type has_parenthesis_operator_check(...);
|
||||||
|
|
||||||
|
typedef decltype(has_parenthesis_operator_check<C>(0)) type;
|
||||||
|
|
||||||
|
public:
|
||||||
|
#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
|
||||||
|
static CV_CONSTEXPR bool value = type::value;
|
||||||
|
#else
|
||||||
|
// support MSVS 2013
|
||||||
|
static const int value = type::value;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
} // namespace sfinae
|
||||||
|
|
||||||
|
template <typename T, typename = void>
|
||||||
|
struct has_custom_delete
|
||||||
|
: public std::false_type {};
|
||||||
|
|
||||||
|
// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
|
||||||
|
#ifndef __CUDACC__
|
||||||
|
template <typename T>
|
||||||
|
struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
|
||||||
|
: public std::true_type {};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct Ptr : public std::shared_ptr<T>
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
using std::shared_ptr<T>::shared_ptr; // GCC 5.x can't handle this
|
||||||
|
#else
|
||||||
|
inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
|
||||||
|
inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
|
||||||
|
template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
|
||||||
|
template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
|
||||||
|
|
||||||
|
template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
|
||||||
|
|
||||||
|
inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||||
|
inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||||
|
|
||||||
|
template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||||
|
template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||||
|
#endif
|
||||||
|
inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
|
||||||
|
inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
|
||||||
|
|
||||||
|
// Overload with custom DefaultDeleter: Ptr<IplImage>(...)
|
||||||
|
template<typename Y>
|
||||||
|
inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
|
||||||
|
|
||||||
|
// Overload without custom deleter: Ptr<std::string>(...);
|
||||||
|
template<typename Y>
|
||||||
|
inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
|
||||||
|
|
||||||
|
template<typename Y = T>
|
||||||
|
inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
|
||||||
|
|
||||||
|
// Overload with custom DefaultDeleter: Ptr<IplImage>(...)
|
||||||
|
template<typename Y>
|
||||||
|
inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
|
||||||
|
|
||||||
|
// Overload without custom deleter: Ptr<std::string>(...);
|
||||||
|
template<typename Y>
|
||||||
|
inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
|
||||||
|
|
||||||
|
template<typename Y>
|
||||||
|
inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
|
||||||
|
|
||||||
|
template<class Y, class Deleter>
|
||||||
|
void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
|
||||||
|
|
||||||
|
void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
|
||||||
|
|
||||||
|
Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
|
||||||
|
template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
|
||||||
|
|
||||||
|
T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
|
||||||
|
typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
|
||||||
|
|
||||||
|
// OpenCV 3.x methods (not a part of standard C++ library)
|
||||||
|
inline void release() { std::shared_ptr<T>::reset(); }
|
||||||
|
inline operator T* () const { return std::shared_ptr<T>::get(); }
|
||||||
|
inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
|
||||||
|
|
||||||
|
template<typename Y> inline
|
||||||
|
Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
|
||||||
|
|
||||||
|
template<typename Y> inline
|
||||||
|
Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
|
||||||
|
|
||||||
|
template<typename Y> inline
|
||||||
|
Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename _Tp, typename ... A1> static inline
|
||||||
|
Ptr<_Tp> makePtr(const A1&... a1)
|
||||||
|
{
|
||||||
|
static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter");
|
||||||
|
return (Ptr<_Tp>)std::make_shared<_Tp>(a1...);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // CV_DOXYGEN
|
||||||
|
|
||||||
|
//! @} core_basic
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP
|
||||||
69
3rdpart/OpenCV/include/opencv2/core/detail/async_promise.hpp
Normal file
69
3rdpart/OpenCV/include/opencv2/core/detail/async_promise.hpp
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP
|
||||||
|
#define OPENCV_CORE_ASYNC_PROMISE_HPP
|
||||||
|
|
||||||
|
#include "../async.hpp"
|
||||||
|
|
||||||
|
#include "exception_ptr.hpp"
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
/** @addtogroup core_async
|
||||||
|
@{
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Provides result of asynchronous operations
|
||||||
|
|
||||||
|
*/
|
||||||
|
class CV_EXPORTS AsyncPromise
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
~AsyncPromise() CV_NOEXCEPT;
|
||||||
|
AsyncPromise() CV_NOEXCEPT;
|
||||||
|
explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
|
||||||
|
AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
|
||||||
|
void release() CV_NOEXCEPT;
|
||||||
|
|
||||||
|
/** Returns associated AsyncArray
|
||||||
|
@note Can be called once
|
||||||
|
*/
|
||||||
|
AsyncArray getArrayResult();
|
||||||
|
|
||||||
|
/** Stores asynchronous result.
|
||||||
|
@param[in] value result
|
||||||
|
*/
|
||||||
|
void setValue(InputArray value);
|
||||||
|
|
||||||
|
// TODO "move" setters
|
||||||
|
|
||||||
|
#if CV__EXCEPTION_PTR
|
||||||
|
/** Stores exception.
|
||||||
|
@param[in] exception exception to be raised in AsyncArray
|
||||||
|
*/
|
||||||
|
void setException(std::exception_ptr exception);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** Stores exception.
|
||||||
|
@param[in] exception exception to be raised in AsyncArray
|
||||||
|
*/
|
||||||
|
void setException(const cv::Exception& exception);
|
||||||
|
|
||||||
|
explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
|
||||||
|
AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
|
||||||
|
|
||||||
|
|
||||||
|
// PImpl
|
||||||
|
typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
|
||||||
|
inline void* _getImpl() const CV_NOEXCEPT { return p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
} // namespace
|
||||||
|
#endif // OPENCV_CORE_ASYNC_PROMISE_HPP
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
|
||||||
|
#define OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
namespace detail {
|
||||||
|
|
||||||
|
template<template<typename> class Functor, typename... Args>
|
||||||
|
static inline void depthDispatch(const int depth, Args&&... args)
|
||||||
|
{
|
||||||
|
switch (depth)
|
||||||
|
{
|
||||||
|
case CV_8U:
|
||||||
|
Functor<uint8_t>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_8S:
|
||||||
|
Functor<int8_t>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_16U:
|
||||||
|
Functor<uint16_t>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_16S:
|
||||||
|
Functor<int16_t>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_32S:
|
||||||
|
Functor<int32_t>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_32F:
|
||||||
|
Functor<float>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_64F:
|
||||||
|
Functor<double>{}(std::forward<Args>(args)...);
|
||||||
|
break;
|
||||||
|
case CV_16F:
|
||||||
|
default:
|
||||||
|
CV_Error(cv::Error::BadDepth, "Unsupported matrix type.");
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
}}
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif //OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
|
||||||
21
3rdpart/OpenCV/include/opencv2/core/detail/exception_ptr.hpp
Normal file
21
3rdpart/OpenCV/include/opencv2/core/detail/exception_ptr.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
|
||||||
|
#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
|
||||||
|
|
||||||
|
#ifndef CV__EXCEPTION_PTR
|
||||||
|
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
|
||||||
|
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
|
||||||
|
# else
|
||||||
|
# define CV__EXCEPTION_PTR 1
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
#ifndef CV__EXCEPTION_PTR
|
||||||
|
# define CV__EXCEPTION_PTR 0
|
||||||
|
#elif CV__EXCEPTION_PTR
|
||||||
|
# include <exception> // std::exception_ptr
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
|
||||||
184
3rdpart/OpenCV/include/opencv2/core/directx.hpp
Normal file
184
3rdpart/OpenCV/include/opencv2/core/directx.hpp
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_DIRECTX_HPP
|
||||||
|
#define OPENCV_CORE_DIRECTX_HPP
|
||||||
|
|
||||||
|
#include "mat.hpp"
|
||||||
|
#include "ocl.hpp"
|
||||||
|
|
||||||
|
#if !defined(__d3d11_h__)
|
||||||
|
struct ID3D11Device;
|
||||||
|
struct ID3D11Texture2D;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__d3d10_h__)
|
||||||
|
struct ID3D10Device;
|
||||||
|
struct ID3D10Texture2D;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(_D3D9_H_)
|
||||||
|
struct IDirect3DDevice9;
|
||||||
|
struct IDirect3DDevice9Ex;
|
||||||
|
struct IDirect3DSurface9;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace cv { namespace directx {
|
||||||
|
|
||||||
|
namespace ocl {
|
||||||
|
using namespace cv::ocl;
|
||||||
|
|
||||||
|
//! @addtogroup core_directx
|
||||||
|
// This section describes OpenCL and DirectX interoperability.
|
||||||
|
//
|
||||||
|
// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is
|
||||||
|
// supported only on Windows.
|
||||||
|
//
|
||||||
|
// To use OpenCL functionality you should first initialize OpenCL context from DirectX resource.
|
||||||
|
//
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
// TODO static functions in the Context class
|
||||||
|
//! @brief Creates OpenCL context from D3D11 device
|
||||||
|
//
|
||||||
|
//! @param pD3D11Device - pointer to D3D11 device
|
||||||
|
//! @return Returns reference to OpenCL Context
|
||||||
|
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
|
||||||
|
|
||||||
|
//! @brief Creates OpenCL context from D3D10 device
|
||||||
|
//
|
||||||
|
//! @param pD3D10Device - pointer to D3D10 device
|
||||||
|
//! @return Returns reference to OpenCL Context
|
||||||
|
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
|
||||||
|
|
||||||
|
//! @brief Creates OpenCL context from Direct3DDevice9Ex device
|
||||||
|
//
|
||||||
|
//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
|
||||||
|
//! @return Returns reference to OpenCL Context
|
||||||
|
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
|
||||||
|
|
||||||
|
//! @brief Creates OpenCL context from Direct3DDevice9 device
|
||||||
|
//
|
||||||
|
//! @param pDirect3DDevice9 - pointer to Direct3Device9 device
|
||||||
|
//! @return Returns reference to OpenCL Context
|
||||||
|
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
} // namespace cv::directx::ocl
|
||||||
|
|
||||||
|
//! @addtogroup core_directx
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then
|
||||||
|
//! input UMat expected to be in BGR format and data will be downsampled and color-converted to NV12.
|
||||||
|
//
|
||||||
|
//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to
|
||||||
|
//! pD3D11Texture2D
|
||||||
|
//
|
||||||
|
//! @param src - source InputArray
|
||||||
|
//! @param pD3D11Texture2D - destination D3D11 texture
|
||||||
|
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
|
||||||
|
|
||||||
|
//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then
|
||||||
|
//! data will be upsampled and color-converted to BGR format.
|
||||||
|
//
|
||||||
|
//! @note Note: Destination matrix will be re-allocated if it has not enough memory to match texture size.
|
||||||
|
//! function does memory copy from pD3D11Texture2D to dst
|
||||||
|
//
|
||||||
|
//! @param pD3D11Texture2D - source D3D11 texture
|
||||||
|
//! @param dst - destination OutputArray
|
||||||
|
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
|
||||||
|
|
||||||
|
//! @brief Converts InputArray to ID3D10Texture2D
|
||||||
|
//
|
||||||
|
//! @note Note: function does memory copy from src to
|
||||||
|
//! pD3D10Texture2D
|
||||||
|
//
|
||||||
|
//! @param src - source InputArray
|
||||||
|
//! @param pD3D10Texture2D - destination D3D10 texture
|
||||||
|
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
|
||||||
|
|
||||||
|
//! @brief Converts ID3D10Texture2D to OutputArray
|
||||||
|
//
|
||||||
|
//! @note Note: function does memory copy from pD3D10Texture2D
|
||||||
|
//! to dst
|
||||||
|
//
|
||||||
|
//! @param pD3D10Texture2D - source D3D10 texture
|
||||||
|
//! @param dst - destination OutputArray
|
||||||
|
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
|
||||||
|
|
||||||
|
//! @brief Converts InputArray to IDirect3DSurface9
|
||||||
|
//
|
||||||
|
//! @note Note: function does memory copy from src to
|
||||||
|
//! pDirect3DSurface9
|
||||||
|
//
|
||||||
|
//! @param src - source InputArray
|
||||||
|
//! @param pDirect3DSurface9 - destination D3D10 texture
|
||||||
|
//! @param surfaceSharedHandle - shared handle
|
||||||
|
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
|
||||||
|
|
||||||
|
//! @brief Converts IDirect3DSurface9 to OutputArray
|
||||||
|
//
|
||||||
|
//! @note Note: function does memory copy from pDirect3DSurface9
|
||||||
|
//! to dst
|
||||||
|
//
|
||||||
|
//! @param pDirect3DSurface9 - source D3D10 texture
|
||||||
|
//! @param dst - destination OutputArray
|
||||||
|
//! @param surfaceSharedHandle - shared handle
|
||||||
|
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
|
||||||
|
|
||||||
|
//! @brief Get OpenCV type from DirectX type
|
||||||
|
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
|
||||||
|
//! @return OpenCV type or -1 if there is no equivalent
|
||||||
|
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
|
||||||
|
|
||||||
|
//! @brief Get OpenCV type from DirectX type
|
||||||
|
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
|
||||||
|
//! @return OpenCV type or -1 if there is no equivalent
|
||||||
|
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
} } // namespace cv::directx
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_DIRECTX_HPP
|
||||||
979
3rdpart/OpenCV/include/opencv2/core/dualquaternion.hpp
Normal file
979
3rdpart/OpenCV/include/opencv2/core/dualquaternion.hpp
Normal file
@@ -0,0 +1,979 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Author: Liangqian Kong <kongliangqian@huawei.com>
|
||||||
|
// Longbu Wang <wanglongbu@huawei.com>
|
||||||
|
#ifndef OPENCV_CORE_DUALQUATERNION_HPP
|
||||||
|
#define OPENCV_CORE_DUALQUATERNION_HPP
|
||||||
|
|
||||||
|
#include <opencv2/core/quaternion.hpp>
|
||||||
|
#include <opencv2/core/affine.hpp>
|
||||||
|
|
||||||
|
namespace cv{
|
||||||
|
//! @addtogroup core_quaternion
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
template <typename _Tp> class DualQuat;
|
||||||
|
template <typename _Tp> std::ostream& operator<<(std::ostream&, const DualQuat<_Tp>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dual quaternions were introduced to describe rotation together with translation while ordinary
|
||||||
|
* quaternions can only describe rotation. It can be used for shortest path pose interpolation,
|
||||||
|
* local pose optimization or volumetric deformation. More details can be found
|
||||||
|
* - https://en.wikipedia.org/wiki/Dual_quaternion
|
||||||
|
* - ["A beginners guide to dual-quaternions: what they are, how they work, and how to use them for 3D character hierarchies", Ben Kenwright, 2012](https://borodust.org/public/shared/beginner_dual_quats.pdf)
|
||||||
|
* - ["Dual Quaternions", Yan-Bin Jia, 2013](http://web.cs.iastate.edu/~cs577/handouts/dual-quaternion.pdf)
|
||||||
|
* - ["Geometric Skinning with Approximate Dual Quaternion Blending", Kavan, 2008](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric)
|
||||||
|
* - http://rodolphe-vaillant.fr/?e=29
|
||||||
|
*
|
||||||
|
* A unit dual quaternion can be classically represented as:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma &= \left(r+\frac{\epsilon}{2}tr\right)\\
|
||||||
|
* &= [w, x, y, z, w\_, x\_, y\_, z\_]
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* where \f$r, t\f$ represents the rotation (ordinary unit quaternion) and translation (pure ordinary quaternion) respectively.
|
||||||
|
*
|
||||||
|
* A general dual quaternions which consist of two quaternions is usually represented in form of:
|
||||||
|
* \f[
|
||||||
|
* \sigma = p + \epsilon q
|
||||||
|
* \f]
|
||||||
|
* where the introduced dual unit \f$\epsilon\f$ satisfies \f$\epsilon^2 = \epsilon^3 =...=0\f$, and \f$p, q\f$ are quaternions.
|
||||||
|
*
|
||||||
|
* Alternatively, dual quaternions can also be interpreted as four components which are all [dual numbers](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric):
|
||||||
|
* \f[
|
||||||
|
* \sigma = \hat{q}_w + \hat{q}_xi + \hat{q}_yj + \hat{q}_zk
|
||||||
|
* \f]
|
||||||
|
* If we set \f$\hat{q}_x, \hat{q}_y\f$ and \f$\hat{q}_z\f$ equal to 0, a dual quaternion is transformed to a dual number. see normalize().
|
||||||
|
*
|
||||||
|
* If you want to create a dual quaternion, you can use:
|
||||||
|
*
|
||||||
|
* ```
|
||||||
|
* using namespace cv;
|
||||||
|
* double angle = CV_PI;
|
||||||
|
*
|
||||||
|
* // create from eight number
|
||||||
|
* DualQuatd dq1(1, 2, 3, 4, 5, 6, 7, 8); //p = [1,2,3,4]. q=[5,6,7,8]
|
||||||
|
*
|
||||||
|
* // create from Vec
|
||||||
|
* Vec<double, 8> v{1,2,3,4,5,6,7,8};
|
||||||
|
* DualQuatd dq_v{v};
|
||||||
|
*
|
||||||
|
* // create from two quaternion
|
||||||
|
* Quatd p(1, 2, 3, 4);
|
||||||
|
* Quatd q(5, 6, 7, 8);
|
||||||
|
* DualQuatd dq2 = DualQuatd::createFromQuat(p, q);
|
||||||
|
*
|
||||||
|
* // create from an angle, an axis and a translation
|
||||||
|
* Vec3d axis{0, 0, 1};
|
||||||
|
* Vec3d trans{3, 4, 5};
|
||||||
|
* DualQuatd dq3 = DualQuatd::createFromAngleAxisTrans(angle, axis, trans);
|
||||||
|
*
|
||||||
|
* // If you already have an instance of class Affine3, then you can use
|
||||||
|
* Affine3d R = dq3.toAffine3();
|
||||||
|
* DualQuatd dq4 = DualQuatd::createFromAffine3(R);
|
||||||
|
*
|
||||||
|
* // or create directly by affine transformation matrix Rt
|
||||||
|
* // see createFromMat() in detail for the form of Rt
|
||||||
|
* Matx44d Rt = dq3.toMat();
|
||||||
|
* DualQuatd dq5 = DualQuatd::createFromMat(Rt);
|
||||||
|
*
|
||||||
|
* // Any rotation + translation movement can
|
||||||
|
* // be expressed as a rotation + translation around the same line in space (expressed by Plucker
|
||||||
|
* // coords), and here's a way to represent it this way.
|
||||||
|
* Vec3d axis{1, 1, 1}; // axis will be normalized in createFromPitch
|
||||||
|
* Vec3d trans{3, 4 ,5};
|
||||||
|
* axis = axis / std::sqrt(axis.dot(axis));// The formula for computing moment that I use below requires a normalized axis
|
||||||
|
* Vec3d moment = 1.0 / 2 * (trans.cross(axis) + axis.cross(trans.cross(axis)) *
|
||||||
|
* std::cos(rotation_angle / 2) / std::sin(rotation_angle / 2));
|
||||||
|
* double d = trans.dot(qaxis);
|
||||||
|
* DualQuatd dq6 = DualQuatd::createFromPitch(angle, d, axis, moment);
|
||||||
|
* ```
|
||||||
|
*
|
||||||
|
* A point \f$v=(x, y, z)\f$ in form of dual quaternion is \f$[1+\epsilon v]=[1,0,0,0,0,x,y,z]\f$.
|
||||||
|
* The transformation of a point \f$v_1\f$ to another point \f$v_2\f$ under the dual quaternion \f$\sigma\f$ is
|
||||||
|
* \f[
|
||||||
|
* 1 + \epsilon v_2 = \sigma * (1 + \epsilon v_1) * \sigma^{\star}
|
||||||
|
* \f]
|
||||||
|
* where \f$\sigma^{\star}=p^*-\epsilon q^*.\f$
|
||||||
|
*
|
||||||
|
* A line in the \f$Pl\ddot{u}cker\f$ coordinates \f$(\hat{l}, m)\f$ defined by the dual quaternion \f$l=\hat{l}+\epsilon m\f$.
|
||||||
|
* To transform a line, \f[l_2 = \sigma * l_1 * \sigma^*,\f] where \f$\sigma=r+\frac{\epsilon}{2}rt\f$ and
|
||||||
|
* \f$\sigma^*=p^*+\epsilon q^*\f$.
|
||||||
|
*
|
||||||
|
* To extract the Vec<double, 8> or Vec<float, 8>, see toVec();
|
||||||
|
*
|
||||||
|
* To extract the affine transformation matrix, see toMat();
|
||||||
|
*
|
||||||
|
* To extract the instance of Affine3, see toAffine3();
|
||||||
|
*
|
||||||
|
* If two quaternions \f$q_0, q_1\f$ are needed to be interpolated, you can use sclerp()
|
||||||
|
* ```
|
||||||
|
* DualQuatd::sclerp(q0, q1, t)
|
||||||
|
* ```
|
||||||
|
* or dqblend().
|
||||||
|
* ```
|
||||||
|
* DualQuatd::dqblend(q0, q1, t)
|
||||||
|
* ```
|
||||||
|
* With more than two dual quaternions to be blended, you can use generalize linear dual quaternion blending
|
||||||
|
* with the corresponding weights, i.e. gdqblend().
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename _Tp>
|
||||||
|
class CV_EXPORTS DualQuat{
|
||||||
|
static_assert(std::is_floating_point<_Tp>::value, "Dual quaternion only make sense with type of float or double");
|
||||||
|
using value_type = _Tp;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static constexpr _Tp CV_DUAL_QUAT_EPS = (_Tp)1.e-6;
|
||||||
|
|
||||||
|
DualQuat();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create from eight same type numbers.
|
||||||
|
*/
|
||||||
|
DualQuat(const _Tp w, const _Tp x, const _Tp y, const _Tp z, const _Tp w_, const _Tp x_, const _Tp y_, const _Tp z_);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create from a double or float vector.
|
||||||
|
*/
|
||||||
|
DualQuat(const Vec<_Tp, 8> &q);
|
||||||
|
|
||||||
|
_Tp w, x, y, z, w_, x_, y_, z_;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create Dual Quaternion from two same type quaternions p and q.
|
||||||
|
* A Dual Quaternion \f$\sigma\f$ has the form:
|
||||||
|
* \f[\sigma = p + \epsilon q\f]
|
||||||
|
* where p and q are defined as follows:
|
||||||
|
* \f[\begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p &= w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\\
|
||||||
|
* q &= w\_ + x\_\boldsymbol{i} + y\_\boldsymbol{j} + z\_\boldsymbol{k}.
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* The p and q are the real part and dual part respectively.
|
||||||
|
* @param realPart a quaternion, real part of dual quaternion.
|
||||||
|
* @param dualPart a quaternion, dual part of dual quaternion.
|
||||||
|
* @sa Quat
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> createFromQuat(const Quat<_Tp> &realPart, const Quat<_Tp> &dualPart);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create a dual quaternion from a rotation angle \f$\theta\f$, a rotation axis
|
||||||
|
* \f$\boldsymbol{u}\f$ and a translation \f$\boldsymbol{t}\f$.
|
||||||
|
* It generates a dual quaternion \f$\sigma\f$ in the form of
|
||||||
|
* \f[\begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma &= r + \frac{\epsilon}{2}\boldsymbol{t}r \\
|
||||||
|
* &= [\cos(\frac{\theta}{2}), \boldsymbol{u}\sin(\frac{\theta}{2})]
|
||||||
|
* + \frac{\epsilon}{2}[0, \boldsymbol{t}][[\cos(\frac{\theta}{2}),
|
||||||
|
* \boldsymbol{u}\sin(\frac{\theta}{2})]]\\
|
||||||
|
* &= \cos(\frac{\theta}{2}) + \boldsymbol{u}\sin(\frac{\theta}{2})
|
||||||
|
* + \frac{\epsilon}{2}(-(\boldsymbol{t} \cdot \boldsymbol{u})\sin(\frac{\theta}{2})
|
||||||
|
* + \boldsymbol{t}\cos(\frac{\theta}{2}) + \boldsymbol{u} \times \boldsymbol{t} \sin(\frac{\theta}{2})).
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}\f]
|
||||||
|
* @param angle rotation angle.
|
||||||
|
* @param axis rotation axis.
|
||||||
|
* @param translation a vector of length 3.
|
||||||
|
* @note Axis will be normalized in this function. And translation is applied
|
||||||
|
* after the rotation. Use @ref createFromQuat(r, r * t / 2) to create a dual quaternion
|
||||||
|
* which translation is applied before rotation.
|
||||||
|
* @sa Quat
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> createFromAngleAxisTrans(const _Tp angle, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &translation);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform this dual quaternion to an affine transformation matrix \f$M\f$.
|
||||||
|
* Dual quaternion consists of a rotation \f$r=[a,b,c,d]\f$ and a translation \f$t=[\Delta x,\Delta y,\Delta z]\f$. The
|
||||||
|
* affine transformation matrix \f$M\f$ has the form
|
||||||
|
* \f[
|
||||||
|
* \begin{bmatrix}
|
||||||
|
* 1-2(e_2^2 +e_3^2) &2(e_1e_2-e_0e_3) &2(e_0e_2+e_1e_3) &\Delta x\\
|
||||||
|
* 2(e_0e_3+e_1e_2) &1-2(e_1^2+e_3^2) &2(e_2e_3-e_0e_1) &\Delta y\\
|
||||||
|
* 2(e_1e_3-e_0e_2) &2(e_0e_1+e_2e_3) &1-2(e_1^2-e_2^2) &\Delta z\\
|
||||||
|
* 0&0&0&1
|
||||||
|
* \end{bmatrix}
|
||||||
|
* \f]
|
||||||
|
* if A is a matrix consisting of n points to be transformed, this could be achieved by
|
||||||
|
* \f[
|
||||||
|
* new\_A = M * A
|
||||||
|
* \f]
|
||||||
|
* where A has the form
|
||||||
|
* \f[
|
||||||
|
* \begin{bmatrix}
|
||||||
|
* x_0& x_1& x_2&...&x_n\\
|
||||||
|
* y_0& y_1& y_2&...&y_n\\
|
||||||
|
* z_0& z_1& z_2&...&z_n\\
|
||||||
|
* 1&1&1&...&1
|
||||||
|
* \end{bmatrix}
|
||||||
|
* \f]
|
||||||
|
* where the same subscript represent the same point. The size of A should be \f$[4,n]\f$.
|
||||||
|
* and the same size for matrix new_A.
|
||||||
|
* @param _R 4x4 matrix that represents rotations and translation.
|
||||||
|
* @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
|
||||||
|
* a dual quaternion which translation is applied before rotation.
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> createFromMat(InputArray _R);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief create dual quaternion from an affine matrix. The definition of affine matrix can refer to createFromMat()
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> createFromAffine3(const Affine3<_Tp> &R);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A dual quaternion is a vector in form of
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma &=\boldsymbol{p} + \epsilon \boldsymbol{q}\\
|
||||||
|
* &= \cos\hat{\frac{\theta}{2}}+\overline{\hat{l}}\sin\frac{\hat{\theta}}{2}
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* where \f$\hat{\theta}\f$ is dual angle and \f$\overline{\hat{l}}\f$ is dual axis:
|
||||||
|
* \f[
|
||||||
|
* \hat{\theta}=\theta + \epsilon d,\\
|
||||||
|
* \overline{\hat{l}}= \hat{l} +\epsilon m.
|
||||||
|
* \f]
|
||||||
|
* In this representation, \f$\theta\f$ is rotation angle and \f$(\hat{l},m)\f$ is the screw axis, d is the translation distance along the axis.
|
||||||
|
*
|
||||||
|
* @param angle rotation angle.
|
||||||
|
* @param d translation along the rotation axis.
|
||||||
|
* @param axis rotation axis represented by quaternion with w = 0.
|
||||||
|
* @param moment the moment of line, and it should be orthogonal to axis.
|
||||||
|
* @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
|
||||||
|
* a dual quaternion which translation is applied before rotation.
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> createFromPitch(const _Tp angle, const _Tp d, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &moment);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return a quaternion which represent the real part of dual quaternion.
|
||||||
|
* The definition of real part is in createFromQuat().
|
||||||
|
* @sa createFromQuat, getDualPart
|
||||||
|
*/
|
||||||
|
Quat<_Tp> getRealPart() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return a quaternion which represent the dual part of dual quaternion.
|
||||||
|
* The definition of dual part is in createFromQuat().
|
||||||
|
* @sa createFromQuat, getRealPart
|
||||||
|
*/
|
||||||
|
Quat<_Tp> getDualPart() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the conjugate of a dual quaternion.
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma^* &= (p + \epsilon q)^*
|
||||||
|
* &= (p^* + \epsilon q^*)
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* @param dq a dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> conjugate(const DualQuat<T> &dq);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the conjugate of a dual quaternion.
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma^* &= (p + \epsilon q)^*
|
||||||
|
* &= (p^* + \epsilon q^*)
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> conjugate() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the rotation in quaternion form.
|
||||||
|
*/
|
||||||
|
Quat<_Tp> getRotation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the translation vector.
|
||||||
|
* The rotation \f$r\f$ in this dual quaternion \f$\sigma\f$ is applied before translation \f$t\f$.
|
||||||
|
* The dual quaternion \f$\sigma\f$ is defined as
|
||||||
|
* \f[\begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma &= p + \epsilon q \\
|
||||||
|
* &= r + \frac{\epsilon}{2}{t}r.
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}\f]
|
||||||
|
* Thus, the translation can be obtained as follows
|
||||||
|
* \f[t = 2qp^*.\f]
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
* @note This dual quaternion's translation is applied after the rotation.
|
||||||
|
*/
|
||||||
|
Vec<_Tp, 3> getTranslation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the norm \f$||\sigma||\f$ of dual quaternion \f$\sigma = p + \epsilon q\f$.
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* ||\sigma|| &= \sqrt{\sigma * \sigma^*} \\
|
||||||
|
* &= ||p|| + \epsilon \frac{p \cdot q}{||p||}.
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* Generally speaking, the norm of a not unit dual
|
||||||
|
* quaternion is a dual number. For convenience, we return it in the form of a dual quaternion
|
||||||
|
* , i.e.
|
||||||
|
* \f[ ||\sigma|| = [||p||, 0, 0, 0, \frac{p \cdot q}{||p||}, 0, 0, 0].\f]
|
||||||
|
*
|
||||||
|
* @note The data type of dual number is dual quaternion.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> norm() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return a normalized dual quaternion.
|
||||||
|
* A dual quaternion can be expressed as
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma &= p + \epsilon q\\
|
||||||
|
* &=||\sigma||\left(r+\frac{1}{2}tr\right)
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* where \f$r, t\f$ represents the rotation (ordinary quaternion) and translation (pure ordinary quaternion) respectively,
|
||||||
|
* and \f$||\sigma||\f$ is the norm of dual quaternion(a dual number).
|
||||||
|
* A dual quaternion is unit if and only if
|
||||||
|
* \f[
|
||||||
|
* ||p||=1, p \cdot q=0
|
||||||
|
* \f]
|
||||||
|
* where \f$\cdot\f$ means dot product.
|
||||||
|
* The process of normalization is
|
||||||
|
* \f[
|
||||||
|
* \sigma_{u}=\frac{\sigma}{||\sigma||}
|
||||||
|
* \f]
|
||||||
|
* Next, we simply proof \f$\sigma_u\f$ is a unit dual quaternion:
|
||||||
|
* \f[
|
||||||
|
* \renewcommand{\Im}{\operatorname{Im}}
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* \sigma_{u}=\frac{\sigma}{||\sigma||}&=\frac{p + \epsilon q}{||p||+\epsilon\frac{p\cdot q}{||p||}}\\
|
||||||
|
* &=\frac{p}{||p||}+\epsilon\left(\frac{q}{||p||}-p\frac{p\cdot q}{||p||^3}\right)\\
|
||||||
|
* &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\left(qp^{*}-p\cdot q\right)\frac{p}{||p||}\\
|
||||||
|
* &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\Im(qp^*)\frac{p}{||p||}.\\
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
* As expected, the real part is a rotation and dual part is a pure quaternion.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> normalize() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion, p is not zero,
|
||||||
|
* the inverse dual quaternion is
|
||||||
|
* \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
|
||||||
|
* or equivalentlly,
|
||||||
|
* \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
|
||||||
|
* @param dq a dual quaternion.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion, p is not zero,
|
||||||
|
* the inverse dual quaternion is
|
||||||
|
* \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
|
||||||
|
* or equivalentlly,
|
||||||
|
* \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the dot product of two dual quaternion.
|
||||||
|
* @param p other dual quaternion.
|
||||||
|
*/
|
||||||
|
_Tp dot(DualQuat<_Tp> p) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
** @brief return the value of \f$p^t\f$ where p is a dual quaternion.
|
||||||
|
* This could be calculated as:
|
||||||
|
* \f[
|
||||||
|
* p^t = \exp(t\ln p)
|
||||||
|
* \f]
|
||||||
|
* @param dq a dual quaternion.
|
||||||
|
* @param t index of power function.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit);
|
||||||
|
|
||||||
|
/**
|
||||||
|
** @brief return the value of \f$p^t\f$ where p is a dual quaternion.
|
||||||
|
* This could be calculated as:
|
||||||
|
* \f[
|
||||||
|
* p^t = \exp(t\ln p)
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* @param t index of power function.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> power(const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of \f$p^q\f$ where p and q are dual quaternions.
|
||||||
|
* This could be calculated as:
|
||||||
|
* \f[
|
||||||
|
* p^q = \exp(q\ln p)
|
||||||
|
* \f]
|
||||||
|
* @param p a dual quaternion.
|
||||||
|
* @param q a dual quaternion.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion p assume to be a dual unit quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> power(const DualQuat<T>& p, const DualQuat<T>& q, QuatAssumeType assumeUnit);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of \f$p^q\f$ where p and q are dual quaternions.
|
||||||
|
* This could be calculated as:
|
||||||
|
* \f[
|
||||||
|
* p^q = \exp(q\ln p)
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* @param q a dual quaternion
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a dual unit quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> power(const DualQuat<_Tp>& q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of exponential function value
|
||||||
|
* @param dq a dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> exp(const DualQuat<T> &dq);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of exponential function value
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> exp() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of logarithm function value
|
||||||
|
*
|
||||||
|
* @param dq a dual quaternion.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return the value of logarithm function value
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform this dual quaternion to a vector.
|
||||||
|
*/
|
||||||
|
Vec<_Tp, 8> toVec() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform this dual quaternion to a affine transformation matrix
|
||||||
|
* the form of matrix, see createFromMat().
|
||||||
|
*/
|
||||||
|
Matx<_Tp, 4, 4> toMat(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Transform this dual quaternion to a instance of Affine3.
|
||||||
|
*/
|
||||||
|
Affine3<_Tp> toAffine3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The screw linear interpolation(ScLERP) is an extension of spherical linear interpolation of dual quaternion.
|
||||||
|
* If \f$\sigma_1\f$ and \f$\sigma_2\f$ are two dual quaternions representing the initial and final pose.
|
||||||
|
* The interpolation of ScLERP function can be defined as:
|
||||||
|
* \f[
|
||||||
|
* ScLERP(t;\sigma_1,\sigma_2) = \sigma_1 * (\sigma_1^{-1} * \sigma_2)^t, t\in[0,1]
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* @param q1 a dual quaternion represents a initial pose.
|
||||||
|
* @param q2 a dual quaternion represents a final pose.
|
||||||
|
* @param t interpolation parameter
|
||||||
|
* @param directChange if true, it always return the shortest path.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* double angle1 = CV_PI / 2;
|
||||||
|
* Vec3d axis{0, 0, 1};
|
||||||
|
* Vec3d t(0, 0, 3);
|
||||||
|
* DualQuatd initial = DualQuatd::createFromAngleAxisTrans(angle1, axis, t);
|
||||||
|
* double angle2 = CV_PI;
|
||||||
|
* DualQuatd final = DualQuatd::createFromAngleAxisTrans(angle2, axis, t);
|
||||||
|
* DualQuatd inter = DualQuatd::sclerp(initial, final, 0.5);
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> sclerp(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
|
||||||
|
bool directChange=true, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
|
||||||
|
/**
|
||||||
|
* @brief The method of Dual Quaternion linear Blending(DQB) is to compute a transformation between dual quaternion
|
||||||
|
* \f$q_1\f$ and \f$q_2\f$ and can be defined as:
|
||||||
|
* \f[
|
||||||
|
* DQB(t;{\boldsymbol{q}}_1,{\boldsymbol{q}}_2)=
|
||||||
|
* \frac{(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2}{||(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2||}.
|
||||||
|
* \f]
|
||||||
|
* where \f$q_1\f$ and \f$q_2\f$ are unit dual quaternions representing the input transformations.
|
||||||
|
* If you want to use DQB that works for more than two rigid transformations, see @ref gdqblend
|
||||||
|
*
|
||||||
|
* @param q1 a unit dual quaternion representing the input transformations.
|
||||||
|
* @param q2 a unit dual quaternion representing the input transformations.
|
||||||
|
* @param t parameter \f$t\in[0,1]\f$.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
|
||||||
|
* and this function will save some computations.
|
||||||
|
*
|
||||||
|
* @sa gdqblend
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> dqblend(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
|
||||||
|
QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
|
||||||
|
* If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
|
||||||
|
* \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
|
||||||
|
* \f[
|
||||||
|
* gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
|
||||||
|
* {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
|
||||||
|
* \f]
|
||||||
|
* @param dualquat vector of dual quaternions
|
||||||
|
* @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should
|
||||||
|
* satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions assume to be unit quaternions
|
||||||
|
* and this function will save some computations.
|
||||||
|
* @note the type of weights' element should be the same as the date type of dual quaternion inside the dualquat.
|
||||||
|
*/
|
||||||
|
template <int cn>
|
||||||
|
static DualQuat<_Tp> gdqblend(const Vec<DualQuat<_Tp>, cn> &dualquat, InputArray weights,
|
||||||
|
QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
|
||||||
|
* If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
|
||||||
|
* \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
|
||||||
|
* \f[
|
||||||
|
* gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
|
||||||
|
* {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
|
||||||
|
* \f]
|
||||||
|
* @param dualquat The dual quaternions which have 8 channels and 1 row or 1 col.
|
||||||
|
* @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should
|
||||||
|
* satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$.
|
||||||
|
* @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions assume to be unit quaternions
|
||||||
|
* and this function will save some computations.
|
||||||
|
* @note the type of weights' element should be the same as the date type of dual quaternion inside the dualquat.
|
||||||
|
*/
|
||||||
|
static DualQuat<_Tp> gdqblend(InputArray dualquat, InputArray weights,
|
||||||
|
QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return opposite dual quaternion \f$-p\f$
|
||||||
|
* which satisfies \f$p + (-p) = 0.\f$
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd q{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* std::cout << -q << std::endl; // [-1, -2, -3, -4, -5, -6, -7, -8]
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator-() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief return true if two dual quaternions p and q are nearly equal, i.e. when the absolute
|
||||||
|
* value of each \f$p_i\f$ and \f$q_i\f$ is less than CV_DUAL_QUAT_EPS.
|
||||||
|
*/
|
||||||
|
bool operator==(const DualQuat<_Tp>&) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Subtraction operator of two dual quaternions p and q.
|
||||||
|
* It returns a new dual quaternion that each value is the sum of \f$p_i\f$ and \f$-q_i\f$.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* std::cout << p - q << std::endl; //[-4, -4, -4, -4, 4, -4, -4, -4]
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator-(const DualQuat<_Tp>&) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Subtraction assignment operator of two dual quaternions p and q.
|
||||||
|
* It subtracts right operand from the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* p -= q; // equivalent to p = p - q
|
||||||
|
* std::cout << p << std::endl; //[-4, -4, -4, -4, 4, -4, -4, -4]
|
||||||
|
*
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp>& operator-=(const DualQuat<_Tp>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Addition operator of two dual quaternions p and q.
|
||||||
|
* It returns a new dual quaternion that each value is the sum of \f$p_i\f$ and \f$q_i\f$.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* std::cout << p + q << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator+(const DualQuat<_Tp>&) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Addition assignment operator of two dual quaternions p and q.
|
||||||
|
* It adds right operand to the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* p += q; // equivalent to p = p + q
|
||||||
|
* std::cout << p << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
|
||||||
|
*
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp>& operator+=(const DualQuat<_Tp>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Multiplication assignment operator of two quaternions.
|
||||||
|
* It multiplies right operand with the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion multiplication:
|
||||||
|
* The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p * q &= [A, B][C, D]\\
|
||||||
|
* &=[AC, AD + BC]
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* p *= q;
|
||||||
|
* std::cout << p << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp>& operator*=(const DualQuat<_Tp>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Multiplication assignment operator of a quaternions and a scalar.
|
||||||
|
* It multiplies right operand with the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion multiplication with a scalar:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
|
||||||
|
* &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double s = 2.0;
|
||||||
|
* p *= s;
|
||||||
|
* std::cout << p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator*=(const _Tp s);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Multiplication operator of two dual quaternions q and p.
|
||||||
|
* Multiplies values on either side of the operator.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion multiplication:
|
||||||
|
* The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p * q &= [A, B][C, D]\\
|
||||||
|
* &=[AC, AD + BC]
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* std::cout << p * q << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator*(const DualQuat<_Tp>&) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Division operator of a dual quaternions and a scalar.
|
||||||
|
* It divides left operand with the right operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion division with a scalar:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\
|
||||||
|
* &=[w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s].
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double s = 2.0;
|
||||||
|
* p /= s; // equivalent to p = p / s
|
||||||
|
* std::cout << p << std::endl; //[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to this dual quaternion.
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator/(const _Tp s) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Division operator of two dual quaternions p and q.
|
||||||
|
* Divides left hand operand by right hand operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion division with a dual quaternion:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p / q &= p * q.inv()\\
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* std::cout << p / q << std::endl; // equivalent to p * q.inv()
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp> operator/(const DualQuat<_Tp>&) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Division assignment operator of two dual quaternions p and q;
|
||||||
|
* It divides left operand with the right operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion division with a quaternion:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p / q&= p * q.inv()\\
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
|
||||||
|
* p /= q; // equivalent to p = p * q.inv()
|
||||||
|
* std::cout << p << std::endl;
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
DualQuat<_Tp>& operator/=(const DualQuat<_Tp>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Division assignment operator of a dual quaternions and a scalar.
|
||||||
|
* It divides left operand with the right operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion division with a scalar:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p / s &= [w, x, y, z, w\_, x\_, y\_ ,z\_] / s\\
|
||||||
|
* &=[w / s, x / s, y / s, z / s, w\_ / \space s, x\_ / \space s, y\_ / \space s, z\_ / \space s].
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double s = 2.0;;
|
||||||
|
* p /= s; // equivalent to p = p / s
|
||||||
|
* std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
Quat<_Tp>& operator/=(const _Tp s);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Addition operator of a scalar and a dual quaternions.
|
||||||
|
* Adds right hand operand from left hand operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double scalar = 2.0;
|
||||||
|
* std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator+(const T s, const DualQuat<T>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Addition operator of a dual quaternions and a scalar.
|
||||||
|
* Adds right hand operand from left hand operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double scalar = 2.0;
|
||||||
|
* std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator+(const DualQuat<T>&, const T s);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Multiplication operator of a scalar and a dual quaternions.
|
||||||
|
* It multiplies right operand with the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion multiplication with a scalar:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
|
||||||
|
* &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double s = 2.0;
|
||||||
|
* std::cout << s * p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator*(const T s, const DualQuat<T>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Subtraction operator of a dual quaternion and a scalar.
|
||||||
|
* Subtracts right hand operand from left hand operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double scalar = 2.0;
|
||||||
|
* std::cout << p - scalar << std::endl; //[-1, 2, 3, 4, 5, 6, 7, 8]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator-(const DualQuat<T>&, const T s);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Subtraction operator of a scalar and a dual quaternions.
|
||||||
|
* Subtracts right hand operand from left hand operand.
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double scalar = 2.0;
|
||||||
|
* std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4, -5, -6, -7, -8]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator-(const T s, const DualQuat<T>&);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Multiplication operator of a dual quaternions and a scalar.
|
||||||
|
* It multiplies right operand with the left operand and assign the result to left operand.
|
||||||
|
*
|
||||||
|
* Rule of dual quaternion multiplication with a scalar:
|
||||||
|
* \f[
|
||||||
|
* \begin{equation}
|
||||||
|
* \begin{split}
|
||||||
|
* p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
|
||||||
|
* &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
|
||||||
|
* \end{split}
|
||||||
|
* \end{equation}
|
||||||
|
* \f]
|
||||||
|
*
|
||||||
|
* For example
|
||||||
|
* ```
|
||||||
|
* DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
|
||||||
|
* double s = 2.0;
|
||||||
|
* std::cout << p * s << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
|
||||||
|
* ```
|
||||||
|
* @note the type of scalar should be equal to the dual quaternion.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
friend DualQuat<T> cv::operator*(const DualQuat<T>&, const T s);
|
||||||
|
|
||||||
|
template <typename S>
|
||||||
|
friend std::ostream& cv::operator<<(std::ostream&, const DualQuat<S>&);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
using DualQuatd = DualQuat<double>;
|
||||||
|
using DualQuatf = DualQuat<float>;
|
||||||
|
|
||||||
|
//! @} core
|
||||||
|
}//namespace
|
||||||
|
|
||||||
|
#include "dualquaternion.inl.hpp"
|
||||||
|
|
||||||
|
#endif /* OPENCV_CORE_QUATERNION_HPP */
|
||||||
487
3rdpart/OpenCV/include/opencv2/core/dualquaternion.inl.hpp
Normal file
487
3rdpart/OpenCV/include/opencv2/core/dualquaternion.inl.hpp
Normal file
@@ -0,0 +1,487 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Author: Liangqian Kong <kongliangqian@huawei.com>
|
||||||
|
// Longbu Wang <wanglongbu@huawei.com>
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_DUALQUATERNION_INL_HPP
|
||||||
|
#define OPENCV_CORE_DUALQUATERNION_INL_HPP
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_DUALQUATERNION_HPP
|
||||||
|
#error This is not a standalone header. Include dualquaternion.hpp instead.
|
||||||
|
#endif
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//Implementation
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T>::DualQuat():w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0){}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T>::DualQuat(const T vw, const T vx, const T vy, const T vz, const T _w, const T _x, const T _y, const T _z):
|
||||||
|
w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z){}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T>::DualQuat(const Vec<T, 8> &q):w(q[0]), x(q[1]), y(q[2]), z(q[3]),
|
||||||
|
w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7]){}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::createFromQuat(const Quat<T> &realPart, const Quat<T> &dualPart)
|
||||||
|
{
|
||||||
|
T w = realPart.w;
|
||||||
|
T x = realPart.x;
|
||||||
|
T y = realPart.y;
|
||||||
|
T z = realPart.z;
|
||||||
|
T w_ = dualPart.w;
|
||||||
|
T x_ = dualPart.x;
|
||||||
|
T y_ = dualPart.y;
|
||||||
|
T z_ = dualPart.z;
|
||||||
|
return DualQuat<T>(w, x, y, z, w_, x_, y_, z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::createFromAngleAxisTrans(const T angle, const Vec<T, 3> &axis, const Vec<T, 3> &trans)
|
||||||
|
{
|
||||||
|
Quat<T> r = Quat<T>::createFromAngleAxis(angle, axis);
|
||||||
|
Quat<T> t{0, trans[0], trans[1], trans[2]};
|
||||||
|
return createFromQuat(r, t * r * T(0.5));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::createFromMat(InputArray _R)
|
||||||
|
{
|
||||||
|
CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
|
||||||
|
if (_R.size() != Size(4, 4))
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsBadArg, "The input matrix must have 4 columns and 4 rows");
|
||||||
|
}
|
||||||
|
Mat R = _R.getMat();
|
||||||
|
Quat<T> r = Quat<T>::createFromRotMat(R.colRange(0, 3).rowRange(0, 3));
|
||||||
|
Quat<T> trans(0, R.at<T>(0, 3), R.at<T>(1, 3), R.at<T>(2, 3));
|
||||||
|
return createFromQuat(r, trans * r * T(0.5));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::createFromAffine3(const Affine3<T> &R)
|
||||||
|
{
|
||||||
|
return createFromMat(R.matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::createFromPitch(const T angle, const T d, const Vec<T, 3> &axis, const Vec<T, 3> &moment)
|
||||||
|
{
|
||||||
|
T half_angle = angle * T(0.5), half_d = d * T(0.5);
|
||||||
|
Quat<T> qaxis = Quat<T>(0, axis[0], axis[1], axis[2]).normalize();
|
||||||
|
Quat<T> qmoment = Quat<T>(0, moment[0], moment[1], moment[2]);
|
||||||
|
qmoment -= qaxis * axis.dot(moment);
|
||||||
|
Quat<T> dual = -half_d * std::sin(half_angle) + std::sin(half_angle) * qmoment +
|
||||||
|
half_d * std::cos(half_angle) * qaxis;
|
||||||
|
return createFromQuat(Quat<T>::createFromAngleAxis(angle, axis), dual);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline bool DualQuat<T>::operator==(const DualQuat<T> &q) const
|
||||||
|
{
|
||||||
|
return (abs(w - q.w) < CV_DUAL_QUAT_EPS && abs(x - q.x) < CV_DUAL_QUAT_EPS &&
|
||||||
|
abs(y - q.y) < CV_DUAL_QUAT_EPS && abs(z - q.z) < CV_DUAL_QUAT_EPS &&
|
||||||
|
abs(w_ - q.w_) < CV_DUAL_QUAT_EPS && abs(x_ - q.x_) < CV_DUAL_QUAT_EPS &&
|
||||||
|
abs(y_ - q.y_) < CV_DUAL_QUAT_EPS && abs(z_ - q.z_) < CV_DUAL_QUAT_EPS);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline Quat<T> DualQuat<T>::getRealPart() const
|
||||||
|
{
|
||||||
|
return Quat<T>(w, x, y, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline Quat<T> DualQuat<T>::getDualPart() const
|
||||||
|
{
|
||||||
|
return Quat<T>(w_, x_, y_, z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> conjugate(const DualQuat<T> &dq)
|
||||||
|
{
|
||||||
|
return dq.conjugate();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::conjugate() const
|
||||||
|
{
|
||||||
|
return DualQuat<T>(w, -x, -y, -z, w_, -x_, -y_, -z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::norm() const
|
||||||
|
{
|
||||||
|
Quat<T> real = getRealPart();
|
||||||
|
T realNorm = real.norm();
|
||||||
|
Quat<T> dual = getDualPart();
|
||||||
|
if (realNorm < CV_DUAL_QUAT_EPS){
|
||||||
|
return DualQuat<T>(0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
}
|
||||||
|
return DualQuat<T>(realNorm, 0, 0, 0, real.dot(dual) / realNorm, 0, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline Quat<T> DualQuat<T>::getRotation(QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
if (assumeUnit)
|
||||||
|
{
|
||||||
|
return getRealPart();
|
||||||
|
}
|
||||||
|
return getRealPart().normalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline Vec<T, 3> DualQuat<T>::getTranslation(QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
Quat<T> trans = T(2.0) * (getDualPart() * getRealPart().inv(assumeUnit));
|
||||||
|
return Vec<T, 3>{trans[1], trans[2], trans[3]};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
DualQuat<T> DualQuat<T>::normalize() const
|
||||||
|
{
|
||||||
|
Quat<T> p = getRealPart();
|
||||||
|
Quat<T> q = getDualPart();
|
||||||
|
T p_norm = p.norm();
|
||||||
|
if (p_norm < CV_DUAL_QUAT_EPS)
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsBadArg, "Cannot normalize this dual quaternion: the norm is too small.");
|
||||||
|
}
|
||||||
|
Quat<T> p_nr = p / p_norm;
|
||||||
|
Quat<T> q_nr = q / p_norm;
|
||||||
|
return createFromQuat(p_nr, q_nr - p_nr * p_nr.dot(q_nr));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline T DualQuat<T>::dot(DualQuat<T> q) const
|
||||||
|
{
|
||||||
|
return q.w * w + q.x * x + q.y * y + q.z * z + q.w_ * w_ + q.x_ * x_ + q.y_ * y_ + q.z_ * z_;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
|
||||||
|
{
|
||||||
|
return dq.inv(assumeUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::inv(QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
Quat<T> real = getRealPart();
|
||||||
|
Quat<T> dual = getDualPart();
|
||||||
|
return createFromQuat(real.inv(assumeUnit), -real.inv(assumeUnit) * dual * real.inv(assumeUnit));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator-(const DualQuat<T> &q) const
|
||||||
|
{
|
||||||
|
return DualQuat<T>(w - q.w, x - q.x, y - q.y, z - q.z, w_ - q.w_, x_ - q.x_, y_ - q.y_, z_ - q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator-() const
|
||||||
|
{
|
||||||
|
return DualQuat<T>(-w, -x, -y, -z, -w_, -x_, -y_, -z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator+(const DualQuat<T> &q) const
|
||||||
|
{
|
||||||
|
return DualQuat<T>(w + q.w, x + q.x, y + q.y, z + q.z, w_ + q.w_, x_ + q.x_, y_ + q.y_, z_ + q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T>& DualQuat<T>::operator+=(const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
*this = *this + q;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator*(const DualQuat<T> &q) const
|
||||||
|
{
|
||||||
|
Quat<T> A = getRealPart();
|
||||||
|
Quat<T> B = getDualPart();
|
||||||
|
Quat<T> C = q.getRealPart();
|
||||||
|
Quat<T> D = q.getDualPart();
|
||||||
|
return DualQuat<T>::createFromQuat(A * C, A * D + B * C);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T>& DualQuat<T>::operator*=(const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
*this = *this * q;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator+(const T a, const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator+(const DualQuat<T> &q, const T a)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator-(const DualQuat<T> &q, const T a)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(q.w - a, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T>& DualQuat<T>::operator-=(const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
*this = *this - q;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator-(const T a, const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(a - q.w, -q.x, -q.y, -q.z, -q.w_, -q.x_, -q.y_, -q.z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator*(const T a, const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> operator*(const DualQuat<T> &q, const T a)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator/(const T a) const
|
||||||
|
{
|
||||||
|
return DualQuat<T>(w / a, x / a, y / a, z / a, w_ / a, x_ / a, y_ / a, z_ / a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::operator/(const DualQuat<T> &q) const
|
||||||
|
{
|
||||||
|
return *this * q.inv();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T>& DualQuat<T>::operator/=(const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
*this = *this / q;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::ostream & operator<<(std::ostream &os, const DualQuat<T> &q)
|
||||||
|
{
|
||||||
|
os << "DualQuat " << Vec<T, 8>{q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_};
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> exp(const DualQuat<T> &dq)
|
||||||
|
{
|
||||||
|
return dq.exp();
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace detail {

/**
 * Jacobian (4x4) of the quaternion exponential map, evaluated at q.
 *
 * Used by DualQuat::exp()/log() to propagate the dual part through the
 * exponential: the dual 4-vector is multiplied by this matrix (or its
 * inverse for log). Near a zero vector part, the sinc and
 * (cos - sinc)/nv^2 coefficients are replaced by their Taylor limits to
 * avoid 0/0.
 */
template <typename _Tp>
Matx<_Tp, 4, 4> jacob_exp(const Quat<_Tp> &q)
{
    // Norm of the vector (imaginary) part of q.
    _Tp nv = std::sqrt(q.x * q.x + q.y * q.y + q.z * q.z);
    // sinc(nv) = sin(nv)/nv, with 2nd-order Taylor fallback near zero.
    _Tp sinc_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? _Tp(1.0) - nv * nv * _Tp(1.0/6.0) : std::sin(nv) / nv;
    // (cos(nv) - sinc(nv)) / nv^2, whose limit at nv -> 0 is -1/3.
    _Tp csiii_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? -_Tp(1.0/3.0) : (std::cos(nv) - sinc_nv) / nv / nv;
    Matx<_Tp, 4, 4> J_exp_quat {
        std::cos(nv), -sinc_nv * q.x, -sinc_nv * q.y, -sinc_nv * q.z,
        sinc_nv * q.x, csiii_nv * q.x * q.x + sinc_nv, csiii_nv * q.x * q.y, csiii_nv * q.x * q.z,
        sinc_nv * q.y, csiii_nv * q.y * q.x, csiii_nv * q.y * q.y + sinc_nv, csiii_nv * q.y * q.z,
        sinc_nv * q.z, csiii_nv * q.z * q.x, csiii_nv * q.z * q.y, csiii_nv * q.z * q.z + sinc_nv
    };
    // Scale by e^w, the exponential of the scalar part.
    return std::exp(q.w) * J_exp_quat;
}

} // namespace detail
|
||||||
|
|
||||||
|
/**
 * Exponential of a dual quaternion: exp of the real quaternion for the real
 * part, and the dual part mapped through the Jacobian of the quaternion
 * exponential evaluated at the real part.
 */
template <typename T>
DualQuat<T> DualQuat<T>::exp() const
{
    Quat<T> real = getRealPart();
    return createFromQuat(real.exp(), Quat<T>(detail::jacob_exp(real) * getDualPart().toVec()));
}

// Free-function wrapper around DualQuat<T>::log().
template <typename T>
DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
{
    return dq.log(assumeUnit);
}

/**
 * Logarithm of a dual quaternion: log of the real quaternion for the real
 * part, and the dual part mapped through the INVERSE Jacobian of the
 * exponential evaluated at log(real) — i.e. the exact inverse of exp().
 *
 * @param assumeUnit forwarded to Quat::log to skip normalization work.
 */
template <typename T>
DualQuat<T> DualQuat<T>::log(QuatAssumeType assumeUnit) const
{
    Quat<T> plog = getRealPart().log(assumeUnit);
    Matx<T, 4, 4> jacob = detail::jacob_exp(plog);
    return createFromQuat(plog, Quat<T>(jacob.inv() * getDualPart().toVec()));
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
|
||||||
|
{
|
||||||
|
return dq.power(t, assumeUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::power(const T t, QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
return (t * log(assumeUnit)).exp();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> power(const DualQuat<T> &p, const DualQuat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
|
||||||
|
{
|
||||||
|
return p.power(q, assumeUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline DualQuat<T> DualQuat<T>::power(const DualQuat<T> &q, QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
return (q * log(assumeUnit)).exp();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline Vec<T, 8> DualQuat<T>::toVec() const
|
||||||
|
{
|
||||||
|
return Vec<T, 8>(w, x, y, z, w_, x_, y_, z_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
Affine3<T> DualQuat<T>::toAffine3(QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
return Affine3<T>(toMat(assumeUnit));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
Matx<T, 4, 4> DualQuat<T>::toMat(QuatAssumeType assumeUnit) const
|
||||||
|
{
|
||||||
|
Matx<T, 4, 4> rot44 = getRotation(assumeUnit).toRotMat4x4();
|
||||||
|
Vec<T, 3> translation = getTranslation(assumeUnit);
|
||||||
|
rot44(0, 3) = translation[0];
|
||||||
|
rot44(1, 3) = translation[1];
|
||||||
|
rot44(2, 3) = translation[2];
|
||||||
|
return rot44;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Screw linear interpolation (ScLERP) between two rigid transforms.
 *
 * @param q0 start pose; @param q1 end pose.
 * @param t interpolation parameter: t = 0 yields q0, t = 1 yields q1
 *        (after normalization).
 * @param directChange when true, flip q0's sign if its real part points into
 *        the opposite hemisphere from q1's, so interpolation follows the
 *        shorter screw path (q and -q encode the same rigid transform).
 * @param assumeUnit skip normalization when both inputs are already unit
 *        dual quaternions.
 */
template <typename T>
DualQuat<T> DualQuat<T>::sclerp(const DualQuat<T> &q0, const DualQuat<T> &q1, const T t, bool directChange, QuatAssumeType assumeUnit)
{
    DualQuat<T> v0(q0), v1(q1);
    if (!assumeUnit)
    {
        v0 = v0.normalize();
        v1 = v1.normalize();
    }
    Quat<T> v0Real = v0.getRealPart();
    Quat<T> v1Real = v1.getRealPart();
    // Antipodal covers: pick the representative of v0 closest to v1.
    if (directChange && v1Real.dot(v0Real) < 0)
    {
        v0 = -v0;
    }
    // v0 * (v0^{-1} * v1)^t: raise the relative motion to the power t.
    DualQuat<T> v0inv1 = v0.inv() * v1;
    return v0 * v0inv1.power(t, QUAT_ASSUME_UNIT);
}
|
||||||
|
|
||||||
|
/**
 * Dual quaternion linear blending (DLB) of two poses with weights (1-t, t).
 *
 * Inputs are normalized unless assumeUnit is set; the blend is re-normalized
 * before returning, so the result is always a unit dual quaternion.
 */
template <typename T>
DualQuat<T> DualQuat<T>::dqblend(const DualQuat<T> &q1, const DualQuat<T> &q2, const T t, QuatAssumeType assumeUnit)
{
    DualQuat<T> v1(q1), v2(q2);
    if (!assumeUnit)
    {
        v1 = v1.normalize();
        v2 = v2.normalize();
    }
    // Antipodal rotations: blend against -v2 (via the subtraction below) so
    // both poses lie in the same hemisphere and the blend takes the short way.
    if (v1.getRotation(assumeUnit).dot(v2.getRotation(assumeUnit)) < 0)
    {
        return ((1 - t) * v1 - t * v2).normalize();
    }
    return ((1 - t) * v1 + t * v2).normalize();
}
|
||||||
|
|
||||||
|
/**
 * Generalized dual quaternion blending of n poses with n scalar weights.
 *
 * @param _dualquat (1, n) or (n, 1) array whose elements are 8-channel
 *        values of depth T (one dual quaternion per element).
 * @param _weight   array of the same shape holding scalar weights of type T.
 * @param assumeUnit skip per-element normalization when inputs are unit.
 * @return the normalized weighted blend.
 *
 * Each pose's sign is corrected against the first pose's rotation so all
 * rotations lie in the same hemisphere before summation.
 */
template <typename T>
DualQuat<T> DualQuat<T>::gdqblend(InputArray _dualquat, InputArray _weight, QuatAssumeType assumeUnit)
{
    CV_CheckTypeEQ(_weight.type(), cv::traits::Type<T>::value, "");
    CV_CheckTypeEQ(_dualquat.type(), CV_MAKETYPE(CV_MAT_DEPTH(cv::traits::Type<T>::value), 8), "");
    Size dq_s = _dualquat.size();
    if (dq_s != _weight.size() || (dq_s.height != 1 && dq_s.width != 1))
    {
        CV_Error(Error::StsBadArg, "The size of weight must be the same as dualquat, both of them should be (1, n) or (n, 1)");
    }
    Mat dualquat = _dualquat.getMat(), weight = _weight.getMat();
    const int cn = std::max(dq_s.width, dq_s.height);
    if (!assumeUnit)
    {
        // NOTE(review): getMat() may alias the caller's buffer, so this
        // normalization can write through to the input — confirm intended.
        for (int i = 0; i < cn; ++i)
        {
            dualquat.at<Vec<T, 8>>(i) = DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.normalize().toVec();
        }
    }
    // Seed the accumulator with the first weighted pose; q0 is the reference
    // rotation used for hemisphere (sign) correction of the remaining poses.
    Vec<T, 8> dq_blend = dualquat.at<Vec<T, 8>>(0) * weight.at<T>(0);
    Quat<T> q0 = DualQuat<T> {dualquat.at<Vec<T, 8>>(0)}.getRotation(assumeUnit);
    for (int i = 1; i < cn; ++i)
    {
        // Flip the sign when this pose's rotation is antipodal to q0.
        T k = q0.dot(DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.getRotation(assumeUnit)) < 0 ? -1: 1;
        dq_blend = dq_blend + dualquat.at<Vec<T, 8>>(i) * k * weight.at<T>(i);
    }
    return DualQuat<T>{dq_blend}.normalize();
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
template <int cn>
|
||||||
|
DualQuat<T> DualQuat<T>::gdqblend(const Vec<DualQuat<T>, cn> &_dualquat, InputArray _weight, QuatAssumeType assumeUnit)
|
||||||
|
{
|
||||||
|
Vec<DualQuat<T>, cn> dualquat(_dualquat);
|
||||||
|
if (cn == 0)
|
||||||
|
{
|
||||||
|
return DualQuat<T>(1, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
}
|
||||||
|
Mat dualquat_mat(cn, 1, CV_64FC(8));
|
||||||
|
for (int i = 0; i < cn ; ++i)
|
||||||
|
{
|
||||||
|
dualquat_mat.at<Vec<T, 8>>(i) = dualquat[i].toVec();
|
||||||
|
}
|
||||||
|
return gdqblend(dualquat_mat, _weight, assumeUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
} //namespace cv
|
||||||
|
|
||||||
|
#endif /*OPENCV_CORE_DUALQUATERNION_INL_HPP*/
|
||||||
425
3rdpart/OpenCV/include/opencv2/core/eigen.hpp
Normal file
425
3rdpart/OpenCV/include/opencv2/core/eigen.hpp
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_EIGEN_HPP
|
||||||
|
#define OPENCV_CORE_EIGEN_HPP
|
||||||
|
|
||||||
|
#ifndef EIGEN_WORLD_VERSION
|
||||||
|
#error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core.hpp"
|
||||||
|
|
||||||
|
#if defined _MSC_VER && _MSC_VER >= 1200
|
||||||
|
#ifndef NOMINMAX
|
||||||
|
#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548
|
||||||
|
#endif
|
||||||
|
#pragma warning( disable: 4714 ) //__forceinline is not inlined
|
||||||
|
#pragma warning( disable: 4127 ) //conditional expression is constant
|
||||||
|
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
|
||||||
|
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||||
|
#include <unsupported/Eigen/CXX11/Tensor>
|
||||||
|
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
|
||||||
|
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
|
||||||
|
#endif // !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
/** @addtogroup core_eigen
|
||||||
|
These functions are provided for OpenCV-Eigen interoperability. They convert `Mat`
|
||||||
|
objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen
|
||||||
|
documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for
|
||||||
|
information about the `Matrix` template type.
|
||||||
|
|
||||||
|
@note Using these functions requires the `Eigen/Dense` or similar header to be
|
||||||
|
included before this header.
|
||||||
|
*/
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN)
|
||||||
|
/** @brief Converts an Eigen::Tensor to a cv::Mat.
|
||||||
|
|
||||||
|
The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
|
||||||
|
H = number of rows
|
||||||
|
W = number of columns
|
||||||
|
C = number of channels
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
\code
|
||||||
|
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
|
||||||
|
// populate tensor with values
|
||||||
|
Mat a_mat;
|
||||||
|
eigen2cv(a_tensor, a_mat);
|
||||||
|
\endcode
|
||||||
|
*/
|
||||||
|
// Convert an H x W x C Eigen tensor into a cv::Mat (H x W, C channels).
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
    if( !(_layout & Eigen::RowMajorBit) )
    {
        // Column-major source: materialize a row-major copy (swap_layout
        // reverses dimension order, the shuffle restores H, W, C) so the
        // memory layout matches cv::Mat, then deep-copy into dst.
        const std::array<int, 3> shuffle{2, 1, 0};
        Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
        Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
        _src.copyTo(dst);
    }
    else
    {
        // Row-major source: wrap the tensor's buffer directly (no copy)
        // and deep-copy into dst.
        Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
        _src.copyTo(dst);
    }
}
|
||||||
|
|
||||||
|
/** @brief Converts a cv::Mat to an Eigen::Tensor.
|
||||||
|
|
||||||
|
The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
|
||||||
|
H = number of rows
|
||||||
|
W = number of columns
|
||||||
|
C = number of channels
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
\code
|
||||||
|
Mat a_mat(...);
|
||||||
|
// populate Mat with values
|
||||||
|
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
|
||||||
|
cv2eigen(a_mat, a_tensor);
|
||||||
|
\endcode
|
||||||
|
*/
|
||||||
|
// Convert a cv::Mat (H x W, C channels) into an H x W x C Eigen tensor.
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
    if( !(_layout & Eigen::RowMajorBit) )
    {
        // Column-major destination: stage through a row-major tensor that
        // shares cv::Mat's layout, then shuffle back to column-major.
        Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
        Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
        // Copy when depths already match, convert otherwise.
        if (src.type() == _dst.type())
            src.copyTo(_dst);
        else
            src.convertTo(_dst, _dst.type());
        const std::array<int, 3> shuffle{2, 1, 0};
        dst = row_major_tensor.swap_layout().shuffle(shuffle);
    }
    else
    {
        // Row-major destination: write straight into the tensor's buffer.
        dst.resize(src.rows, src.cols, src.channels());
        Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
        if (src.type() == _dst.type())
            src.copyTo(_dst);
        else
            src.convertTo(_dst, _dst.type());
    }
}
|
||||||
|
|
||||||
|
/** @brief Maps cv::Mat data to an Eigen::TensorMap.
|
||||||
|
|
||||||
|
The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
|
||||||
|
H = number of rows
|
||||||
|
W = number of columns
|
||||||
|
C = number of channels
|
||||||
|
|
||||||
|
Explicit instantiation of the return type is required.
|
||||||
|
|
||||||
|
@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
|
||||||
|
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
|
||||||
|
|
||||||
|
The example below initializes a cv::Mat and produces an Eigen::TensorMap:
|
||||||
|
\code
|
||||||
|
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
|
||||||
|
Mat a_mat(2, 2, CV_32FC3, arr);
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
|
||||||
|
\endcode
|
||||||
|
*/
|
||||||
|
// Wrap cv::Mat data in a zero-copy row-major Eigen::TensorMap (H x W x C).
// The map is only valid while the Mat's data buffer stays alive.
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(InputArray src)
{
    Mat mat = src.getMat();
    // The element depth must be exactly _Tp; any channel count is accepted.
    CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
    return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
|
||||||
|
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
|
||||||
|
|
||||||
|
// Convert an Eigen matrix (any layout) into a cv::Mat copy.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{
    if( !(src.Flags & Eigen::RowMajorBit) )
    {
        // Column-major data reads as the transposed matrix when wrapped in a
        // row-major Mat; transpose() writes the correctly oriented copy.
        Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value,
                 (void*)src.data(), src.outerStride()*sizeof(_Tp));
        transpose(_src, dst);
    }
    else
    {
        // Row-major: layouts agree; wrap (respecting outer stride) and copy.
        Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value,
                 (void*)src.data(), src.outerStride()*sizeof(_Tp));
        _src.copyTo(dst);
    }
}
|
||||||
|
|
||||||
|
// Matx case: convert an Eigen matrix into a fixed-size cv::Matx.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
               Matx<_Tp, _rows, _cols>& dst )
{
    if( !(src.Flags & Eigen::RowMajorBit) )
    {
        // Column-major data reads as the transpose; build it as a
        // _cols x _rows Matx and flip it back with .t().
        dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
    }
    else
    {
        // Row-major data maps directly onto Matx's row-major storage.
        dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
    }
}
|
||||||
|
|
||||||
|
// Convert a cv::Mat into a fixed-size Eigen matrix (dimensions must match).
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Mat& src,
               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
    // Fixed-size destination cannot be resized; sizes must already agree.
    CV_DbgAssert(src.rows == _rows && src.cols == _cols);
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as its transpose in a row-major Mat.
        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        if( src.type() == _dst.type() )
            transpose(src, _dst);
        else if( src.cols == src.rows )
        {
            // Square matrix: convert depth first, then transpose in place.
            src.convertTo(_dst, _dst.type());
            transpose(_dst, _dst);
        }
        else
            // Non-square with conversion: transpose into a temporary first.
            Mat(src.t()).convertTo(_dst, _dst.type());
    }
    else
    {
        // Row-major: layouts agree; convertTo also covers the same-type copy.
        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        src.convertTo(_dst, _dst.type());
    }
}
|
||||||
|
|
||||||
|
// Matx case: convert a fixed-size cv::Matx into a fixed-size Eigen matrix.
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination: write the transpose through a Mat view.
        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        transpose(src, _dst);
    }
    else
    {
        // Row-major: direct element-wise copy through a Mat view.
        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        Mat(src).copyTo(_dst);
    }
}
|
||||||
|
|
||||||
|
// Convert a cv::Mat into a dynamically-sized Eigen matrix (default layout).
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
    dst.resize(src.rows, src.cols);
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as its transpose in a row-major Mat.
        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        if( src.type() == _dst.type() )
            transpose(src, _dst);
        else if( src.cols == src.rows )
        {
            // Square matrix: convert depth first, then transpose in place.
            src.convertTo(_dst, _dst.type());
            transpose(_dst, _dst);
        }
        else
            // Non-square with conversion: transpose into a temporary first.
            Mat(src.t()).convertTo(_dst, _dst.type());
    }
    else
    {
        // Row-major: layouts agree; convertTo also covers the same-type copy.
        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        src.convertTo(_dst, _dst.type());
    }
}
|
||||||
|
|
||||||
|
// Convert a 2-D cv::Mat into a dynamically-sized row-major Eigen matrix.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>& dst )
{
    CV_CheckEQ(src.dims, 2, "");
    dst.resize(src.rows, src.cols);
    // Layouts agree (both row-major): wrap the destination buffer and
    // convert/copy in one step.
    const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
                   dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
    src.convertTo(_dst, _dst.type());
}
|
||||||
|
|
||||||
|
// Matx case: convert a fixed-size cv::Matx into a dynamic Eigen matrix.
template<typename _Tp, int _rows, int _cols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
    dst.resize(_rows, _cols);
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination: write the transpose through a Mat view.
        const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        transpose(src, _dst);
    }
    else
    {
        // Row-major: direct copy through a Mat view.
        const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        Mat(src).copyTo(_dst);
    }
}
|
||||||
|
|
||||||
|
template<typename _Tp, int _rows, int _cols> static inline
|
||||||
|
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
|
||||||
|
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>& dst )
|
||||||
|
{
|
||||||
|
CV_CheckEQ(src.dims, 2, "");
|
||||||
|
dst.resize(_rows, _cols);
|
||||||
|
const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
|
||||||
|
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
|
||||||
|
Mat(src).copyTo(_dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert a single-column cv::Mat into a dynamic Eigen column vector.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
    CV_Assert(src.cols == 1);
    dst.resize(src.rows);

    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as its transpose (a row) in a Mat.
        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        if( src.type() == _dst.type() )
            transpose(src, _dst);
        else
            // Depth conversion needed: transpose into a temporary first.
            Mat(src.t()).convertTo(_dst, _dst.type());
    }
    else
    {
        // Row-major: layouts agree; convertTo also covers the same-type copy.
        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        src.convertTo(_dst, _dst.type());
    }
}
|
||||||
|
|
||||||
|
// Matx case: convert a fixed-size column Matx into an Eigen column vector.
template<typename _Tp, int _rows> static inline
void cv2eigen( const Matx<_Tp, _rows, 1>& src,
               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
    dst.resize(_rows);

    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as a 1 x _rows row in a Mat.
        const Mat _dst(1, _rows, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        transpose(src, _dst);
    }
    else
    {
        // Row-major: direct copy.
        const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        src.copyTo(_dst);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Convert a single-row cv::Mat into a dynamic Eigen row vector.
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
    CV_Assert(src.rows == 1);
    dst.resize(src.cols);
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as its transpose (a column) in a Mat.
        const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        if( src.type() == _dst.type() )
            transpose(src, _dst);
        else
            // Depth conversion needed: transpose into a temporary first.
            Mat(src.t()).convertTo(_dst, _dst.type());
    }
    else
    {
        // Row-major: layouts agree; convertTo also covers the same-type copy.
        const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        src.convertTo(_dst, _dst.type());
    }
}
|
||||||
|
|
||||||
|
// Matx case: convert a fixed-size row Matx into an Eigen row vector.
template<typename _Tp, int _cols> static inline
void cv2eigen( const Matx<_Tp, 1, _cols>& src,
               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
    dst.resize(_cols);
    if( !(dst.Flags & Eigen::RowMajorBit) )
    {
        // Column-major destination, viewed as a _cols x 1 column in a Mat.
        const Mat _dst(_cols, 1, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        transpose(src, _dst);
    }
    else
    {
        // Row-major: direct copy.
        const Mat _dst(1, _cols, traits::Type<_Tp>::value,
                       dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
        Mat(src).copyTo(_dst);
    }
}
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#endif
|
||||||
441
3rdpart/OpenCV/include/opencv2/core/fast_math.hpp
Normal file
441
3rdpart/OpenCV/include/opencv2/core/fast_math.hpp
Normal file
@@ -0,0 +1,441 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_FAST_MATH_HPP
|
||||||
|
#define OPENCV_CORE_FAST_MATH_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
|
||||||
|
//! @addtogroup core_utils
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
/****************************************************************************************\
|
||||||
|
* fast math *
|
||||||
|
\****************************************************************************************/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
# include <cmath>
|
||||||
|
#else
|
||||||
|
# ifdef __BORLANDC__
|
||||||
|
# include <fastmath.h>
|
||||||
|
# else
|
||||||
|
# include <math.h>
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__CUDACC__)
|
||||||
|
// nothing, intrinsics/asm code is not supported
|
||||||
|
#else
|
||||||
|
#if ((defined _MSC_VER && defined _M_X64) \
|
||||||
|
|| (defined __GNUC__ && defined __SSE2__)) \
|
||||||
|
&& !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
|
||||||
|
&& !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
|
||||||
|
#include <altivec.h>
|
||||||
|
#undef vector
|
||||||
|
#undef bool
|
||||||
|
#undef pixel
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(CV_INLINE_ROUND_FLT)
|
||||||
|
// user-specified version
|
||||||
|
// CV_INLINE_ROUND_DBL should be defined too
|
||||||
|
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON) && !defined __SOFTFP__
|
||||||
|
// 1. general scheme
|
||||||
|
#define ARM_ROUND(_value, _asm_string) \
|
||||||
|
int res; \
|
||||||
|
float temp; \
|
||||||
|
CV_UNUSED(temp); \
|
||||||
|
__asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
|
||||||
|
return res
|
||||||
|
// 2. version for double
|
||||||
|
#ifdef __clang__
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
|
||||||
|
#else
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
|
||||||
|
#endif
|
||||||
|
// 3. version for float
|
||||||
|
#define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
|
||||||
|
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
|
||||||
|
// P8 and newer machines can convert fp32/64 to int quickly.
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) \
|
||||||
|
int out; \
|
||||||
|
double temp; \
|
||||||
|
__asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
|
||||||
|
return out;
|
||||||
|
|
||||||
|
// FP32 also works with FP64 routine above
|
||||||
|
#define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_INLINE_ISINF_FLT
|
||||||
|
// user-specified version
|
||||||
|
// CV_INLINE_ISINF_DBL should be defined too
|
||||||
|
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
|
||||||
|
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
|
||||||
|
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CV_INLINE_ISNAN_FLT
|
||||||
|
// user-specified version
|
||||||
|
// CV_INLINE_ISNAN_DBL should be defined too
|
||||||
|
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
|
||||||
|
&& ( \
|
||||||
|
defined(__x86_64__) || defined(__i686__) \
|
||||||
|
|| defined(__arm__) \
|
||||||
|
|| defined(__PPC64__) \
|
||||||
|
)
|
||||||
|
/* Let builtin C math functions when available. Dedicated hardware is available to
|
||||||
|
round and convert FP values. */
|
||||||
|
#define OPENCV_USE_FASTMATH_BUILTINS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Enable builtin math functions if possible, desired, and available.
|
||||||
|
Note, not all math functions inline equally. E.g lrint will not inline
|
||||||
|
without the -fno-math-errno option. */
|
||||||
|
#if defined(CV_ICC)
|
||||||
|
// nothing
|
||||||
|
#elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
|
||||||
|
#if defined(__clang__)
|
||||||
|
#define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
#if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
|
||||||
|
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
|
||||||
|
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
|
||||||
|
#endif
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
#if !defined(CV_INLINE_ISNAN_DBL)
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISNAN_FLT)
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_DBL)
|
||||||
|
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_FLT)
|
||||||
|
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
|
||||||
|
#endif
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
#if !defined(CV_INLINE_ISNAN_DBL)
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return isnan(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISNAN_FLT)
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) return isnan(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_DBL)
|
||||||
|
#define CV_INLINE_ISINF_DBL(value) return isinf(value);
|
||||||
|
#endif
|
||||||
|
#if !defined(CV_INLINE_ISINF_FLT)
|
||||||
|
#define CV_INLINE_ISINF_FLT(value) return isinf(value);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // defined(__CUDACC__)
|
||||||
|
|
||||||
|
/** @brief Rounds floating-point number to the nearest integer
|
||||||
|
|
||||||
|
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
|
||||||
|
result is not defined.
|
||||||
|
*/
|
||||||
|
CV_INLINE int
|
||||||
|
cvRound( double value )
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ROUND_DBL
|
||||||
|
CV_INLINE_ROUND_DBL(value);
|
||||||
|
#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
|
||||||
|
float64x1_t v = vdup_n_f64(value);
|
||||||
|
int64x1_t r = vcvtn_s64_f64(v);
|
||||||
|
return static_cast<int>(vget_lane_s64(r, 0));
|
||||||
|
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
|
||||||
|
__m128d t = _mm_set_sd( value );
|
||||||
|
return _mm_cvtsd_si32(t);
|
||||||
|
#elif defined _MSC_VER && defined _M_IX86
|
||||||
|
int t;
|
||||||
|
__asm
|
||||||
|
{
|
||||||
|
fld value;
|
||||||
|
fistp t;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
#elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_lrint(value);
|
||||||
|
#else
|
||||||
|
return (int)lrint(value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Rounds floating-point number to the nearest integer not larger than the original.
|
||||||
|
|
||||||
|
The function computes an integer i such that:
|
||||||
|
\f[i \le \texttt{value} < i+1\f]
|
||||||
|
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
|
||||||
|
result is not defined.
|
||||||
|
*/
|
||||||
|
CV_INLINE int cvFloor( double value )
|
||||||
|
{
|
||||||
|
#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_floor(value);
|
||||||
|
#elif defined __loongarch64
|
||||||
|
int i;
|
||||||
|
double tmp;
|
||||||
|
__asm__ ("ftintrm.l.d %[tmp], %[in] \n\t"
|
||||||
|
"movfr2gr.d %[i], %[tmp] \n\t"
|
||||||
|
: [i] "=r" (i), [tmp] "=f" (tmp)
|
||||||
|
: [in] "f" (value)
|
||||||
|
:);
|
||||||
|
return i;
|
||||||
|
#else
|
||||||
|
int i = (int)value;
|
||||||
|
return i - (i > value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
|
||||||
|
|
||||||
|
The function computes an integer i such that:
|
||||||
|
\f[i \le \texttt{value} < i+1\f]
|
||||||
|
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
|
||||||
|
result is not defined.
|
||||||
|
*/
|
||||||
|
CV_INLINE int cvCeil( double value )
|
||||||
|
{
|
||||||
|
#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_ceil(value);
|
||||||
|
#elif defined __loongarch64
|
||||||
|
int i;
|
||||||
|
double tmp;
|
||||||
|
__asm__ ("ftintrp.l.d %[tmp], %[in] \n\t"
|
||||||
|
"movfr2gr.d %[i], %[tmp] \n\t"
|
||||||
|
: [i] "=r" (i), [tmp] "=f" (tmp)
|
||||||
|
: [in] "f" (value)
|
||||||
|
:);
|
||||||
|
return i;
|
||||||
|
#else
|
||||||
|
int i = (int)value;
|
||||||
|
return i + (i < value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Determines if the argument is Not A Number.
|
||||||
|
|
||||||
|
@param value The input floating-point value
|
||||||
|
|
||||||
|
The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
|
||||||
|
otherwise. */
|
||||||
|
CV_INLINE int cvIsNaN( double value )
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ISNAN_DBL
|
||||||
|
CV_INLINE_ISNAN_DBL(value);
|
||||||
|
#else
|
||||||
|
Cv64suf ieee754;
|
||||||
|
ieee754.f = value;
|
||||||
|
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
|
||||||
|
((unsigned)ieee754.u != 0) > 0x7ff00000;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Determines if the argument is Infinity.
|
||||||
|
|
||||||
|
@param value The input floating-point value
|
||||||
|
|
||||||
|
The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
|
||||||
|
and 0 otherwise. */
|
||||||
|
CV_INLINE int cvIsInf( double value )
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ISINF_DBL
|
||||||
|
CV_INLINE_ISINF_DBL(value);
|
||||||
|
#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__PPC64__) || defined(__loongarch64)
|
||||||
|
Cv64suf ieee754;
|
||||||
|
ieee754.f = value;
|
||||||
|
return (ieee754.u & 0x7fffffffffffffff) ==
|
||||||
|
0x7ff0000000000000;
|
||||||
|
#else
|
||||||
|
Cv64suf ieee754;
|
||||||
|
ieee754.f = value;
|
||||||
|
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
|
||||||
|
(unsigned)ieee754.u == 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvRound(float value)
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ROUND_FLT
|
||||||
|
CV_INLINE_ROUND_FLT(value);
|
||||||
|
#elif defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
|
||||||
|
float32x2_t v = vdup_n_f32(value);
|
||||||
|
int32x2_t r = vcvtn_s32_f32(v);
|
||||||
|
return vget_lane_s32(r, 0);
|
||||||
|
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
|
||||||
|
__m128 t = _mm_set_ss( value );
|
||||||
|
return _mm_cvtss_si32(t);
|
||||||
|
#elif defined _MSC_VER && defined _M_IX86
|
||||||
|
int t;
|
||||||
|
__asm
|
||||||
|
{
|
||||||
|
fld value;
|
||||||
|
fistp t;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
#elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_lrintf(value);
|
||||||
|
#else
|
||||||
|
return (int)lrintf(value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvRound( int value )
|
||||||
|
{
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvFloor( float value )
|
||||||
|
{
|
||||||
|
#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_floorf(value);
|
||||||
|
#elif defined __loongarch__
|
||||||
|
int i;
|
||||||
|
float tmp;
|
||||||
|
__asm__ ("ftintrm.w.s %[tmp], %[in] \n\t"
|
||||||
|
"movfr2gr.s %[i], %[tmp] \n\t"
|
||||||
|
: [i] "=r" (i), [tmp] "=f" (tmp)
|
||||||
|
: [in] "f" (value)
|
||||||
|
:);
|
||||||
|
return i;
|
||||||
|
#else
|
||||||
|
int i = (int)value;
|
||||||
|
return i - (i > value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvFloor( int value )
|
||||||
|
{
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvCeil( float value )
|
||||||
|
{
|
||||||
|
#if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
|
||||||
|
defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||||
|
return (int)__builtin_ceilf(value);
|
||||||
|
#elif defined __loongarch__
|
||||||
|
int i;
|
||||||
|
float tmp;
|
||||||
|
__asm__ ("ftintrp.w.s %[tmp], %[in] \n\t"
|
||||||
|
"movfr2gr.s %[i], %[tmp] \n\t"
|
||||||
|
: [i] "=r" (i), [tmp] "=f" (tmp)
|
||||||
|
: [in] "f" (value)
|
||||||
|
:);
|
||||||
|
return i;
|
||||||
|
#else
|
||||||
|
int i = (int)value;
|
||||||
|
return i + (i < value);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvCeil( int value )
|
||||||
|
{
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvIsNaN( float value )
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ISNAN_FLT
|
||||||
|
CV_INLINE_ISNAN_FLT(value);
|
||||||
|
#else
|
||||||
|
Cv32suf ieee754;
|
||||||
|
ieee754.f = value;
|
||||||
|
return (ieee754.u & 0x7fffffff) > 0x7f800000;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @overload */
|
||||||
|
CV_INLINE int cvIsInf( float value )
|
||||||
|
{
|
||||||
|
#if defined CV_INLINE_ISINF_FLT
|
||||||
|
CV_INLINE_ISINF_FLT(value);
|
||||||
|
#else
|
||||||
|
Cv32suf ieee754;
|
||||||
|
ieee754.f = value;
|
||||||
|
return (ieee754.u & 0x7fffffff) == 0x7f800000;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
|
//! @} core_utils
|
||||||
|
|
||||||
|
#endif
|
||||||
260
3rdpart/OpenCV/include/opencv2/core/hal/hal.hpp
Normal file
260
3rdpart/OpenCV/include/opencv2/core/hal/hal.hpp
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_HPP
|
||||||
|
#define OPENCV_HAL_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core/cvstd.hpp"
|
||||||
|
#include "opencv2/core/hal/interface.h"
|
||||||
|
|
||||||
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
|
//! @addtogroup core_hal_functions
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
CV_EXPORTS int normHamming(const uchar* a, int n);
|
||||||
|
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
|
||||||
|
|
||||||
|
CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
|
||||||
|
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
|
||||||
|
|
||||||
|
CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
|
||||||
|
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
|
||||||
|
CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
|
||||||
|
CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
|
||||||
|
|
||||||
|
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
|
||||||
|
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
|
||||||
|
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
|
||||||
|
CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
|
||||||
|
|
||||||
|
CV_EXPORTS void exp32f(const float* src, float* dst, int n);
|
||||||
|
CV_EXPORTS void exp64f(const double* src, double* dst, int n);
|
||||||
|
CV_EXPORTS void log32f(const float* src, float* dst, int n);
|
||||||
|
CV_EXPORTS void log64f(const double* src, double* dst, int n);
|
||||||
|
|
||||||
|
CV_EXPORTS void cartToPolar32f(const float* x, const float* y, float* mag, float* angle, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void cartToPolar64f(const double* x, const double* y, double* mag, double* angle, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
|
||||||
|
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
|
||||||
|
CV_EXPORTS void polarToCart32f(const float* mag, const float* angle, float* x, float* y, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void polarToCart64f(const double* mag, const double* angle, double* x, double* y, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
|
||||||
|
CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
|
||||||
|
CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
|
||||||
|
CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
|
||||||
|
|
||||||
|
CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
|
||||||
|
CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
|
||||||
|
CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
|
||||||
|
CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
|
||||||
|
|
||||||
|
CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
|
||||||
|
CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
|
||||||
|
CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
|
||||||
|
CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
|
||||||
|
|
||||||
|
CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||||
|
|
||||||
|
CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||||
|
|
||||||
|
CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||||
|
|
||||||
|
CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||||
|
|
||||||
|
CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||||
|
CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||||
|
|
||||||
|
CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
|
||||||
|
CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
|
||||||
|
|
||||||
|
CV_EXPORTS void cvt16f32f( const hfloat* src, float* dst, int len );
|
||||||
|
CV_EXPORTS void cvt32f16f( const float* src, hfloat* dst, int len );
|
||||||
|
|
||||||
|
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
|
||||||
|
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
|
||||||
|
|
||||||
|
struct CV_EXPORTS DFT1D
|
||||||
|
{
|
||||||
|
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
|
||||||
|
virtual void apply(const uchar *src, uchar *dst) = 0;
|
||||||
|
virtual ~DFT1D() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CV_EXPORTS DFT2D
|
||||||
|
{
|
||||||
|
static Ptr<DFT2D> create(int width, int height, int depth,
|
||||||
|
int src_channels, int dst_channels,
|
||||||
|
int flags, int nonzero_rows = 0);
|
||||||
|
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
|
||||||
|
virtual ~DFT2D() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CV_EXPORTS DCT2D
|
||||||
|
{
|
||||||
|
static Ptr<DCT2D> create(int width, int height, int depth, int flags);
|
||||||
|
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
|
||||||
|
virtual ~DCT2D() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//! @} core_hal
|
||||||
|
|
||||||
|
//=============================================================================
|
||||||
|
// for binary compatibility with 3.0
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
|
||||||
|
CV_EXPORTS void exp(const float* src, float* dst, int n);
|
||||||
|
CV_EXPORTS void exp(const double* src, double* dst, int n);
|
||||||
|
CV_EXPORTS void log(const float* src, float* dst, int n);
|
||||||
|
CV_EXPORTS void log(const double* src, double* dst, int n);
|
||||||
|
|
||||||
|
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
|
||||||
|
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
|
||||||
|
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
|
||||||
|
CV_EXPORTS void sqrt(const float* src, float* dst, int len);
|
||||||
|
CV_EXPORTS void sqrt(const double* src, double* dst, int len);
|
||||||
|
CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
|
||||||
|
CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
}} //cv::hal
|
||||||
|
|
||||||
|
#endif //OPENCV_HAL_HPP
|
||||||
190
3rdpart/OpenCV/include/opencv2/core/hal/interface.h
Normal file
190
3rdpart/OpenCV/include/opencv2/core/hal/interface.h
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
#ifndef OPENCV_CORE_HAL_INTERFACE_H
|
||||||
|
#define OPENCV_CORE_HAL_INTERFACE_H
|
||||||
|
|
||||||
|
//! @addtogroup core_hal_interface
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//! @name Return codes
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_ERROR_OK 0
|
||||||
|
#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
|
||||||
|
#define CV_HAL_ERROR_UNKNOWN -1
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#include <cstddef>
|
||||||
|
#else
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @name Data types
|
||||||
|
//! primitive types
|
||||||
|
//! - schar - signed 1 byte integer
|
||||||
|
//! - uchar - unsigned 1 byte integer
|
||||||
|
//! - short - signed 2 byte integer
|
||||||
|
//! - ushort - unsigned 2 byte integer
|
||||||
|
//! - int - signed 4 byte integer
|
||||||
|
//! - uint - unsigned 4 byte integer
|
||||||
|
//! - int64 - signed 8 byte integer
|
||||||
|
//! - uint64 - unsigned 8 byte integer
|
||||||
|
//! @{
|
||||||
|
#if !defined _MSC_VER && !defined __BORLANDC__
|
||||||
|
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
|
||||||
|
# include <cstdint>
|
||||||
|
# ifdef __NEWLIB__
|
||||||
|
typedef unsigned int uint;
|
||||||
|
# else
|
||||||
|
typedef std::uint32_t uint;
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# include <stdint.h>
|
||||||
|
typedef uint32_t uint;
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
typedef unsigned uint;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef signed char schar;
|
||||||
|
|
||||||
|
#ifndef __IPL_H__
|
||||||
|
typedef unsigned char uchar;
|
||||||
|
typedef unsigned short ushort;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined _MSC_VER || defined __BORLANDC__
|
||||||
|
typedef __int64 int64;
|
||||||
|
typedef unsigned __int64 uint64;
|
||||||
|
# define CV_BIG_INT(n) n##I64
|
||||||
|
# define CV_BIG_UINT(n) n##UI64
|
||||||
|
#else
|
||||||
|
typedef int64_t int64;
|
||||||
|
typedef uint64_t uint64;
|
||||||
|
# define CV_BIG_INT(n) n##LL
|
||||||
|
# define CV_BIG_UINT(n) n##ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
|
||||||
|
|
||||||
|
#define CV_CN_MAX 512
|
||||||
|
#define CV_CN_SHIFT 3
|
||||||
|
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
|
||||||
|
|
||||||
|
#define CV_8U 0
|
||||||
|
#define CV_8S 1
|
||||||
|
#define CV_16U 2
|
||||||
|
#define CV_16S 3
|
||||||
|
#define CV_32S 4
|
||||||
|
#define CV_32F 5
|
||||||
|
#define CV_64F 6
|
||||||
|
#define CV_16F 7
|
||||||
|
|
||||||
|
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
|
||||||
|
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
|
||||||
|
|
||||||
|
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
|
||||||
|
#define CV_MAKE_TYPE CV_MAKETYPE
|
||||||
|
|
||||||
|
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
|
||||||
|
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
|
||||||
|
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
|
||||||
|
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
|
||||||
|
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
|
||||||
|
|
||||||
|
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
|
||||||
|
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
|
||||||
|
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
|
||||||
|
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
|
||||||
|
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
|
||||||
|
|
||||||
|
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
|
||||||
|
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
|
||||||
|
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
|
||||||
|
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
|
||||||
|
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
|
||||||
|
|
||||||
|
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
|
||||||
|
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
|
||||||
|
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
|
||||||
|
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
|
||||||
|
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
|
||||||
|
|
||||||
|
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
|
||||||
|
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
|
||||||
|
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
|
||||||
|
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
|
||||||
|
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
|
||||||
|
|
||||||
|
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
|
||||||
|
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
|
||||||
|
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
|
||||||
|
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
|
||||||
|
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
|
||||||
|
|
||||||
|
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
|
||||||
|
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
|
||||||
|
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
|
||||||
|
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
|
||||||
|
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
|
||||||
|
|
||||||
|
#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
|
||||||
|
#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
|
||||||
|
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
|
||||||
|
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
|
||||||
|
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Comparison operation
|
||||||
|
//! @sa cv::CmpTypes
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_CMP_EQ 0
|
||||||
|
#define CV_HAL_CMP_GT 1
|
||||||
|
#define CV_HAL_CMP_GE 2
|
||||||
|
#define CV_HAL_CMP_LT 3
|
||||||
|
#define CV_HAL_CMP_LE 4
|
||||||
|
#define CV_HAL_CMP_NE 5
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Border processing modes
|
||||||
|
//! @sa cv::BorderTypes
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_BORDER_CONSTANT 0
|
||||||
|
#define CV_HAL_BORDER_REPLICATE 1
|
||||||
|
#define CV_HAL_BORDER_REFLECT 2
|
||||||
|
#define CV_HAL_BORDER_WRAP 3
|
||||||
|
#define CV_HAL_BORDER_REFLECT_101 4
|
||||||
|
#define CV_HAL_BORDER_TRANSPARENT 5
|
||||||
|
#define CV_HAL_BORDER_ISOLATED 16
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name DFT flags
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_DFT_INVERSE 1
|
||||||
|
#define CV_HAL_DFT_SCALE 2
|
||||||
|
#define CV_HAL_DFT_ROWS 4
|
||||||
|
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
|
||||||
|
#define CV_HAL_DFT_REAL_OUTPUT 32
|
||||||
|
#define CV_HAL_DFT_TWO_STAGE 64
|
||||||
|
#define CV_HAL_DFT_STAGE_COLS 128
|
||||||
|
#define CV_HAL_DFT_IS_CONTINUOUS 512
|
||||||
|
#define CV_HAL_DFT_IS_INPLACE 1024
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name SVD flags
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_SVD_NO_UV 1
|
||||||
|
#define CV_HAL_SVD_SHORT_UV 2
|
||||||
|
#define CV_HAL_SVD_MODIFY_A 4
|
||||||
|
#define CV_HAL_SVD_FULL_UV 8
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Gemm flags
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_GEMM_1_T 1
|
||||||
|
#define CV_HAL_GEMM_2_T 2
|
||||||
|
#define CV_HAL_GEMM_3_T 4
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#endif
|
||||||
988
3rdpart/OpenCV/include/opencv2/core/hal/intrin.hpp
Normal file
988
3rdpart/OpenCV/include/opencv2/core/hal/intrin.hpp
Normal file
@@ -0,0 +1,988 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_INTRIN_HPP
|
||||||
|
#define OPENCV_HAL_INTRIN_HPP
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <float.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && __GNUC__ == 12
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wuninitialized"
|
||||||
|
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
|
||||||
|
#define OPENCV_HAL_AND(a, b) ((a) & (b))
|
||||||
|
#define OPENCV_HAL_NOP(a) (a)
|
||||||
|
#define OPENCV_HAL_1ST(a, b) (a)
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
inline unsigned int trailingZeros32(unsigned int value) {
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||||
|
unsigned long index = 0;
|
||||||
|
_BitScanForward(&index, value);
|
||||||
|
return (unsigned int)index;
|
||||||
|
#elif defined(__clang__)
|
||||||
|
// clang-cl doesn't export _tzcnt_u32 for non BMI systems
|
||||||
|
return value ? __builtin_ctz(value) : 32;
|
||||||
|
#else
|
||||||
|
return _tzcnt_u32(value);
|
||||||
|
#endif
|
||||||
|
#elif defined(__GNUC__) || defined(__GNUG__)
|
||||||
|
return __builtin_ctz(value);
|
||||||
|
#elif defined(__ICC) || defined(__INTEL_COMPILER)
|
||||||
|
return _bit_scan_forward(value);
|
||||||
|
#elif defined(__clang__)
|
||||||
|
return llvm.cttz.i32(value, true);
|
||||||
|
#else
|
||||||
|
static const int MultiplyDeBruijnBitPosition[32] = {
|
||||||
|
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||||
|
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
|
||||||
|
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// unlike HAL API, which is in cv::hal,
|
||||||
|
// we put intrinsics into cv namespace to make its
|
||||||
|
// access from within opencv code more accessible
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
namespace hal {
|
||||||
|
|
||||||
|
enum StoreMode
|
||||||
|
{
|
||||||
|
STORE_UNALIGNED = 0,
|
||||||
|
STORE_ALIGNED = 1,
|
||||||
|
STORE_ALIGNED_NOCACHE = 2
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO FIXIT: Don't use "God" traits. Split on separate cases.
|
||||||
|
template<typename _Tp> struct V_TypeTraits
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
|
||||||
|
template<> struct V_TypeTraits<type> \
|
||||||
|
{ \
|
||||||
|
typedef type value_type; \
|
||||||
|
typedef int_type_ int_type; \
|
||||||
|
typedef abs_type_ abs_type; \
|
||||||
|
typedef uint_type_ uint_type; \
|
||||||
|
typedef w_type_ w_type; \
|
||||||
|
typedef q_type_ q_type; \
|
||||||
|
typedef sum_type_ sum_type; \
|
||||||
|
\
|
||||||
|
static inline int_type reinterpret_int(type x) \
|
||||||
|
{ \
|
||||||
|
union { type l; int_type i; } v; \
|
||||||
|
v.l = x; \
|
||||||
|
return v.i; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static inline type reinterpret_from_int(int_type x) \
|
||||||
|
{ \
|
||||||
|
union { type l; int_type i; } v; \
|
||||||
|
v.i = x; \
|
||||||
|
return v.l; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
|
||||||
|
template<> struct V_TypeTraits<type> \
|
||||||
|
{ \
|
||||||
|
typedef type value_type; \
|
||||||
|
typedef int_type_ int_type; \
|
||||||
|
typedef abs_type_ abs_type; \
|
||||||
|
typedef uint_type_ uint_type; \
|
||||||
|
typedef w_type_ w_type; \
|
||||||
|
typedef sum_type_ sum_type; \
|
||||||
|
\
|
||||||
|
static inline int_type reinterpret_int(type x) \
|
||||||
|
{ \
|
||||||
|
union { type l; int_type i; } v; \
|
||||||
|
v.l = x; \
|
||||||
|
return v.i; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static inline type reinterpret_from_int(int_type x) \
|
||||||
|
{ \
|
||||||
|
union { type l; int_type i; } v; \
|
||||||
|
v.i = x; \
|
||||||
|
return v.l; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
|
||||||
|
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
|
||||||
|
|
||||||
|
#ifndef CV_DOXYGEN
|
||||||
|
|
||||||
|
#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
|
||||||
|
#ifdef CV_FORCE_SIMD128_CPP
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||||
|
#elif defined(CV_CPU_DISPATCH_MODE)
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||||
|
#else
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
|
||||||
|
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||||
|
#endif
|
||||||
|
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
template <typename _VecTp> inline _VecTp v_setzero_();
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(uchar);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(schar);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(ushort);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(short);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(unsigned);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(int);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(uint64);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(int64);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(float);
|
||||||
|
template <typename _VecTp> inline _VecTp v_setall_(double);
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||||
|
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CV_DOXYGEN
|
||||||
|
# undef CV_AVX2
|
||||||
|
# undef CV_SSE2
|
||||||
|
# undef CV_NEON
|
||||||
|
# undef CV_VSX
|
||||||
|
# undef CV_FP16
|
||||||
|
# undef CV_MSA
|
||||||
|
# undef CV_RVV
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_LSX) && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
#define CV__SIMD_FORWARD 128
|
||||||
|
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_sse_em.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin_sse.hpp"
|
||||||
|
|
||||||
|
#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_neon.hpp"
|
||||||
|
|
||||||
|
#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
#define CV_SIMD128_CPP 0
|
||||||
|
#include "opencv2/core/hal/intrin_rvv071.hpp"
|
||||||
|
|
||||||
|
#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_vsx.hpp"
|
||||||
|
|
||||||
|
#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_msa.hpp"
|
||||||
|
|
||||||
|
#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
#include "opencv2/core/hal/intrin_wasm.hpp"
|
||||||
|
|
||||||
|
#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
#include "opencv2/core/hal/intrin_rvv_scalable.hpp"
|
||||||
|
|
||||||
|
#elif CV_LSX && !defined(CV_FORCE_SIMD128_CPP)
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_lsx.hpp"
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "opencv2/core/hal/intrin_cpp.hpp"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// AVX2 can be used together with SSE2, so
|
||||||
|
// we define those two sets of intrinsics at once.
|
||||||
|
// Most of the intrinsics do not conflict (the proper overloaded variant is
|
||||||
|
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
|
||||||
|
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
|
||||||
|
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
|
||||||
|
// available instruction set) will get vx_ prefix
|
||||||
|
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
|
||||||
|
#if CV_AVX2
|
||||||
|
|
||||||
|
#define CV__SIMD_FORWARD 256
|
||||||
|
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin_avx.hpp"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// AVX512 can be used together with SSE2 and AVX2, so
|
||||||
|
// we define those sets of intrinsics at once.
|
||||||
|
// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load().
|
||||||
|
// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load())
|
||||||
|
#if CV_AVX512_SKX
|
||||||
|
|
||||||
|
#define CV__SIMD_FORWARD 512
|
||||||
|
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin_avx512.hpp"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_LASX
|
||||||
|
|
||||||
|
#define CV__SIMD_FORWARD 256
|
||||||
|
#include "opencv2/core/hal/intrin_forward.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin_lasx.hpp"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
#ifndef CV_DOXYGEN
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD128
|
||||||
|
#define CV_SIMD128 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD128_CPP
|
||||||
|
#define CV_SIMD128_CPP 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD128_64F
|
||||||
|
#define CV_SIMD128_64F 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD256
|
||||||
|
#define CV_SIMD256 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD256_64F
|
||||||
|
#define CV_SIMD256_64F 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD512
|
||||||
|
#define CV_SIMD512 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD512_64F
|
||||||
|
#define CV_SIMD512_64F 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD128_FP16
|
||||||
|
#define CV_SIMD128_FP16 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD256_FP16
|
||||||
|
#define CV_SIMD256_FP16 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD512_FP16
|
||||||
|
#define CV_SIMD512_FP16 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD_SCALABLE
|
||||||
|
#define CV_SIMD_SCALABLE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD_SCALABLE_64F
|
||||||
|
#define CV_SIMD_SCALABLE_64F 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//==================================================================================================
|
||||||
|
|
||||||
|
template<typename _Tp> struct V_RegTraits
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
|
||||||
|
template<> struct V_RegTraits<_reg> \
|
||||||
|
{ \
|
||||||
|
typedef _reg reg; \
|
||||||
|
typedef _u_reg u_reg; \
|
||||||
|
typedef _w_reg w_reg; \
|
||||||
|
typedef _q_reg q_reg; \
|
||||||
|
typedef _int_reg int_reg; \
|
||||||
|
typedef _round_reg round_reg; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CV_SIMD128 || CV_SIMD128_CPP
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
|
||||||
|
#if CV_SIMD128_64F || CV_SIMD128_CPP
|
||||||
|
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
|
||||||
|
#else
|
||||||
|
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
|
||||||
|
#endif
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
|
||||||
|
#if CV_SIMD128_64F
|
||||||
|
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_SIMD256
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
|
||||||
|
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if CV_SIMD512
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_SCALABLE
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint8, uchar, u8, v_uint8, v_uint16, v_uint32, v_int8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int8, schar, s8, v_uint8, v_int16, v_int32, v_int8, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint16, ushort, u16, v_uint16, v_uint32, v_uint64, v_int16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int16, short, s16, v_uint16, v_int32, v_int64, v_int16, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint32, unsigned, u32, v_uint32, v_uint64, void, v_int32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int32, int, s32, v_uint32, v_int64, void, v_int32, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_float32, float, f32, v_float32, v_float64, void, v_int32, v_int32);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_uint64, uint64, u64, v_uint64, void, void, v_int64, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_int64, int64, s64, v_uint64, void, void, v_int64, void);
|
||||||
|
CV_DEF_REG_TRAITS(v, v_float64, double, f64, v_float64, void, void, v_int64, v_int32);
|
||||||
|
#endif
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
|
||||||
|
#define CV__SIMD_NAMESPACE simd512
|
||||||
|
namespace CV__SIMD_NAMESPACE {
|
||||||
|
#define CV_SIMD 1
|
||||||
|
#define CV_SIMD_64F CV_SIMD512_64F
|
||||||
|
#define CV_SIMD_FP16 CV_SIMD512_FP16
|
||||||
|
#define CV_SIMD_WIDTH 64
|
||||||
|
//! @addtogroup core_hal_intrin
|
||||||
|
//! @{
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||||
|
typedef v_uint8x64 v_uint8;
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||||
|
typedef v_int8x64 v_int8;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||||
|
typedef v_uint16x32 v_uint16;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||||
|
typedef v_int16x32 v_int16;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||||
|
typedef v_uint32x16 v_uint32;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||||
|
typedef v_int32x16 v_int32;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||||
|
typedef v_uint64x8 v_uint64;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||||
|
typedef v_int64x8 v_int64;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||||
|
typedef v_float32x16 v_float32;
|
||||||
|
#if CV_SIMD512_64F
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||||
|
typedef v_float64x8 v_float64;
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#define VXPREFIX(func) v512##func
|
||||||
|
} // namespace
|
||||||
|
using namespace CV__SIMD_NAMESPACE;
|
||||||
|
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
|
||||||
|
#define CV__SIMD_NAMESPACE simd256
|
||||||
|
namespace CV__SIMD_NAMESPACE {
|
||||||
|
#define CV_SIMD 1
|
||||||
|
#define CV_SIMD_64F CV_SIMD256_64F
|
||||||
|
#define CV_SIMD_FP16 CV_SIMD256_FP16
|
||||||
|
#define CV_SIMD_WIDTH 32
|
||||||
|
//! @addtogroup core_hal_intrin
|
||||||
|
//! @{
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||||
|
typedef v_uint8x32 v_uint8;
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||||
|
typedef v_int8x32 v_int8;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||||
|
typedef v_uint16x16 v_uint16;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||||
|
typedef v_int16x16 v_int16;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||||
|
typedef v_uint32x8 v_uint32;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||||
|
typedef v_int32x8 v_int32;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||||
|
typedef v_uint64x4 v_uint64;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||||
|
typedef v_int64x4 v_int64;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||||
|
typedef v_float32x8 v_float32;
|
||||||
|
#if CV_SIMD256_64F
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||||
|
typedef v_float64x4 v_float64;
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#define VXPREFIX(func) v256##func
|
||||||
|
} // namespace
|
||||||
|
using namespace CV__SIMD_NAMESPACE;
|
||||||
|
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
|
||||||
|
#if defined CV_SIMD128_CPP
|
||||||
|
#define CV__SIMD_NAMESPACE simd128_cpp
|
||||||
|
#else
|
||||||
|
#define CV__SIMD_NAMESPACE simd128
|
||||||
|
#endif
|
||||||
|
namespace CV__SIMD_NAMESPACE {
|
||||||
|
#define CV_SIMD CV_SIMD128
|
||||||
|
#define CV_SIMD_64F CV_SIMD128_64F
|
||||||
|
#define CV_SIMD_WIDTH 16
|
||||||
|
//! @addtogroup core_hal_intrin
|
||||||
|
//! @{
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||||
|
typedef v_uint8x16 v_uint8;
|
||||||
|
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||||
|
typedef v_int8x16 v_int8;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||||
|
typedef v_uint16x8 v_uint16;
|
||||||
|
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||||
|
typedef v_int16x8 v_int16;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||||
|
typedef v_uint32x4 v_uint32;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||||
|
typedef v_int32x4 v_int32;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||||
|
typedef v_uint64x2 v_uint64;
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||||
|
typedef v_int64x2 v_int64;
|
||||||
|
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||||
|
typedef v_float32x4 v_float32;
|
||||||
|
#if CV_SIMD128_64F
|
||||||
|
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||||
|
typedef v_float64x2 v_float64;
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#define VXPREFIX(func) v##func
|
||||||
|
} // namespace
|
||||||
|
using namespace CV__SIMD_NAMESPACE;
|
||||||
|
|
||||||
|
#elif CV_SIMD_SCALABLE
|
||||||
|
#define CV__SIMD_NAMESPACE simd
|
||||||
|
namespace CV__SIMD_NAMESPACE {
|
||||||
|
#define CV_SIMD 0
|
||||||
|
#define CV_SIMD_WIDTH 128 /* 1024/8 */
|
||||||
|
|
||||||
|
#define VXPREFIX(func) v##func
|
||||||
|
} // namespace
|
||||||
|
using namespace CV__SIMD_NAMESPACE;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#ifndef CV_SIMD_64F
|
||||||
|
#define CV_SIMD_64F 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace CV__SIMD_NAMESPACE {
|
||||||
|
//! @addtogroup core_hal_intrin
|
||||||
|
//! @{
|
||||||
|
//! @name Wide init with value
|
||||||
|
//! @{
|
||||||
|
//! @brief Create maximum available capacity vector with elements set to a specific value
|
||||||
|
inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
|
||||||
|
inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
|
||||||
|
inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
|
||||||
|
inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
|
||||||
|
inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
|
||||||
|
inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
|
||||||
|
inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
|
||||||
|
inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
|
||||||
|
inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide init with zero
|
||||||
|
//! @{
|
||||||
|
//! @brief Create maximum available capacity vector with elements set to zero
|
||||||
|
inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
|
||||||
|
inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
|
||||||
|
inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
|
||||||
|
inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
|
||||||
|
inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
|
||||||
|
inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
|
||||||
|
inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
|
||||||
|
inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
|
||||||
|
inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load from memory
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents from memory
|
||||||
|
inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load from memory(aligned)
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents from memory(aligned)
|
||||||
|
inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load lower half from memory
|
||||||
|
//! @{
|
||||||
|
//! @brief Load lower half of maximum available capacity register from memory
|
||||||
|
inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load halfs from memory
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents from two memory blocks
|
||||||
|
inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide LUT of elements
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents with array elements by provided indexes
|
||||||
|
inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide LUT of element pairs
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents with array element pairs by provided indexes
|
||||||
|
inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F
|
||||||
|
inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
|
||||||
|
#endif
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide LUT of element quads
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents with array element quads by provided indexes
|
||||||
|
inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load with double expansion
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents from memory with double expand
|
||||||
|
inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
inline v_float32 vx_load_expand(const hfloat * ptr) { return VXPREFIX(_load_expand)(ptr); }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Wide load with quad expansion
|
||||||
|
//! @{
|
||||||
|
//! @brief Load maximum available capacity register contents from memory with quad expand
|
||||||
|
inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
|
||||||
|
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
/** @brief SIMD processing state cleanup call */
|
||||||
|
inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
|
||||||
|
|
||||||
|
#if !CV_SIMD_SCALABLE
|
||||||
|
// Compatibility layer
|
||||||
|
#if !(CV_NEON && !defined(CV_FORCE_SIMD128_CPP))
|
||||||
|
template<typename T> struct VTraits {
|
||||||
|
static inline int vlanes() { return T::nlanes; }
|
||||||
|
enum { nlanes = T::nlanes, max_nlanes = T::nlanes };
|
||||||
|
using lane_type = typename T::lane_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
//////////// get0 ////////////
|
||||||
|
#define OPENCV_HAL_WRAP_GRT0(_Tpvec) \
|
||||||
|
inline typename VTraits<_Tpvec>::lane_type v_get0(const _Tpvec& v) \
|
||||||
|
{ \
|
||||||
|
return v.get0(); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint32)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int32)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint64)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int64)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float32)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float64)
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint8x16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint16x8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint32x4)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint64x2)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int8x16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int16x8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int32x4)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int64x2)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float32x4)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float64x2)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint8x32)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint16x16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint32x8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_uint64x4)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int8x32)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int16x16)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int32x8)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_int64x4)
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float32x8)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_GRT0(v_float64x4)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OPENCV_HAL_WRAP_BIN_OP_ADDSUB(_Tpvec) \
|
||||||
|
template<typename... Args> \
|
||||||
|
inline _Tpvec v_add(const _Tpvec& f1, const _Tpvec& f2, const _Tpvec& f3, const Args&... vf) { \
|
||||||
|
return v_add(v_add(f1, f2), f3, vf...); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64)
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
|
||||||
|
// when we use CV_SIMD128 with 256/512 bit SIMD (e.g. AVX2 or AVX512)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x2)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x2)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x4)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x2)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
|
||||||
|
// when we use CV_SIMD256 with 512 bit SIMD (e.g. AVX512)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x8)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x4)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \
|
||||||
|
template<typename... Args> \
|
||||||
|
inline _Tpvec v_mul(const _Tpvec& f1, const _Tpvec& f2, const _Tpvec& f3, const Args&... vf) { \
|
||||||
|
return v_mul(v_mul(f1, f2), f3, vf...); \
|
||||||
|
}
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64)
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x4)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x4)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x2)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x32)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x16)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x8)
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x8)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x4)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \
|
||||||
|
inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \
|
||||||
|
{ \
|
||||||
|
return v_extract_n<VTraits<_Tpvec>::nlanes-1>(v); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint32)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int32)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint64)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int64)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float32)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float64)
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint8x16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint16x8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint32x4)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint64x2)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int8x16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int16x8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int32x4)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int64x2)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float32x4)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float64x2)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint8x32)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint16x16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint32x8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_uint64x4)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int8x32)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int16x16)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int32x8)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_int64x4)
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float32x8)
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
OPENCV_HAL_WRAP_EXTRACT(v_float64x4)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \
|
||||||
|
inline _Tpvec v_broadcast_highest(const _Tpvec& v) \
|
||||||
|
{ \
|
||||||
|
return v_broadcast_element<VTraits<_Tpvec>::nlanes-1>(v); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_uint32)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_int32)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_float32)
|
||||||
|
#if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_uint32x4)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_int32x4)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_float32x4)
|
||||||
|
#endif
|
||||||
|
#if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_uint32x8)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_int32x8)
|
||||||
|
OPENCV_HAL_WRAP_BROADCAST(v_float32x8)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //!CV_SIMD_SCALABLE
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
// backward compatibility
|
||||||
|
template<typename _Tp, typename _Tvec> static inline
|
||||||
|
void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
|
||||||
|
// backward compatibility
|
||||||
|
template<typename _Tp, typename _Tvec> static inline
|
||||||
|
void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
#undef VXPREFIX
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef CV_SIMD_FP16
|
||||||
|
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CV_SIMD
|
||||||
|
#define CV_SIMD 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "simd_utils.impl.hpp"
|
||||||
|
|
||||||
|
#ifndef CV_DOXYGEN
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // cv::
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && __GNUC__ == 12
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
3189
3rdpart/OpenCV/include/opencv2/core/hal/intrin_avx.hpp
Normal file
3189
3rdpart/OpenCV/include/opencv2/core/hal/intrin_avx.hpp
Normal file
File diff suppressed because it is too large
Load Diff
3101
3rdpart/OpenCV/include/opencv2/core/hal/intrin_avx512.hpp
Normal file
3101
3rdpart/OpenCV/include/opencv2/core/hal/intrin_avx512.hpp
Normal file
File diff suppressed because it is too large
Load Diff
3388
3rdpart/OpenCV/include/opencv2/core/hal/intrin_cpp.hpp
Normal file
3388
3rdpart/OpenCV/include/opencv2/core/hal/intrin_cpp.hpp
Normal file
File diff suppressed because it is too large
Load Diff
191
3rdpart/OpenCV/include/opencv2/core/hal/intrin_forward.hpp
Normal file
191
3rdpart/OpenCV/include/opencv2/core/hal/intrin_forward.hpp
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
#ifndef CV__SIMD_FORWARD
|
||||||
|
#error "Need to pre-define forward width"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/** Types **/
|
||||||
|
#if CV__SIMD_FORWARD == 1024
|
||||||
|
// [todo] 1024
|
||||||
|
#error "1024-long ops not implemented yet"
|
||||||
|
#elif CV__SIMD_FORWARD == 512
|
||||||
|
// 512
|
||||||
|
#define __CV_VX(fun) v512_##fun
|
||||||
|
#define __CV_V_UINT8 v_uint8x64
|
||||||
|
#define __CV_V_INT8 v_int8x64
|
||||||
|
#define __CV_V_UINT16 v_uint16x32
|
||||||
|
#define __CV_V_INT16 v_int16x32
|
||||||
|
#define __CV_V_UINT32 v_uint32x16
|
||||||
|
#define __CV_V_INT32 v_int32x16
|
||||||
|
#define __CV_V_UINT64 v_uint64x8
|
||||||
|
#define __CV_V_INT64 v_int64x8
|
||||||
|
#define __CV_V_FLOAT32 v_float32x16
|
||||||
|
#define __CV_V_FLOAT64 v_float64x8
|
||||||
|
struct v_uint8x64;
|
||||||
|
struct v_int8x64;
|
||||||
|
struct v_uint16x32;
|
||||||
|
struct v_int16x32;
|
||||||
|
struct v_uint32x16;
|
||||||
|
struct v_int32x16;
|
||||||
|
struct v_uint64x8;
|
||||||
|
struct v_int64x8;
|
||||||
|
struct v_float32x16;
|
||||||
|
struct v_float64x8;
|
||||||
|
#elif CV__SIMD_FORWARD == 256
|
||||||
|
// 256
|
||||||
|
#define __CV_VX(fun) v256_##fun
|
||||||
|
#define __CV_V_UINT8 v_uint8x32
|
||||||
|
#define __CV_V_INT8 v_int8x32
|
||||||
|
#define __CV_V_UINT16 v_uint16x16
|
||||||
|
#define __CV_V_INT16 v_int16x16
|
||||||
|
#define __CV_V_UINT32 v_uint32x8
|
||||||
|
#define __CV_V_INT32 v_int32x8
|
||||||
|
#define __CV_V_UINT64 v_uint64x4
|
||||||
|
#define __CV_V_INT64 v_int64x4
|
||||||
|
#define __CV_V_FLOAT32 v_float32x8
|
||||||
|
#define __CV_V_FLOAT64 v_float64x4
|
||||||
|
struct v_uint8x32;
|
||||||
|
struct v_int8x32;
|
||||||
|
struct v_uint16x16;
|
||||||
|
struct v_int16x16;
|
||||||
|
struct v_uint32x8;
|
||||||
|
struct v_int32x8;
|
||||||
|
struct v_uint64x4;
|
||||||
|
struct v_int64x4;
|
||||||
|
struct v_float32x8;
|
||||||
|
struct v_float64x4;
|
||||||
|
#else
|
||||||
|
// 128
|
||||||
|
#define __CV_VX(fun) v_##fun
|
||||||
|
#define __CV_V_UINT8 v_uint8x16
|
||||||
|
#define __CV_V_INT8 v_int8x16
|
||||||
|
#define __CV_V_UINT16 v_uint16x8
|
||||||
|
#define __CV_V_INT16 v_int16x8
|
||||||
|
#define __CV_V_UINT32 v_uint32x4
|
||||||
|
#define __CV_V_INT32 v_int32x4
|
||||||
|
#define __CV_V_UINT64 v_uint64x2
|
||||||
|
#define __CV_V_INT64 v_int64x2
|
||||||
|
#define __CV_V_FLOAT32 v_float32x4
|
||||||
|
#define __CV_V_FLOAT64 v_float64x2
|
||||||
|
struct v_uint8x16;
|
||||||
|
struct v_int8x16;
|
||||||
|
struct v_uint16x8;
|
||||||
|
struct v_int16x8;
|
||||||
|
struct v_uint32x4;
|
||||||
|
struct v_int32x4;
|
||||||
|
struct v_uint64x2;
|
||||||
|
struct v_int64x2;
|
||||||
|
struct v_float32x4;
|
||||||
|
struct v_float64x2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** Value reordering **/
|
||||||
|
|
||||||
|
// Expansion
|
||||||
|
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||||
|
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||||
|
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||||
|
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||||
|
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||||
|
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||||
|
// Low Expansion
|
||||||
|
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
|
||||||
|
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
|
||||||
|
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
|
||||||
|
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
|
||||||
|
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
|
||||||
|
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
|
||||||
|
// High Expansion
|
||||||
|
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
|
||||||
|
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
|
||||||
|
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
|
||||||
|
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
|
||||||
|
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
|
||||||
|
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
|
||||||
|
// Load & Low Expansion
|
||||||
|
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
|
||||||
|
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
|
||||||
|
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
|
||||||
|
__CV_V_INT32 __CV_VX(load_expand)(const short*);
|
||||||
|
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
|
||||||
|
__CV_V_INT64 __CV_VX(load_expand)(const int*);
|
||||||
|
// Load lower 8-bit and expand into 32-bit
|
||||||
|
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
|
||||||
|
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
|
||||||
|
|
||||||
|
// Saturating Pack
|
||||||
|
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||||
|
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||||
|
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
|
||||||
|
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||||
|
// Non-saturating Pack
|
||||||
|
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
|
||||||
|
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
|
||||||
|
// Pack signed integers with unsigned saturation
|
||||||
|
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||||
|
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
|
||||||
|
|
||||||
|
/** Arithmetic, bitwise and comparison operations **/
|
||||||
|
|
||||||
|
// Non-saturating multiply
|
||||||
|
#if CV_VSX
|
||||||
|
template<typename Tvec>
|
||||||
|
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
|
||||||
|
#else
|
||||||
|
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
|
||||||
|
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
|
||||||
|
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
|
||||||
|
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Multiply and expand
|
||||||
|
#if CV_VSX
|
||||||
|
template<typename Tvec, typename Twvec>
|
||||||
|
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
|
||||||
|
#else
|
||||||
|
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
|
||||||
|
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
|
||||||
|
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
|
||||||
|
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
|
||||||
|
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
|
||||||
|
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Conversions
|
||||||
|
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
|
||||||
|
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
|
||||||
|
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
|
||||||
|
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
|
||||||
|
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
|
||||||
|
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
|
||||||
|
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
|
||||||
|
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);
|
||||||
|
|
||||||
|
/** Cleanup **/
|
||||||
|
#undef CV__SIMD_FORWARD
|
||||||
|
#undef __CV_VX
|
||||||
|
#undef __CV_V_UINT8
|
||||||
|
#undef __CV_V_INT8
|
||||||
|
#undef __CV_V_UINT16
|
||||||
|
#undef __CV_V_INT16
|
||||||
|
#undef __CV_V_UINT32
|
||||||
|
#undef __CV_V_INT32
|
||||||
|
#undef __CV_V_UINT64
|
||||||
|
#undef __CV_V_INT64
|
||||||
|
#undef __CV_V_FLOAT32
|
||||||
|
#undef __CV_V_FLOAT64
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
} // cv::
|
||||||
3036
3rdpart/OpenCV/include/opencv2/core/hal/intrin_lasx.hpp
Normal file
3036
3rdpart/OpenCV/include/opencv2/core/hal/intrin_lasx.hpp
Normal file
File diff suppressed because it is too large
Load Diff
111
3rdpart/OpenCV/include/opencv2/core/hal/intrin_legacy_ops.h
Normal file
111
3rdpart/OpenCV/include/opencv2/core/hal/intrin_legacy_ops.h
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
// This file has been created for compatibility with older versions of Universal Intrinscs
|
||||||
|
// Binary operators for vector types has been removed since version 4.11
|
||||||
|
// Include this file manually after OpenCV headers if you need these operators
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
||||||
|
#define OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
|
||||||
|
#error "Universal Intrinsics operators are deprecated and should not be used in OpenCV library"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __riscv
|
||||||
|
#warning "Operators might conflict with built-in functions on RISC-V platform"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(CV_VERSION) && CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 9
|
||||||
|
#warning "Older versions of OpenCV (<4.9) already have Universal Intrinscs operators"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
|
#define BIN_OP(OP, FUN) \
|
||||||
|
template <typename R> R operator OP (const R & lhs, const R & rhs) { return FUN(lhs, rhs); }
|
||||||
|
|
||||||
|
#define BIN_A_OP(OP, FUN) \
|
||||||
|
template <typename R> R & operator OP (R & res, const R & val) { res = FUN(res, val); return res; }
|
||||||
|
|
||||||
|
#define UN_OP(OP, FUN) \
|
||||||
|
template <typename R> R operator OP (const R & val) { return FUN(val); }
|
||||||
|
|
||||||
|
BIN_OP(+, v_add)
|
||||||
|
BIN_OP(-, v_sub)
|
||||||
|
BIN_OP(*, v_mul)
|
||||||
|
BIN_OP(/, v_div)
|
||||||
|
BIN_OP(&, v_and)
|
||||||
|
BIN_OP(|, v_or)
|
||||||
|
BIN_OP(^, v_xor)
|
||||||
|
|
||||||
|
BIN_OP(==, v_eq)
|
||||||
|
BIN_OP(!=, v_ne)
|
||||||
|
BIN_OP(<, v_lt)
|
||||||
|
BIN_OP(>, v_gt)
|
||||||
|
BIN_OP(<=, v_le)
|
||||||
|
BIN_OP(>=, v_ge)
|
||||||
|
|
||||||
|
BIN_A_OP(+=, v_add)
|
||||||
|
BIN_A_OP(-=, v_sub)
|
||||||
|
BIN_A_OP(*=, v_mul)
|
||||||
|
BIN_A_OP(/=, v_div)
|
||||||
|
BIN_A_OP(&=, v_and)
|
||||||
|
BIN_A_OP(|=, v_or)
|
||||||
|
BIN_A_OP(^=, v_xor)
|
||||||
|
|
||||||
|
UN_OP(~, v_not)
|
||||||
|
|
||||||
|
// TODO: shift operators?
|
||||||
|
|
||||||
|
}} // cv::hal::
|
||||||
|
|
||||||
|
//==============================================================================
|
||||||
|
|
||||||
|
#ifdef OPENCV_ENABLE_INLINE_INTRIN_OPERATOR_TEST
|
||||||
|
|
||||||
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
|
inline static void opencv_operator_compile_test()
|
||||||
|
{
|
||||||
|
using namespace cv;
|
||||||
|
v_float32 a, b, c;
|
||||||
|
uint8_t shift = 1;
|
||||||
|
a = b + c;
|
||||||
|
a = b - c;
|
||||||
|
a = b * c;
|
||||||
|
a = b / c;
|
||||||
|
a = b & c;
|
||||||
|
a = b | c;
|
||||||
|
a = b ^ c;
|
||||||
|
// a = b >> shift;
|
||||||
|
// a = b << shift;
|
||||||
|
|
||||||
|
a = (b == c);
|
||||||
|
a = (b != c);
|
||||||
|
a = (b < c);}}
|
||||||
|
a = (b > c);
|
||||||
|
a = (b <= c);
|
||||||
|
a = (b >= c);
|
||||||
|
|
||||||
|
a += b;
|
||||||
|
a -= b;
|
||||||
|
a *= b;
|
||||||
|
a /= b;
|
||||||
|
a &= b;
|
||||||
|
a |= b;
|
||||||
|
a ^= b;
|
||||||
|
// a <<= shift;
|
||||||
|
// a >>= shift;
|
||||||
|
|
||||||
|
a = ~b;
|
||||||
|
}
|
||||||
|
|
||||||
|
}} // cv::hal::
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_INTRIN_LEGACY_OPS_HPP
|
||||||
2546
3rdpart/OpenCV/include/opencv2/core/hal/intrin_lsx.hpp
Normal file
2546
3rdpart/OpenCV/include/opencv2/core/hal/intrin_lsx.hpp
Normal file
File diff suppressed because it is too large
Load Diff
687
3rdpart/OpenCV/include/opencv2/core/hal/intrin_math.hpp
Normal file
687
3rdpart/OpenCV/include/opencv2/core/hal/intrin_math.hpp
Normal file
@@ -0,0 +1,687 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
|
||||||
|
/* Universal Intrinsics implementation of sin, cos, exp and log
|
||||||
|
|
||||||
|
Inspired by Intel Approximate Math library, and based on the
|
||||||
|
corresponding algorithms of the cephes math library
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Copyright (C) 2010,2011 RJVB - extensions */
|
||||||
|
/* Copyright (C) 2011 Julien Pommier
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
|
||||||
|
(this is the zlib license)
|
||||||
|
*/
|
||||||
|
#ifndef OPENCV_HAL_INTRIN_MATH_HPP
|
||||||
|
#define OPENCV_HAL_INTRIN_MATH_HPP
|
||||||
|
|
||||||
|
//! @name Exponential
|
||||||
|
//! @{
|
||||||
|
// Implementation is the same as float32 vector.
|
||||||
|
template<typename _TpVec16F, typename _TpVec16S>
|
||||||
|
inline _TpVec16F v_exp_default_16f(const _TpVec16F &x) {
|
||||||
|
const _TpVec16F _vexp_lo_f16 = v_setall_<_TpVec16F>(-10.7421875f);
|
||||||
|
const _TpVec16F _vexp_hi_f16 = v_setall_<_TpVec16F>(11.f);
|
||||||
|
const _TpVec16F _vexp_half_fp16 = v_setall_<_TpVec16F>(0.5f);
|
||||||
|
const _TpVec16F _vexp_one_fp16 = v_setall_<_TpVec16F>(1.f);
|
||||||
|
const _TpVec16F _vexp_LOG2EF_f16 = v_setall_<_TpVec16F>(1.44269504088896341f);
|
||||||
|
const _TpVec16F _vexp_C1_f16 = v_setall_<_TpVec16F>(-6.93359375E-1f);
|
||||||
|
const _TpVec16F _vexp_C2_f16 = v_setall_<_TpVec16F>(2.12194440E-4f);
|
||||||
|
const _TpVec16F _vexp_p0_f16 = v_setall_<_TpVec16F>(1.9875691500E-4f);
|
||||||
|
const _TpVec16F _vexp_p1_f16 = v_setall_<_TpVec16F>(1.3981999507E-3f);
|
||||||
|
const _TpVec16F _vexp_p2_f16 = v_setall_<_TpVec16F>(8.3334519073E-3f);
|
||||||
|
const _TpVec16F _vexp_p3_f16 = v_setall_<_TpVec16F>(4.1665795894E-2f);
|
||||||
|
const _TpVec16F _vexp_p4_f16 = v_setall_<_TpVec16F>(1.6666665459E-1f);
|
||||||
|
const _TpVec16F _vexp_p5_f16 = v_setall_<_TpVec16F>(5.0000001201E-1f);
|
||||||
|
|
||||||
|
_TpVec16F _vexp_, _vexp_x, _vexp_y, _vexp_xx;
|
||||||
|
_TpVec16S _vexp_mm;
|
||||||
|
const _TpVec16S _vexp_bias_s16 = v_setall_<_TpVec16S>((short)0xf);
|
||||||
|
|
||||||
|
// compute exponential of x
|
||||||
|
_vexp_x = v_max(x, _vexp_lo_f16);
|
||||||
|
_vexp_x = v_min(_vexp_x, _vexp_hi_f16);
|
||||||
|
|
||||||
|
_vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f16, _vexp_half_fp16);
|
||||||
|
_vexp_mm = v_floor(_vexp_);
|
||||||
|
_vexp_ = v_cvt_f16(_vexp_mm);
|
||||||
|
_vexp_mm = v_add(_vexp_mm, _vexp_bias_s16);
|
||||||
|
_vexp_mm = v_shl(_vexp_mm, 10);
|
||||||
|
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C1_f16, _vexp_x);
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C2_f16, _vexp_x);
|
||||||
|
_vexp_xx = v_mul(_vexp_x, _vexp_x);
|
||||||
|
|
||||||
|
_vexp_y = v_fma(_vexp_x, _vexp_p0_f16, _vexp_p1_f16);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f16);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f16);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f16);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f16);
|
||||||
|
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x);
|
||||||
|
_vexp_y = v_add(_vexp_y, _vexp_one_fp16);
|
||||||
|
_vexp_y = v_mul(_vexp_y, v_reinterpret_as_f16(_vexp_mm));
|
||||||
|
|
||||||
|
// exp(NAN) -> NAN
|
||||||
|
_TpVec16F mask_not_nan = v_not_nan(x);
|
||||||
|
return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline _TpVec32F v_exp_default_32f(const _TpVec32F &x) {
|
||||||
|
const _TpVec32F _vexp_lo_f32 = v_setall_<_TpVec32F>(-88.3762626647949f);
|
||||||
|
const _TpVec32F _vexp_hi_f32 = v_setall_<_TpVec32F>(89.f);
|
||||||
|
const _TpVec32F _vexp_half_fp32 = v_setall_<_TpVec32F>(0.5f);
|
||||||
|
const _TpVec32F _vexp_one_fp32 = v_setall_<_TpVec32F>(1.f);
|
||||||
|
const _TpVec32F _vexp_LOG2EF_f32 = v_setall_<_TpVec32F>(1.44269504088896341f);
|
||||||
|
const _TpVec32F _vexp_C1_f32 = v_setall_<_TpVec32F>(-6.93359375E-1f);
|
||||||
|
const _TpVec32F _vexp_C2_f32 = v_setall_<_TpVec32F>(2.12194440E-4f);
|
||||||
|
const _TpVec32F _vexp_p0_f32 = v_setall_<_TpVec32F>(1.9875691500E-4f);
|
||||||
|
const _TpVec32F _vexp_p1_f32 = v_setall_<_TpVec32F>(1.3981999507E-3f);
|
||||||
|
const _TpVec32F _vexp_p2_f32 = v_setall_<_TpVec32F>(8.3334519073E-3f);
|
||||||
|
const _TpVec32F _vexp_p3_f32 = v_setall_<_TpVec32F>(4.1665795894E-2f);
|
||||||
|
const _TpVec32F _vexp_p4_f32 = v_setall_<_TpVec32F>(1.6666665459E-1f);
|
||||||
|
const _TpVec32F _vexp_p5_f32 = v_setall_<_TpVec32F>(5.0000001201E-1f);
|
||||||
|
|
||||||
|
_TpVec32F _vexp_, _vexp_x, _vexp_y, _vexp_xx;
|
||||||
|
_TpVec32S _vexp_mm;
|
||||||
|
const _TpVec32S _vexp_bias_s32 = v_setall_<_TpVec32S>((int)0x7f);
|
||||||
|
|
||||||
|
// compute exponential of x
|
||||||
|
_vexp_x = v_max(x, _vexp_lo_f32);
|
||||||
|
_vexp_x = v_min(_vexp_x, _vexp_hi_f32);
|
||||||
|
|
||||||
|
_vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f32, _vexp_half_fp32);
|
||||||
|
_vexp_mm = v_floor(_vexp_);
|
||||||
|
_vexp_ = v_cvt_f32(_vexp_mm);
|
||||||
|
_vexp_mm = v_add(_vexp_mm, _vexp_bias_s32);
|
||||||
|
_vexp_mm = v_shl(_vexp_mm, 23);
|
||||||
|
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C1_f32, _vexp_x);
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C2_f32, _vexp_x);
|
||||||
|
_vexp_xx = v_mul(_vexp_x, _vexp_x);
|
||||||
|
|
||||||
|
_vexp_y = v_fma(_vexp_x, _vexp_p0_f32, _vexp_p1_f32);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f32);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f32);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f32);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f32);
|
||||||
|
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x);
|
||||||
|
_vexp_y = v_add(_vexp_y, _vexp_one_fp32);
|
||||||
|
_vexp_y = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm));
|
||||||
|
|
||||||
|
// exp(NAN) -> NAN
|
||||||
|
_TpVec32F mask_not_nan = v_not_nan(x);
|
||||||
|
return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec64F, typename _TpVec64S>
|
||||||
|
inline _TpVec64F v_exp_default_64f(const _TpVec64F &x) {
|
||||||
|
const _TpVec64F _vexp_lo_f64 = v_setall_<_TpVec64F>(-709.43613930310391424428);
|
||||||
|
const _TpVec64F _vexp_hi_f64 = v_setall_<_TpVec64F>(710.);
|
||||||
|
const _TpVec64F _vexp_half_f64 = v_setall_<_TpVec64F>(0.5);
|
||||||
|
const _TpVec64F _vexp_one_f64 = v_setall_<_TpVec64F>(1.0);
|
||||||
|
const _TpVec64F _vexp_two_f64 = v_setall_<_TpVec64F>(2.0);
|
||||||
|
const _TpVec64F _vexp_LOG2EF_f64 = v_setall_<_TpVec64F>(1.44269504088896340736);
|
||||||
|
const _TpVec64F _vexp_C1_f64 = v_setall_<_TpVec64F>(-6.93145751953125E-1);
|
||||||
|
const _TpVec64F _vexp_C2_f64 = v_setall_<_TpVec64F>(-1.42860682030941723212E-6);
|
||||||
|
const _TpVec64F _vexp_p0_f64 = v_setall_<_TpVec64F>(1.26177193074810590878E-4);
|
||||||
|
const _TpVec64F _vexp_p1_f64 = v_setall_<_TpVec64F>(3.02994407707441961300E-2);
|
||||||
|
const _TpVec64F _vexp_p2_f64 = v_setall_<_TpVec64F>(9.99999999999999999910E-1);
|
||||||
|
const _TpVec64F _vexp_q0_f64 = v_setall_<_TpVec64F>(3.00198505138664455042E-6);
|
||||||
|
const _TpVec64F _vexp_q1_f64 = v_setall_<_TpVec64F>(2.52448340349684104192E-3);
|
||||||
|
const _TpVec64F _vexp_q2_f64 = v_setall_<_TpVec64F>(2.27265548208155028766E-1);
|
||||||
|
const _TpVec64F _vexp_q3_f64 = v_setall_<_TpVec64F>(2.00000000000000000009E0);
|
||||||
|
|
||||||
|
_TpVec64F _vexp_, _vexp_x, _vexp_y, _vexp_z, _vexp_xx;
|
||||||
|
_TpVec64S _vexp_mm;
|
||||||
|
const _TpVec64S _vexp_bias_s64 = v_setall_<_TpVec64S>((int64)0x3ff);
|
||||||
|
|
||||||
|
// compute exponential of x
|
||||||
|
_vexp_x = v_max(x, _vexp_lo_f64);
|
||||||
|
_vexp_x = v_min(_vexp_x, _vexp_hi_f64);
|
||||||
|
|
||||||
|
_vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f64, _vexp_half_f64);
|
||||||
|
_vexp_mm = v_expand_low(v_floor(_vexp_));
|
||||||
|
_vexp_ = v_cvt_f64(_vexp_mm);
|
||||||
|
_vexp_mm = v_add(_vexp_mm, _vexp_bias_s64);
|
||||||
|
_vexp_mm = v_shl(_vexp_mm, 52);
|
||||||
|
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C1_f64, _vexp_x);
|
||||||
|
_vexp_x = v_fma(_vexp_, _vexp_C2_f64, _vexp_x);
|
||||||
|
_vexp_xx = v_mul(_vexp_x, _vexp_x);
|
||||||
|
|
||||||
|
_vexp_y = v_fma(_vexp_xx, _vexp_p0_f64, _vexp_p1_f64);
|
||||||
|
_vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_p2_f64);
|
||||||
|
_vexp_y = v_mul(_vexp_y, _vexp_x);
|
||||||
|
|
||||||
|
_vexp_z = v_fma(_vexp_xx, _vexp_q0_f64, _vexp_q1_f64);
|
||||||
|
_vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q2_f64);
|
||||||
|
_vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q3_f64);
|
||||||
|
|
||||||
|
_vexp_z = v_div(_vexp_y, v_sub(_vexp_z, _vexp_y));
|
||||||
|
_vexp_z = v_fma(_vexp_two_f64, _vexp_z, _vexp_one_f64);
|
||||||
|
_vexp_z = v_mul(_vexp_z, v_reinterpret_as_f64(_vexp_mm));
|
||||||
|
|
||||||
|
// exp(NAN) -> NAN
|
||||||
|
_TpVec64F mask_not_nan = v_not_nan(x);
|
||||||
|
return v_select(mask_not_nan, _vexp_z, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7FF8000000000000)));
|
||||||
|
}
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Natural Logarithm
|
||||||
|
//! @{
|
||||||
|
template<typename _TpVec16F, typename _TpVec16S>
|
||||||
|
inline _TpVec16F v_log_default_16f(const _TpVec16F &x) {
|
||||||
|
const _TpVec16F _vlog_one_fp16 = v_setall_<_TpVec16F>(1.0f);
|
||||||
|
const _TpVec16F _vlog_SQRTHF_fp16 = v_setall_<_TpVec16F>(0.707106781186547524f);
|
||||||
|
const _TpVec16F _vlog_q1_fp16 = v_setall_<_TpVec16F>(-2.12194440E-4f);
|
||||||
|
const _TpVec16F _vlog_q2_fp16 = v_setall_<_TpVec16F>(0.693359375f);
|
||||||
|
const _TpVec16F _vlog_p0_fp16 = v_setall_<_TpVec16F>(7.0376836292E-2f);
|
||||||
|
const _TpVec16F _vlog_p1_fp16 = v_setall_<_TpVec16F>(-1.1514610310E-1f);
|
||||||
|
const _TpVec16F _vlog_p2_fp16 = v_setall_<_TpVec16F>(1.1676998740E-1f);
|
||||||
|
const _TpVec16F _vlog_p3_fp16 = v_setall_<_TpVec16F>(-1.2420140846E-1f);
|
||||||
|
const _TpVec16F _vlog_p4_fp16 = v_setall_<_TpVec16F>(1.4249322787E-1f);
|
||||||
|
const _TpVec16F _vlog_p5_fp16 = v_setall_<_TpVec16F>(-1.6668057665E-1f);
|
||||||
|
const _TpVec16F _vlog_p6_fp16 = v_setall_<_TpVec16F>(2.0000714765E-1f);
|
||||||
|
const _TpVec16F _vlog_p7_fp16 = v_setall_<_TpVec16F>(-2.4999993993E-1f);
|
||||||
|
const _TpVec16F _vlog_p8_fp16 = v_setall_<_TpVec16F>(3.3333331174E-1f);
|
||||||
|
|
||||||
|
_TpVec16F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp;
|
||||||
|
_TpVec16S _vlog_ux, _vlog_emm0;
|
||||||
|
const _TpVec16S _vlog_inv_mant_mask_s16 = v_setall_<_TpVec16S>((short)~0x7c00);
|
||||||
|
|
||||||
|
_vlog_ux = v_reinterpret_as_s16(x);
|
||||||
|
_vlog_emm0 = v_shr(_vlog_ux, 10);
|
||||||
|
|
||||||
|
_vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s16);
|
||||||
|
_vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s16(v_setall_<_TpVec16F>(0.5f)));
|
||||||
|
_vlog_x = v_reinterpret_as_f16(_vlog_ux);
|
||||||
|
|
||||||
|
_vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec16S>((short)0xf));
|
||||||
|
_vlog_e = v_cvt_f16(_vlog_emm0);
|
||||||
|
|
||||||
|
_vlog_e = v_add(_vlog_e, _vlog_one_fp16);
|
||||||
|
|
||||||
|
_TpVec16F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp16);
|
||||||
|
_vlog_tmp = v_and(_vlog_x, _vlog_mask);
|
||||||
|
_vlog_x = v_sub(_vlog_x, _vlog_one_fp16);
|
||||||
|
_vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp16, _vlog_mask));
|
||||||
|
_vlog_x = v_add(_vlog_x, _vlog_tmp);
|
||||||
|
|
||||||
|
_vlog_z = v_mul(_vlog_x, _vlog_x);
|
||||||
|
|
||||||
|
_vlog_y = v_fma(_vlog_p0_fp16, _vlog_x, _vlog_p1_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp16);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp16);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_x);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_z);
|
||||||
|
|
||||||
|
_vlog_y = v_fma(_vlog_e, _vlog_q1_fp16, _vlog_y);
|
||||||
|
|
||||||
|
_vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec16F>(0.5f)));
|
||||||
|
|
||||||
|
_vlog_x = v_add(_vlog_x, _vlog_y);
|
||||||
|
_vlog_x = v_fma(_vlog_e, _vlog_q2_fp16, _vlog_x);
|
||||||
|
// log(0) -> -INF
|
||||||
|
_TpVec16F mask_zero = v_eq(x, v_setzero_<_TpVec16F>());
|
||||||
|
_vlog_x = v_select(mask_zero, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0xfc00)), _vlog_x);
|
||||||
|
// log(NEG), log(NAN) -> NAN
|
||||||
|
_TpVec16F mask_not_nan = v_ge(x, v_setzero_<_TpVec16F>());
|
||||||
|
_vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00)));
|
||||||
|
// log(INF) -> INF
|
||||||
|
_TpVec16F mask_inf = v_eq(x, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7c00)));
|
||||||
|
_vlog_x = v_select(mask_inf, x, _vlog_x);
|
||||||
|
return _vlog_x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline _TpVec32F v_log_default_32f(const _TpVec32F &x) {
|
||||||
|
const _TpVec32F _vlog_one_fp32 = v_setall_<_TpVec32F>(1.0f);
|
||||||
|
const _TpVec32F _vlog_SQRTHF_fp32 = v_setall_<_TpVec32F>(0.707106781186547524f);
|
||||||
|
const _TpVec32F _vlog_q1_fp32 = v_setall_<_TpVec32F>(-2.12194440E-4f);
|
||||||
|
const _TpVec32F _vlog_q2_fp32 = v_setall_<_TpVec32F>(0.693359375f);
|
||||||
|
const _TpVec32F _vlog_p0_fp32 = v_setall_<_TpVec32F>(7.0376836292E-2f);
|
||||||
|
const _TpVec32F _vlog_p1_fp32 = v_setall_<_TpVec32F>(-1.1514610310E-1f);
|
||||||
|
const _TpVec32F _vlog_p2_fp32 = v_setall_<_TpVec32F>(1.1676998740E-1f);
|
||||||
|
const _TpVec32F _vlog_p3_fp32 = v_setall_<_TpVec32F>(-1.2420140846E-1f);
|
||||||
|
const _TpVec32F _vlog_p4_fp32 = v_setall_<_TpVec32F>(1.4249322787E-1f);
|
||||||
|
const _TpVec32F _vlog_p5_fp32 = v_setall_<_TpVec32F>(-1.6668057665E-1f);
|
||||||
|
const _TpVec32F _vlog_p6_fp32 = v_setall_<_TpVec32F>(2.0000714765E-1f);
|
||||||
|
const _TpVec32F _vlog_p7_fp32 = v_setall_<_TpVec32F>(-2.4999993993E-1f);
|
||||||
|
const _TpVec32F _vlog_p8_fp32 = v_setall_<_TpVec32F>(3.3333331174E-1f);
|
||||||
|
|
||||||
|
_TpVec32F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp;
|
||||||
|
_TpVec32S _vlog_ux, _vlog_emm0;
|
||||||
|
const _TpVec32S _vlog_inv_mant_mask_s32 = v_setall_<_TpVec32S>((int)~0x7f800000);
|
||||||
|
|
||||||
|
_vlog_ux = v_reinterpret_as_s32(x);
|
||||||
|
_vlog_emm0 = v_shr(_vlog_ux, 23);
|
||||||
|
|
||||||
|
_vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s32);
|
||||||
|
_vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s32(v_setall_<_TpVec32F>(0.5f)));
|
||||||
|
_vlog_x = v_reinterpret_as_f32(_vlog_ux);
|
||||||
|
|
||||||
|
_vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec32S>((int)0x7f));
|
||||||
|
_vlog_e = v_cvt_f32(_vlog_emm0);
|
||||||
|
|
||||||
|
_vlog_e = v_add(_vlog_e, _vlog_one_fp32);
|
||||||
|
|
||||||
|
_TpVec32F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp32);
|
||||||
|
_vlog_tmp = v_and(_vlog_x, _vlog_mask);
|
||||||
|
_vlog_x = v_sub(_vlog_x, _vlog_one_fp32);
|
||||||
|
_vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp32, _vlog_mask));
|
||||||
|
_vlog_x = v_add(_vlog_x, _vlog_tmp);
|
||||||
|
|
||||||
|
_vlog_z = v_mul(_vlog_x, _vlog_x);
|
||||||
|
|
||||||
|
_vlog_y = v_fma(_vlog_p0_fp32, _vlog_x, _vlog_p1_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p6_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p7_fp32);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p8_fp32);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_x);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_z);
|
||||||
|
|
||||||
|
_vlog_y = v_fma(_vlog_e, _vlog_q1_fp32, _vlog_y);
|
||||||
|
|
||||||
|
_vlog_y = v_sub(_vlog_y, v_mul(_vlog_z, v_setall_<_TpVec32F>(0.5f)));
|
||||||
|
|
||||||
|
_vlog_x = v_add(_vlog_x, _vlog_y);
|
||||||
|
_vlog_x = v_fma(_vlog_e, _vlog_q2_fp32, _vlog_x);
|
||||||
|
// log(0) -> -INF
|
||||||
|
_TpVec32F mask_zero = v_eq(x, v_setzero_<_TpVec32F>());
|
||||||
|
_vlog_x = v_select(mask_zero, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0xff800000)), _vlog_x);
|
||||||
|
// log(NEG), log(NAN) -> NAN
|
||||||
|
_TpVec32F mask_not_nan = v_ge(x, v_setzero_<_TpVec32F>());
|
||||||
|
_vlog_x = v_select(mask_not_nan, _vlog_x, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000)));
|
||||||
|
// log(INF) -> INF
|
||||||
|
_TpVec32F mask_inf = v_eq(x, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7f800000)));
|
||||||
|
_vlog_x = v_select(mask_inf, x, _vlog_x);
|
||||||
|
return _vlog_x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec64F, typename _TpVec64S>
|
||||||
|
inline _TpVec64F v_log_default_64f(const _TpVec64F &x) {
|
||||||
|
const _TpVec64F _vlog_one_fp64 = v_setall_<_TpVec64F>(1.0);
|
||||||
|
const _TpVec64F _vlog_SQRTHF_fp64 = v_setall_<_TpVec64F>(0.7071067811865475244);
|
||||||
|
const _TpVec64F _vlog_p0_fp64 = v_setall_<_TpVec64F>(1.01875663804580931796E-4);
|
||||||
|
const _TpVec64F _vlog_p1_fp64 = v_setall_<_TpVec64F>(4.97494994976747001425E-1);
|
||||||
|
const _TpVec64F _vlog_p2_fp64 = v_setall_<_TpVec64F>(4.70579119878881725854);
|
||||||
|
const _TpVec64F _vlog_p3_fp64 = v_setall_<_TpVec64F>(1.44989225341610930846E1);
|
||||||
|
const _TpVec64F _vlog_p4_fp64 = v_setall_<_TpVec64F>(1.79368678507819816313E1);
|
||||||
|
const _TpVec64F _vlog_p5_fp64 = v_setall_<_TpVec64F>(7.70838733755885391666);
|
||||||
|
const _TpVec64F _vlog_q0_fp64 = v_setall_<_TpVec64F>(1.12873587189167450590E1);
|
||||||
|
const _TpVec64F _vlog_q1_fp64 = v_setall_<_TpVec64F>(4.52279145837532221105E1);
|
||||||
|
const _TpVec64F _vlog_q2_fp64 = v_setall_<_TpVec64F>(8.29875266912776603211E1);
|
||||||
|
const _TpVec64F _vlog_q3_fp64 = v_setall_<_TpVec64F>(7.11544750618563894466E1);
|
||||||
|
const _TpVec64F _vlog_q4_fp64 = v_setall_<_TpVec64F>(2.31251620126765340583E1);
|
||||||
|
|
||||||
|
const _TpVec64F _vlog_C0_fp64 = v_setall_<_TpVec64F>(2.121944400546905827679e-4);
|
||||||
|
const _TpVec64F _vlog_C1_fp64 = v_setall_<_TpVec64F>(0.693359375);
|
||||||
|
|
||||||
|
_TpVec64F _vlog_x, _vlog_e, _vlog_y, _vlog_z, _vlog_tmp, _vlog_xx;
|
||||||
|
_TpVec64S _vlog_ux, _vlog_emm0;
|
||||||
|
const _TpVec64S _vlog_inv_mant_mask_s64 = v_setall_<_TpVec64S>((int64)~0x7ff0000000000000);
|
||||||
|
|
||||||
|
_vlog_ux = v_reinterpret_as_s64(x);
|
||||||
|
_vlog_emm0 = v_shr(_vlog_ux, 52);
|
||||||
|
|
||||||
|
_vlog_ux = v_and(_vlog_ux, _vlog_inv_mant_mask_s64);
|
||||||
|
_vlog_ux = v_or(_vlog_ux, v_reinterpret_as_s64(v_setall_<_TpVec64F>(0.5)));
|
||||||
|
_vlog_x = v_reinterpret_as_f64(_vlog_ux);
|
||||||
|
|
||||||
|
_vlog_emm0 = v_sub(_vlog_emm0, v_setall_<_TpVec64S>((int64)0x3ff));
|
||||||
|
_vlog_e = v_cvt_f64(_vlog_emm0);
|
||||||
|
|
||||||
|
_vlog_e = v_add(_vlog_e, _vlog_one_fp64);
|
||||||
|
|
||||||
|
_TpVec64F _vlog_mask = v_lt(_vlog_x, _vlog_SQRTHF_fp64);
|
||||||
|
_vlog_tmp = v_and(_vlog_x, _vlog_mask);
|
||||||
|
_vlog_x = v_sub(_vlog_x, _vlog_one_fp64);
|
||||||
|
_vlog_e = v_sub(_vlog_e, v_and(_vlog_one_fp64, _vlog_mask));
|
||||||
|
_vlog_x = v_add(_vlog_x, _vlog_tmp);
|
||||||
|
|
||||||
|
_vlog_xx = v_mul(_vlog_x, _vlog_x);
|
||||||
|
|
||||||
|
_vlog_y = v_fma(_vlog_p0_fp64, _vlog_x, _vlog_p1_fp64);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p2_fp64);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p3_fp64);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p4_fp64);
|
||||||
|
_vlog_y = v_fma(_vlog_y, _vlog_x, _vlog_p5_fp64);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_x);
|
||||||
|
_vlog_y = v_mul(_vlog_y, _vlog_xx);
|
||||||
|
|
||||||
|
_vlog_z = v_add(_vlog_x, _vlog_q0_fp64);
|
||||||
|
_vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q1_fp64);
|
||||||
|
_vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q2_fp64);
|
||||||
|
_vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q3_fp64);
|
||||||
|
_vlog_z = v_fma(_vlog_z, _vlog_x, _vlog_q4_fp64);
|
||||||
|
|
||||||
|
_vlog_z = v_div(_vlog_y, _vlog_z);
|
||||||
|
_vlog_z = v_sub(_vlog_z, v_mul(_vlog_e, _vlog_C0_fp64));
|
||||||
|
_vlog_z = v_sub(_vlog_z, v_mul(_vlog_xx, v_setall_<_TpVec64F>(0.5)));
|
||||||
|
|
||||||
|
_vlog_z = v_add(_vlog_z, _vlog_x);
|
||||||
|
_vlog_z = v_fma(_vlog_e, _vlog_C1_fp64, _vlog_z);
|
||||||
|
|
||||||
|
// log(0) -> -INF
|
||||||
|
_TpVec64F mask_zero = v_eq(x, v_setzero_<_TpVec64F>());
|
||||||
|
_vlog_z = v_select(mask_zero, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0xfff0000000000000)), _vlog_z);
|
||||||
|
// log(NEG), log(NAN) -> NAN
|
||||||
|
_TpVec64F mask_not_nan = v_ge(x, v_setzero_<_TpVec64F>());
|
||||||
|
_vlog_z = v_select(mask_not_nan, _vlog_z, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff8000000000000)));
|
||||||
|
// log(INF) -> INF
|
||||||
|
_TpVec64F mask_inf = v_eq(x, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff0000000000000)));
|
||||||
|
_vlog_z = v_select(mask_inf, x, _vlog_z);
|
||||||
|
return _vlog_z;
|
||||||
|
}
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Sine and Cosine
|
||||||
|
//! @{
|
||||||
|
template<typename _TpVec16F, typename _TpVec16S>
|
||||||
|
inline void v_sincos_default_16f(const _TpVec16F &x, _TpVec16F &ysin, _TpVec16F &ycos) {
|
||||||
|
const _TpVec16F v_cephes_FOPI = v_setall_<_TpVec16F>(hfloat(1.27323954473516f)); // 4 / M_PI
|
||||||
|
const _TpVec16F v_minus_DP1 = v_setall_<_TpVec16F>(hfloat(-0.78515625f));
|
||||||
|
const _TpVec16F v_minus_DP2 = v_setall_<_TpVec16F>(hfloat(-2.4187564849853515625E-4f));
|
||||||
|
const _TpVec16F v_minus_DP3 = v_setall_<_TpVec16F>(hfloat(-3.77489497744594108E-8f));
|
||||||
|
const _TpVec16F v_sincof_p0 = v_setall_<_TpVec16F>(hfloat(-1.9515295891E-4f));
|
||||||
|
const _TpVec16F v_sincof_p1 = v_setall_<_TpVec16F>(hfloat(8.3321608736E-3f));
|
||||||
|
const _TpVec16F v_sincof_p2 = v_setall_<_TpVec16F>(hfloat(-1.6666654611E-1f));
|
||||||
|
const _TpVec16F v_coscof_p0 = v_setall_<_TpVec16F>(hfloat(2.443315711809948E-5f));
|
||||||
|
const _TpVec16F v_coscof_p1 = v_setall_<_TpVec16F>(hfloat(-1.388731625493765E-3f));
|
||||||
|
const _TpVec16F v_coscof_p2 = v_setall_<_TpVec16F>(hfloat(4.166664568298827E-2f));
|
||||||
|
const _TpVec16F v_nan = v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7e00));
|
||||||
|
const _TpVec16F v_neg_zero = v_setall_<_TpVec16F>(hfloat(-0.f));
|
||||||
|
|
||||||
|
_TpVec16F _vx, _vy, sign_mask_sin, sign_mask_cos;
|
||||||
|
_TpVec16S emm2;
|
||||||
|
|
||||||
|
sign_mask_sin = v_lt(x, v_setzero_<_TpVec16F>());
|
||||||
|
_vx = v_abs(x);
|
||||||
|
_vy = v_mul(_vx, v_cephes_FOPI);
|
||||||
|
|
||||||
|
emm2 = v_trunc(_vy);
|
||||||
|
emm2 = v_add(emm2, v_setall_<_TpVec16S>((short)1));
|
||||||
|
emm2 = v_and(emm2, v_setall_<_TpVec16S>((short)~1));
|
||||||
|
_vy = v_cvt_f16(emm2);
|
||||||
|
|
||||||
|
_TpVec16F poly_mask = v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)0)));
|
||||||
|
|
||||||
|
_vx = v_fma(_vy, v_minus_DP1, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP2, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP3, _vx);
|
||||||
|
|
||||||
|
sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f16(v_eq(v_and(emm2, v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0))));
|
||||||
|
sign_mask_cos = v_reinterpret_as_f16(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec16S>((short)2)), v_setall_<_TpVec16S>((short)4)), v_setall_<_TpVec16S>((short)0)));
|
||||||
|
|
||||||
|
_TpVec16F _vxx = v_mul(_vx, _vx);
|
||||||
|
_TpVec16F y1, y2;
|
||||||
|
|
||||||
|
y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1);
|
||||||
|
y1 = v_fma(y1, _vxx, v_coscof_p2);
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(-0.5f)));
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec16F>(hfloat(1.f)));
|
||||||
|
|
||||||
|
y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sincof_p2);
|
||||||
|
y2 = v_mul(y2, _vxx);
|
||||||
|
y2 = v_fma(y2, _vx, _vx);
|
||||||
|
|
||||||
|
ysin = v_select(poly_mask, y2, y1);
|
||||||
|
ycos = v_select(poly_mask, y1, y2);
|
||||||
|
ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin));
|
||||||
|
ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos);
|
||||||
|
|
||||||
|
// sincos(NAN) -> NAN, sincos(±INF) -> NAN
|
||||||
|
_TpVec16F mask_inf = v_eq(_vx, v_reinterpret_as_f16(v_setall_<_TpVec16S>((short)0x7c00)));
|
||||||
|
_TpVec16F mask_nan = v_or(mask_inf, v_ne(x, x));
|
||||||
|
ysin = v_select(mask_nan, v_nan, ysin);
|
||||||
|
ycos = v_select(mask_nan, v_nan, ycos);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec16F, typename _TpVec16S>
|
||||||
|
inline _TpVec16F v_sin_default_16f(const _TpVec16F &x) {
|
||||||
|
_TpVec16F ysin, ycos;
|
||||||
|
v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos);
|
||||||
|
return ysin;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec16F, typename _TpVec16S>
|
||||||
|
inline _TpVec16F v_cos_default_16f(const _TpVec16F &x) {
|
||||||
|
_TpVec16F ysin, ycos;
|
||||||
|
v_sincos_default_16f<_TpVec16F, _TpVec16S>(x, ysin, ycos);
|
||||||
|
return ycos;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline void v_sincos_default_32f(const _TpVec32F &x, _TpVec32F &ysin, _TpVec32F &ycos) {
|
||||||
|
const _TpVec32F v_cephes_FOPI = v_setall_<_TpVec32F>(1.27323954473516f); // 4 / M_PI
|
||||||
|
const _TpVec32F v_minus_DP1 = v_setall_<_TpVec32F>(-0.78515625f);
|
||||||
|
const _TpVec32F v_minus_DP2 = v_setall_<_TpVec32F>(-2.4187564849853515625E-4f);
|
||||||
|
const _TpVec32F v_minus_DP3 = v_setall_<_TpVec32F>(-3.77489497744594108E-8f);
|
||||||
|
const _TpVec32F v_sincof_p0 = v_setall_<_TpVec32F>(-1.9515295891E-4f);
|
||||||
|
const _TpVec32F v_sincof_p1 = v_setall_<_TpVec32F>(8.3321608736E-3f);
|
||||||
|
const _TpVec32F v_sincof_p2 = v_setall_<_TpVec32F>(-1.6666654611E-1f);
|
||||||
|
const _TpVec32F v_coscof_p0 = v_setall_<_TpVec32F>(2.443315711809948E-5f);
|
||||||
|
const _TpVec32F v_coscof_p1 = v_setall_<_TpVec32F>(-1.388731625493765E-3f);
|
||||||
|
const _TpVec32F v_coscof_p2 = v_setall_<_TpVec32F>(4.166664568298827E-2f);
|
||||||
|
const _TpVec32F v_nan = v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7fc00000));
|
||||||
|
const _TpVec32F v_neg_zero = v_setall_<_TpVec32F>(-0.f);
|
||||||
|
|
||||||
|
_TpVec32F _vx, _vy, sign_mask_sin, sign_mask_cos;
|
||||||
|
_TpVec32S emm2;
|
||||||
|
|
||||||
|
sign_mask_sin = v_lt(x, v_setzero_<_TpVec32F>());
|
||||||
|
_vx = v_abs(x);
|
||||||
|
_vy = v_mul(_vx, v_cephes_FOPI);
|
||||||
|
|
||||||
|
emm2 = v_trunc(_vy);
|
||||||
|
emm2 = v_add(emm2, v_setall_<_TpVec32S>(1));
|
||||||
|
emm2 = v_and(emm2, v_setall_<_TpVec32S>(~1));
|
||||||
|
_vy = v_cvt_f32(emm2);
|
||||||
|
|
||||||
|
_TpVec32F poly_mask = v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(2)), v_setall_<_TpVec32S>(0)));
|
||||||
|
|
||||||
|
_vx = v_fma(_vy, v_minus_DP1, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP2, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP3, _vx);
|
||||||
|
|
||||||
|
sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f32(v_eq(v_and(emm2, v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0))));
|
||||||
|
sign_mask_cos = v_reinterpret_as_f32(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec32S>(2)), v_setall_<_TpVec32S>(4)), v_setall_<_TpVec32S>(0)));
|
||||||
|
|
||||||
|
_TpVec32F _vxx = v_mul(_vx, _vx);
|
||||||
|
_TpVec32F y1, y2;
|
||||||
|
|
||||||
|
y1 = v_fma(v_coscof_p0, _vxx, v_coscof_p1);
|
||||||
|
y1 = v_fma(y1, _vxx, v_coscof_p2);
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(-0.5f));
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec32F>(1.f));
|
||||||
|
|
||||||
|
y2 = v_fma(v_sincof_p0, _vxx, v_sincof_p1);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sincof_p2);
|
||||||
|
y2 = v_mul(y2, _vxx);
|
||||||
|
y2 = v_fma(y2, _vx, _vx);
|
||||||
|
|
||||||
|
ysin = v_select(poly_mask, y2, y1);
|
||||||
|
ycos = v_select(poly_mask, y1, y2);
|
||||||
|
ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin));
|
||||||
|
ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos);
|
||||||
|
|
||||||
|
// sincos(NAN) -> NAN, sincos(±INF) -> NAN
|
||||||
|
_TpVec32F mask_inf = v_eq(_vx, v_reinterpret_as_f32(v_setall_<_TpVec32S>((int)0x7f800000)));
|
||||||
|
_TpVec32F mask_nan = v_or(mask_inf, v_ne(x, x));
|
||||||
|
ysin = v_select(mask_nan, v_nan, ysin);
|
||||||
|
ycos = v_select(mask_nan, v_nan, ycos);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline _TpVec32F v_sin_default_32f(const _TpVec32F &x) {
|
||||||
|
_TpVec32F ysin, ycos;
|
||||||
|
v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos);
|
||||||
|
return ysin;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline _TpVec32F v_cos_default_32f(const _TpVec32F &x) {
|
||||||
|
_TpVec32F ysin, ycos;
|
||||||
|
v_sincos_default_32f<_TpVec32F, _TpVec32S>(x, ysin, ycos);
|
||||||
|
return ycos;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec64F, typename _TpVec64S>
|
||||||
|
inline void v_sincos_default_64f(const _TpVec64F &x, _TpVec64F &ysin, _TpVec64F &ycos) {
|
||||||
|
const _TpVec64F v_cephes_FOPI = v_setall_<_TpVec64F>(1.2732395447351626861510701069801148); // 4 / M_PI
|
||||||
|
const _TpVec64F v_minus_DP1 = v_setall_<_TpVec64F>(-7.853981554508209228515625E-1);
|
||||||
|
const _TpVec64F v_minus_DP2 = v_setall_<_TpVec64F>(-7.94662735614792836714E-9);
|
||||||
|
const _TpVec64F v_minus_DP3 = v_setall_<_TpVec64F>(-3.06161699786838294307E-17);
|
||||||
|
const _TpVec64F v_sin_C1 = v_setall_<_TpVec64F>(1.58962301576546568060E-10);
|
||||||
|
const _TpVec64F v_sin_C2 = v_setall_<_TpVec64F>(-2.50507477628578072866E-8);
|
||||||
|
const _TpVec64F v_sin_C3 = v_setall_<_TpVec64F>(2.75573136213857245213E-6);
|
||||||
|
const _TpVec64F v_sin_C4 = v_setall_<_TpVec64F>(-1.98412698295895385996E-4);
|
||||||
|
const _TpVec64F v_sin_C5 = v_setall_<_TpVec64F>(8.33333333332211858878E-3);
|
||||||
|
const _TpVec64F v_sin_C6 = v_setall_<_TpVec64F>(-1.66666666666666307295E-1);
|
||||||
|
const _TpVec64F v_cos_C1 = v_setall_<_TpVec64F>(-1.13585365213876817300E-11);
|
||||||
|
const _TpVec64F v_cos_C2 = v_setall_<_TpVec64F>(2.08757008419747316778E-9);
|
||||||
|
const _TpVec64F v_cos_C3 = v_setall_<_TpVec64F>(-2.75573141792967388112E-7);
|
||||||
|
const _TpVec64F v_cos_C4 = v_setall_<_TpVec64F>(2.48015872888517045348E-5);
|
||||||
|
const _TpVec64F v_cos_C5 = v_setall_<_TpVec64F>(-1.38888888888730564116E-3);
|
||||||
|
const _TpVec64F v_cos_C6 = v_setall_<_TpVec64F>(4.16666666666665929218E-2);
|
||||||
|
const _TpVec64F v_nan = v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff8000000000000));
|
||||||
|
const _TpVec64F v_neg_zero = v_setall_<_TpVec64F>(-0.0);
|
||||||
|
|
||||||
|
_TpVec64F _vx, _vy, sign_mask_sin, sign_mask_cos;
|
||||||
|
_TpVec64S emm2;
|
||||||
|
|
||||||
|
sign_mask_sin = v_lt(x, v_setzero_<_TpVec64F>());
|
||||||
|
_vx = v_abs(x);
|
||||||
|
_vy = v_mul(_vx, v_cephes_FOPI);
|
||||||
|
|
||||||
|
emm2 = v_expand_low(v_trunc(_vy));
|
||||||
|
emm2 = v_add(emm2, v_setall_<_TpVec64S>((int64)1));
|
||||||
|
emm2 = v_and(emm2, v_setall_<_TpVec64S>((int64)~1));
|
||||||
|
_vy = v_cvt_f64(emm2);
|
||||||
|
|
||||||
|
_TpVec64F poly_mask = v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)0)));
|
||||||
|
|
||||||
|
_vx = v_fma(_vy, v_minus_DP1, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP2, _vx);
|
||||||
|
_vx = v_fma(_vy, v_minus_DP3, _vx);
|
||||||
|
|
||||||
|
sign_mask_sin = v_xor(sign_mask_sin, v_reinterpret_as_f64(v_eq(v_and(emm2, v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0))));
|
||||||
|
sign_mask_cos = v_reinterpret_as_f64(v_eq(v_and(v_sub(emm2, v_setall_<_TpVec64S>((int64)2)), v_setall_<_TpVec64S>((int64)4)), v_setall_<_TpVec64S>((int64)0)));
|
||||||
|
|
||||||
|
_TpVec64F _vxx = v_mul(_vx, _vx);
|
||||||
|
_TpVec64F y1, y2;
|
||||||
|
|
||||||
|
y1 = v_fma(v_cos_C1, _vxx, v_cos_C2);
|
||||||
|
y1 = v_fma(y1, _vxx, v_cos_C3);
|
||||||
|
y1 = v_fma(y1, _vxx, v_cos_C4);
|
||||||
|
y1 = v_fma(y1, _vxx, v_cos_C5);
|
||||||
|
y1 = v_fma(y1, _vxx, v_cos_C6);
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(-0.5));
|
||||||
|
y1 = v_fma(y1, _vxx, v_setall_<_TpVec64F>(1.0));
|
||||||
|
|
||||||
|
y2 = v_fma(v_sin_C1, _vxx, v_sin_C2);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sin_C3);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sin_C4);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sin_C5);
|
||||||
|
y2 = v_fma(y2, _vxx, v_sin_C6);
|
||||||
|
y2 = v_mul(y2, _vxx);
|
||||||
|
y2 = v_fma(y2, _vx, _vx);
|
||||||
|
|
||||||
|
ysin = v_select(poly_mask, y2, y1);
|
||||||
|
ycos = v_select(poly_mask, y1, y2);
|
||||||
|
ysin = v_select(sign_mask_sin, ysin, v_xor(v_neg_zero, ysin));
|
||||||
|
ycos = v_select(sign_mask_cos, v_xor(v_neg_zero, ycos), ycos);
|
||||||
|
|
||||||
|
// sincos(NAN) -> NAN, sincos(±INF) -> NAN
|
||||||
|
_TpVec64F mask_inf = v_eq(_vx, v_reinterpret_as_f64(v_setall_<_TpVec64S>((int64)0x7ff0000000000000)));
|
||||||
|
_TpVec64F mask_nan = v_or(mask_inf, v_ne(x, x));
|
||||||
|
ysin = v_select(mask_nan, v_nan, ysin);
|
||||||
|
ycos = v_select(mask_nan, v_nan, ycos);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec64F, typename _TpVec64S>
|
||||||
|
inline _TpVec64F v_sin_default_64f(const _TpVec64F &x) {
|
||||||
|
_TpVec64F ysin, ycos;
|
||||||
|
v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos);
|
||||||
|
return ysin;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _TpVec64F, typename _TpVec64S>
|
||||||
|
inline _TpVec64F v_cos_default_64f(const _TpVec64F &x) {
|
||||||
|
_TpVec64F ysin, ycos;
|
||||||
|
v_sincos_default_64f<_TpVec64F, _TpVec64S>(x, ysin, ycos);
|
||||||
|
return ycos;
|
||||||
|
}
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
|
||||||
|
/* This implementation is derived from the approximation approach of Error Function (Erf) from PyTorch
|
||||||
|
https://github.com/pytorch/pytorch/blob/9c50ecc84b9a6e699a7f058891b889aafbf976c7/aten/src/ATen/cpu/vec/vec512/vec512_float.h#L189-L220
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @name Error Function
|
||||||
|
//! @{
|
||||||
|
template<typename _TpVec32F, typename _TpVec32S>
|
||||||
|
inline _TpVec32F v_erf_default_32f(const _TpVec32F &v) {
|
||||||
|
const _TpVec32F coef0 = v_setall_<_TpVec32F>(0.3275911f),
|
||||||
|
coef1 = v_setall_<_TpVec32F>(1.061405429f),
|
||||||
|
coef2 = v_setall_<_TpVec32F>(-1.453152027f),
|
||||||
|
coef3 = v_setall_<_TpVec32F>(1.421413741f),
|
||||||
|
coef4 = v_setall_<_TpVec32F>(-0.284496736f),
|
||||||
|
coef5 = v_setall_<_TpVec32F>(0.254829592f),
|
||||||
|
ones = v_setall_<_TpVec32F>(1.0f),
|
||||||
|
neg_zeros = v_setall_<_TpVec32F>(-0.f);
|
||||||
|
_TpVec32F t = v_abs(v);
|
||||||
|
// sign(v)
|
||||||
|
_TpVec32F sign_mask = v_and(neg_zeros, v);
|
||||||
|
|
||||||
|
t = v_div(ones, v_fma(coef0, t, ones));
|
||||||
|
_TpVec32F r = v_fma(coef1, t, coef2);
|
||||||
|
r = v_fma(r, t, coef3);
|
||||||
|
r = v_fma(r, t, coef4);
|
||||||
|
r = v_fma(r, t, coef5);
|
||||||
|
// - v * v
|
||||||
|
_TpVec32F v2 = v_mul(v, v);
|
||||||
|
_TpVec32F mv2 = v_xor(neg_zeros, v2);
|
||||||
|
// - exp(- v * v)
|
||||||
|
_TpVec32F exp = v_exp_default_32f<_TpVec32F, _TpVec32S>(mv2);
|
||||||
|
_TpVec32F neg_exp = v_xor(neg_zeros, exp);
|
||||||
|
_TpVec32F res = v_mul(t, neg_exp);
|
||||||
|
res = v_fma(r, res, ones);
|
||||||
|
return v_xor(sign_mask, res);
|
||||||
|
}
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_INTRIN_MATH_HPP
|
||||||
1886
3rdpart/OpenCV/include/opencv2/core/hal/intrin_msa.hpp
Normal file
1886
3rdpart/OpenCV/include/opencv2/core/hal/intrin_msa.hpp
Normal file
File diff suppressed because it is too large
Load Diff
2680
3rdpart/OpenCV/include/opencv2/core/hal/intrin_neon.hpp
Normal file
2680
3rdpart/OpenCV/include/opencv2/core/hal/intrin_neon.hpp
Normal file
File diff suppressed because it is too large
Load Diff
2888
3rdpart/OpenCV/include/opencv2/core/hal/intrin_rvv071.hpp
Normal file
2888
3rdpart/OpenCV/include/opencv2/core/hal/intrin_rvv071.hpp
Normal file
File diff suppressed because it is too large
Load Diff
2194
3rdpart/OpenCV/include/opencv2/core/hal/intrin_rvv_scalable.hpp
Normal file
2194
3rdpart/OpenCV/include/opencv2/core/hal/intrin_rvv_scalable.hpp
Normal file
File diff suppressed because it is too large
Load Diff
3483
3rdpart/OpenCV/include/opencv2/core/hal/intrin_sse.hpp
Normal file
3483
3rdpart/OpenCV/include/opencv2/core/hal/intrin_sse.hpp
Normal file
File diff suppressed because it is too large
Load Diff
180
3rdpart/OpenCV/include/opencv2/core/hal/intrin_sse_em.hpp
Normal file
180
3rdpart/OpenCV/include/opencv2/core/hal/intrin_sse_em.hpp
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||||
|
#define OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
// Thin forwarding wrappers: _v128_<fun>(...) simply calls _mm_<fun>(...),
// for 1-, 2- and 3-argument intrinsics of a single register type `tp`.
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
    inline tp _v128_##fun(const tp& a) \
    { return _mm_##fun(a); }

#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b) \
    { return _mm_##fun(a, b); }

#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
    inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
    { return _mm_##fun(a, b, c); }
|
||||||
|
|
||||||
|
///////////////////////////// XOP /////////////////////////////
|
||||||
|
|
||||||
|
// [todo] define CV_XOP
|
||||||
|
#if 1 // CV_XOP
// Unsigned 32-bit "compare greater than" emulation for SSE2: bias both
// operands by 0x80000000 so a signed comparison yields the unsigned ordering.
// Returns all-ones in each lane where (unsigned)a > (unsigned)b, else zero.
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
{
    const __m128i delta = _mm_set1_epi32((int)0x80000000);
    return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
}
// wrapping XOP
#else
// Fix: pass the bare intrinsic suffix to the wrapper macro. The previous
// argument `_v128_comgt_epu32` would have expanded to the nonexistent
// `_mm__v128_comgt_epu32` if the XOP branch were ever enabled.
OPENCV_HAL_SSE_WRAP_2(comgt_epu32, __m128i)
#endif // !CV_XOP
|
||||||
|
|
||||||
|
///////////////////////////// SSE4.1 /////////////////////////////
|
||||||
|
|
||||||
|
#if !CV_SSE4_1
|
||||||
|
|
||||||
|
/** Swizzle **/
|
||||||
|
inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
|
||||||
|
{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); }
|
||||||
|
|
||||||
|
/** Convert **/
|
||||||
|
// 8 >> 16
|
||||||
|
inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
|
||||||
|
{
|
||||||
|
const __m128i z = _mm_setzero_si128();
|
||||||
|
return _mm_unpacklo_epi8(a, z);
|
||||||
|
}
|
||||||
|
inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
|
||||||
|
{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
|
||||||
|
// 8 >> 32
|
||||||
|
inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
|
||||||
|
{
|
||||||
|
const __m128i z = _mm_setzero_si128();
|
||||||
|
return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);
|
||||||
|
}
|
||||||
|
inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
|
||||||
|
{
|
||||||
|
__m128i r = _mm_unpacklo_epi8(a, a);
|
||||||
|
r = _mm_unpacklo_epi8(r, r);
|
||||||
|
return _mm_srai_epi32(r, 24);
|
||||||
|
}
|
||||||
|
// 16 >> 32
|
||||||
|
inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
|
||||||
|
{
|
||||||
|
const __m128i z = _mm_setzero_si128();
|
||||||
|
return _mm_unpacklo_epi16(a, z);
|
||||||
|
}
|
||||||
|
inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
|
||||||
|
{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
|
||||||
|
// 32 >> 64
|
||||||
|
inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
|
||||||
|
{
|
||||||
|
const __m128i z = _mm_setzero_si128();
|
||||||
|
return _mm_unpacklo_epi32(a, z);
|
||||||
|
}
|
||||||
|
inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
|
||||||
|
{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
|
||||||
|
|
||||||
|
/** Arithmetic **/
|
||||||
|
inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
|
||||||
|
{
|
||||||
|
__m128i c0 = _mm_mul_epu32(a, b);
|
||||||
|
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
|
||||||
|
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
|
||||||
|
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
|
||||||
|
return _mm_unpacklo_epi64(d0, d1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Math **/
|
||||||
|
inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
|
||||||
|
{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
|
||||||
|
|
||||||
|
// wrapping SSE4.1
|
||||||
|
#else
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
|
||||||
|
OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
|
||||||
|
#endif // !CV_SSE4_1
|
||||||
|
|
||||||
|
///////////////////////////// Revolutionary /////////////////////////////
|
||||||
|
|
||||||
|
/** Convert **/
|
||||||
|
// 16 << 8
|
||||||
|
// Zero-extend the upper 8 bytes of `a` into eight 16-bit lanes.
inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpackhi_epi8(a, zero);
}
|
||||||
|
// Sign-extend the upper 8 bytes of `a` into eight 16-bit lanes:
// duplicate each byte into a word, then arithmetic-shift right.
inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
{
    return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8);
}
|
||||||
|
// 32 << 16
|
||||||
|
// Zero-extend the upper four 16-bit lanes of `a` into 32-bit lanes.
inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpackhi_epi16(a, zero);
}
|
||||||
|
// Sign-extend the upper four 16-bit lanes of `a` into 32-bit lanes.
inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
{
    return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16);
}
|
||||||
|
// 64 << 32
|
||||||
|
// Zero-extend the upper two 32-bit lanes of `a` into 64-bit lanes.
inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpackhi_epi32(a, zero);
}
|
||||||
|
// Sign-extend the upper two 32-bit lanes of `a` into 64-bit lanes,
// pairing each lane with its own sign (replicated via arithmetic shift).
inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
{
    return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31));
}
|
||||||
|
|
||||||
|
/** Miscellaneous **/
|
||||||
|
inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
|
||||||
|
{
|
||||||
|
const __m128i m = _mm_set1_epi32(65535);
|
||||||
|
__m128i am = _v128_min_epu32(a, m);
|
||||||
|
__m128i bm = _v128_min_epu32(b, m);
|
||||||
|
#if CV_SSE4_1
|
||||||
|
return _mm_packus_epi32(am, bm);
|
||||||
|
#else
|
||||||
|
const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
|
||||||
|
am = _mm_sub_epi32(am, d);
|
||||||
|
bm = _mm_sub_epi32(bm, d);
|
||||||
|
am = _mm_packs_epi32(am, bm);
|
||||||
|
return _mm_sub_epi16(am, nd);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int i>
|
||||||
|
inline int64 _v128_extract_epi64(const __m128i& a)
|
||||||
|
{
|
||||||
|
#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
|
||||||
|
#define CV__SIMD_NATIVE_mm_extract_epi64 1
|
||||||
|
return _mm_extract_epi64(a, i);
|
||||||
|
#else
|
||||||
|
CV_DECL_ALIGNED(16) int64 tmp[2];
|
||||||
|
_mm_store_si128((__m128i*)tmp, a);
|
||||||
|
return tmp[i];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
} // cv::
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
|
||||||
1619
3rdpart/OpenCV/include/opencv2/core/hal/intrin_vsx.hpp
Normal file
1619
3rdpart/OpenCV/include/opencv2/core/hal/intrin_vsx.hpp
Normal file
File diff suppressed because it is too large
Load Diff
2801
3rdpart/OpenCV/include/opencv2/core/hal/intrin_wasm.hpp
Normal file
2801
3rdpart/OpenCV/include/opencv2/core/hal/intrin_wasm.hpp
Normal file
File diff suppressed because it is too large
Load Diff
1558
3rdpart/OpenCV/include/opencv2/core/hal/msa_macros.h
Normal file
1558
3rdpart/OpenCV/include/opencv2/core/hal/msa_macros.h
Normal file
File diff suppressed because it is too large
Load Diff
186
3rdpart/OpenCV/include/opencv2/core/hal/simd_utils.impl.hpp
Normal file
186
3rdpart/OpenCV/include/opencv2/core/hal/simd_utils.impl.hpp
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html
|
||||||
|
|
||||||
|
// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
|
||||||
|
#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp
|
||||||
|
|
||||||
|
|
||||||
|
#if CV_SIMD128 || CV_SIMD128_CPP

// Maps a scalar element type to its 128-bit universal-intrinsic vector type.
template<typename _T> struct Type2Vec128_Traits;
#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec128_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
#if CV_SIMD128_64F
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
#endif

// Broadcast a scalar into the matching 128-bit vector type.
template<typename _T> static inline
typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);

template<> inline Type2Vec128_Traits<uchar>::vec_type v_setall<uchar>(const uchar& a) { return v_setall_u8(a); }
template<> inline Type2Vec128_Traits<schar>::vec_type v_setall<schar>(const schar& a) { return v_setall_s8(a); }
template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
template<> inline Type2Vec128_Traits<short>::vec_type v_setall<short>(const short& a) { return v_setall_s16(a); }
template<> inline Type2Vec128_Traits<uint>::vec_type v_setall<uint>(const uint& a) { return v_setall_u32(a); }
template<> inline Type2Vec128_Traits<int>::vec_type v_setall<int>(const int& a) { return v_setall_s32(a); }
template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
template<> inline Type2Vec128_Traits<int64>::vec_type v_setall<int64>(const int64& a) { return v_setall_s64(a); }
template<> inline Type2Vec128_Traits<float>::vec_type v_setall<float>(const float& a) { return v_setall_f32(a); }
#if CV_SIMD128_64F
template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
#endif

#endif // SIMD128
|
||||||
|
|
||||||
|
|
||||||
|
#if CV_SIMD256

// Maps a scalar element type to its 256-bit universal-intrinsic vector type.
template<typename _T> struct Type2Vec256_Traits;
#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec256_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
#if CV_SIMD256_64F
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
#endif

// Broadcast a scalar into the matching 256-bit vector type.
template<typename _T> static inline
typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);

template<> inline Type2Vec256_Traits<uchar>::vec_type v256_setall<uchar>(const uchar& a) { return v256_setall_u8(a); }
template<> inline Type2Vec256_Traits<schar>::vec_type v256_setall<schar>(const schar& a) { return v256_setall_s8(a); }
template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
template<> inline Type2Vec256_Traits<short>::vec_type v256_setall<short>(const short& a) { return v256_setall_s16(a); }
template<> inline Type2Vec256_Traits<uint>::vec_type v256_setall<uint>(const uint& a) { return v256_setall_u32(a); }
template<> inline Type2Vec256_Traits<int>::vec_type v256_setall<int>(const int& a) { return v256_setall_s32(a); }
template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
template<> inline Type2Vec256_Traits<int64>::vec_type v256_setall<int64>(const int64& a) { return v256_setall_s64(a); }
template<> inline Type2Vec256_Traits<float>::vec_type v256_setall<float>(const float& a) { return v256_setall_f32(a); }
#if CV_SIMD256_64F
template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
#endif

#endif // SIMD256
|
||||||
|
|
||||||
|
|
||||||
|
#if CV_SIMD512

// Maps a scalar element type to its 512-bit universal-intrinsic vector type.
template<typename _T> struct Type2Vec512_Traits;
#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec512_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
#if CV_SIMD512_64F
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
#endif

// Broadcast a scalar into the matching 512-bit vector type.
template<typename _T> static inline
typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);

template<> inline Type2Vec512_Traits<uchar>::vec_type v512_setall<uchar>(const uchar& a) { return v512_setall_u8(a); }
template<> inline Type2Vec512_Traits<schar>::vec_type v512_setall<schar>(const schar& a) { return v512_setall_s8(a); }
template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
template<> inline Type2Vec512_Traits<short>::vec_type v512_setall<short>(const short& a) { return v512_setall_s16(a); }
template<> inline Type2Vec512_Traits<uint>::vec_type v512_setall<uint>(const uint& a) { return v512_setall_u32(a); }
template<> inline Type2Vec512_Traits<int>::vec_type v512_setall<int>(const int& a) { return v512_setall_s32(a); }
template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
template<> inline Type2Vec512_Traits<int64>::vec_type v512_setall<int64>(const int64& a) { return v512_setall_s64(a); }
template<> inline Type2Vec512_Traits<float>::vec_type v512_setall<float>(const float& a) { return v512_setall_f32(a); }
#if CV_SIMD512_64F
template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
#endif

#endif // SIMD512
|
||||||
|
|
||||||
|
#if CV_SIMD_SCALABLE
// Maps a scalar element type to its scalable (length-agnostic) vector type.
template<typename _T> struct Type2Vec_Traits;
#define CV_INTRIN_DEF_TYPE2VEC_TRAITS(type_, vec_type_) \
    template<> struct Type2Vec_Traits<type_> \
    { \
        typedef vec_type_ vec_type; \
    }

CV_INTRIN_DEF_TYPE2VEC_TRAITS(uchar, v_uint8);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(schar, v_int8);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(ushort, v_uint16);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(short, v_int16);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(unsigned, v_uint32);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(int, v_int32);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(float, v_float32);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(uint64, v_uint64);
CV_INTRIN_DEF_TYPE2VEC_TRAITS(int64, v_int64);
#if CV_SIMD_SCALABLE_64F
CV_INTRIN_DEF_TYPE2VEC_TRAITS(double, v_float64);
#endif

// Broadcast a scalar into the matching scalable vector type.
template<typename _T> static inline
typename Type2Vec_Traits<_T>::vec_type v_setall(const _T& a);

template<> inline Type2Vec_Traits<uchar>::vec_type v_setall<uchar>(const uchar& a) { return v_setall_u8(a); }
template<> inline Type2Vec_Traits<schar>::vec_type v_setall<schar>(const schar& a) { return v_setall_s8(a); }
template<> inline Type2Vec_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
template<> inline Type2Vec_Traits<short>::vec_type v_setall<short>(const short& a) { return v_setall_s16(a); }
template<> inline Type2Vec_Traits<uint>::vec_type v_setall<uint>(const uint& a) { return v_setall_u32(a); }
template<> inline Type2Vec_Traits<int>::vec_type v_setall<int>(const int& a) { return v_setall_s32(a); }
template<> inline Type2Vec_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
template<> inline Type2Vec_Traits<int64>::vec_type v_setall<int64>(const int64& a) { return v_setall_s64(a); }
template<> inline Type2Vec_Traits<float>::vec_type v_setall<float>(const float& a) { return v_setall_f32(a); }
#if CV_SIMD_SCALABLE_64F
template<> inline Type2Vec_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
#endif
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if CV_SIMD_SCALABLE
|
||||||
|
template<typename _T> static inline
|
||||||
|
typename Type2Vec_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
|
||||||
|
#elif CV_SIMD_WIDTH == 16
|
||||||
|
template<typename _T> static inline
|
||||||
|
typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
|
||||||
|
#elif CV_SIMD_WIDTH == 32
|
||||||
|
template<typename _T> static inline
|
||||||
|
typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
|
||||||
|
#elif CV_SIMD_WIDTH == 64
|
||||||
|
template<typename _T> static inline
|
||||||
|
typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
|
||||||
|
#else
|
||||||
|
#error "Build configuration error, unsupported CV_SIMD_WIDTH"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_INTRIN_HPP
|
||||||
3863
3rdpart/OpenCV/include/opencv2/core/mat.hpp
Normal file
3863
3rdpart/OpenCV/include/opencv2/core/mat.hpp
Normal file
File diff suppressed because it is too large
Load Diff
3441
3rdpart/OpenCV/include/opencv2/core/mat.inl.hpp
Normal file
3441
3rdpart/OpenCV/include/opencv2/core/mat.inl.hpp
Normal file
File diff suppressed because it is too large
Load Diff
544
3rdpart/OpenCV/include/opencv2/core/matx.hpp
Normal file
544
3rdpart/OpenCV/include/opencv2/core/matx.hpp
Normal file
@@ -0,0 +1,544 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_MATX_HPP
|
||||||
|
#define OPENCV_CORE_MATX_HPP
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
|
# error matx.hpp header must be compiled as C++
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core/base.hpp"
|
||||||
|
#include "opencv2/core/traits.hpp"
|
||||||
|
#include "opencv2/core/saturate.hpp"
|
||||||
|
|
||||||
|
#include <initializer_list>
|
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
|
||||||
|
//! @addtogroup core_basic
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
// FIXIT Remove this (especially CV_EXPORTS modifier)
|
||||||
|
struct CV_EXPORTS Matx_AddOp { Matx_AddOp() {} Matx_AddOp(const Matx_AddOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_SubOp { Matx_SubOp() {} Matx_SubOp(const Matx_SubOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_ScaleOp { Matx_ScaleOp() {} Matx_ScaleOp(const Matx_ScaleOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_MulOp { Matx_MulOp() {} Matx_MulOp(const Matx_MulOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_DivOp { Matx_DivOp() {} Matx_DivOp(const Matx_DivOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_MatMulOp { Matx_MatMulOp() {} Matx_MatMulOp(const Matx_MatMulOp&) {} };
|
||||||
|
struct CV_EXPORTS Matx_TOp { Matx_TOp() {} Matx_TOp(const Matx_TOp&) {} };
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
////////////////////////////// Small Matrix ///////////////////////////
|
||||||
|
|
||||||
|
/** @brief Template class for small matrices whose type and size are known at compilation time
|
||||||
|
|
||||||
|
If you need a more flexible type, use Mat . The elements of the matrix M are accessible using the
|
||||||
|
M(i,j) notation. Most of the common matrix operations (see also @ref MatrixExpressions ) are
|
||||||
|
available. To do an operation on Matx that is not implemented, you can easily convert the matrix to
|
||||||
|
Mat and backwards:
|
||||||
|
@code{.cpp}
|
||||||
|
Matx33f m(1, 2, 3,
|
||||||
|
4, 5, 6,
|
||||||
|
7, 8, 9);
|
||||||
|
cout << sum(Mat(m*m.t())) << endl;
|
||||||
|
@endcode
|
||||||
|
Except of the plain constructor which takes a list of elements, Matx can be initialized from a C-array:
|
||||||
|
@code{.cpp}
|
||||||
|
float values[] = { 1, 2, 3};
|
||||||
|
Matx31f m(values);
|
||||||
|
@endcode
|
||||||
|
In case if C++11 features are available, std::initializer_list can be also used to initialize Matx:
|
||||||
|
@code{.cpp}
|
||||||
|
Matx31f m = { 1, 2, 3};
|
||||||
|
@endcode
|
||||||
|
*/
|
||||||
|
template<typename _Tp, int m, int n> class Matx
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
rows = m,
|
||||||
|
cols = n,
|
||||||
|
channels = rows*cols,
|
||||||
|
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
|
||||||
|
depth = traits::Type<_Tp>::value,
|
||||||
|
type = CV_MAKETYPE(depth, channels),
|
||||||
|
#endif
|
||||||
|
shortdim = (m < n ? m : n)
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef _Tp value_type;
|
||||||
|
typedef Matx<_Tp, m, n> mat_type;
|
||||||
|
typedef Matx<_Tp, shortdim, 1> diag_type;
|
||||||
|
|
||||||
|
//! default constructor
|
||||||
|
Matx();
|
||||||
|
|
||||||
|
explicit Matx(_Tp v0); //!< 1x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1); //!< 1x2 or 2x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2); //!< 1x3 or 3x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 1x4, 2x2 or 4x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 1x5 or 5x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 1x6, 2x3, 3x2 or 6x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 1x7 or 7x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 1x8, 2x4, 4x2 or 8x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 1x9, 3x3 or 9x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 1x10, 2x5 or 5x2 or 10x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
|
||||||
|
_Tp v4, _Tp v5, _Tp v6, _Tp v7,
|
||||||
|
_Tp v8, _Tp v9, _Tp v10, _Tp v11); //!< 1x12, 2x6, 3x4, 4x3, 6x2 or 12x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
|
||||||
|
_Tp v4, _Tp v5, _Tp v6, _Tp v7,
|
||||||
|
_Tp v8, _Tp v9, _Tp v10, _Tp v11,
|
||||||
|
_Tp v12, _Tp v13); //!< 1x14, 2x7, 7x2 or 14x1 matrix
|
||||||
|
Matx(_Tp v0, _Tp v1, _Tp v2, _Tp v3,
|
||||||
|
_Tp v4, _Tp v5, _Tp v6, _Tp v7,
|
||||||
|
_Tp v8, _Tp v9, _Tp v10, _Tp v11,
|
||||||
|
_Tp v12, _Tp v13, _Tp v14, _Tp v15); //!< 1x16, 4x4 or 16x1 matrix
|
||||||
|
explicit Matx(const _Tp* vals); //!< initialize from a plain array
|
||||||
|
|
||||||
|
Matx(std::initializer_list<_Tp>); //!< initialize from an initializer list
|
||||||
|
|
||||||
|
CV_NODISCARD_STD static Matx all(_Tp alpha);
|
||||||
|
CV_NODISCARD_STD static Matx zeros();
|
||||||
|
CV_NODISCARD_STD static Matx ones();
|
||||||
|
CV_NODISCARD_STD static Matx eye();
|
||||||
|
CV_NODISCARD_STD static Matx diag(const diag_type& d);
|
||||||
|
/** @brief Generates uniformly distributed random numbers
|
||||||
|
@param a Range boundary.
|
||||||
|
@param b The other range boundary (boundaries don't have to be ordered, the lower boundary is inclusive,
|
||||||
|
the upper one is exclusive).
|
||||||
|
*/
|
||||||
|
CV_NODISCARD_STD static Matx randu(_Tp a, _Tp b);
|
||||||
|
/** @brief Generates normally distributed random numbers
|
||||||
|
@param a Mean value.
|
||||||
|
@param b Standard deviation.
|
||||||
|
*/
|
||||||
|
CV_NODISCARD_STD static Matx randn(_Tp a, _Tp b);
|
||||||
|
|
||||||
|
//! dot product computed with the default precision
|
||||||
|
_Tp dot(const Matx<_Tp, m, n>& v) const;
|
||||||
|
|
||||||
|
//! dot product computed in double-precision arithmetics
|
||||||
|
double ddot(const Matx<_Tp, m, n>& v) const;
|
||||||
|
|
||||||
|
//! conversion to another data type
|
||||||
|
template<typename T2> operator Matx<T2, m, n>() const;
|
||||||
|
|
||||||
|
//! change the matrix shape
|
||||||
|
template<int m1, int n1> Matx<_Tp, m1, n1> reshape() const;
|
||||||
|
|
||||||
|
//! extract part of the matrix
|
||||||
|
template<int m1, int n1> Matx<_Tp, m1, n1> get_minor(int base_row, int base_col) const;
|
||||||
|
|
||||||
|
//! extract the matrix row
|
||||||
|
Matx<_Tp, 1, n> row(int i) const;
|
||||||
|
|
||||||
|
//! extract the matrix column
|
||||||
|
Matx<_Tp, m, 1> col(int i) const;
|
||||||
|
|
||||||
|
//! extract the matrix diagonal
|
||||||
|
diag_type diag() const;
|
||||||
|
|
||||||
|
//! transpose the matrix
|
||||||
|
Matx<_Tp, n, m> t() const;
|
||||||
|
|
||||||
|
//! invert the matrix
|
||||||
|
Matx<_Tp, n, m> inv(int method=DECOMP_LU, bool *p_is_ok = NULL) const;
|
||||||
|
|
||||||
|
//! solve linear system
|
||||||
|
template<int l> Matx<_Tp, n, l> solve(const Matx<_Tp, m, l>& rhs, int flags=DECOMP_LU) const;
|
||||||
|
Vec<_Tp, n> solve(const Vec<_Tp, m>& rhs, int method) const;
|
||||||
|
|
||||||
|
//! multiply two matrices element-wise
|
||||||
|
Matx<_Tp, m, n> mul(const Matx<_Tp, m, n>& a) const;
|
||||||
|
|
||||||
|
//! divide two matrices element-wise
|
||||||
|
Matx<_Tp, m, n> div(const Matx<_Tp, m, n>& a) const;
|
||||||
|
|
||||||
|
//! element access
|
||||||
|
const _Tp& operator ()(int row, int col) const;
|
||||||
|
_Tp& operator ()(int row, int col);
|
||||||
|
|
||||||
|
//! 1D element access
|
||||||
|
const _Tp& operator ()(int i) const;
|
||||||
|
_Tp& operator ()(int i);
|
||||||
|
|
||||||
|
Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_AddOp);
|
||||||
|
Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp);
|
||||||
|
template<typename _T2> Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp);
|
||||||
|
Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp);
|
||||||
|
Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp);
|
||||||
|
template<int l> Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp);
|
||||||
|
Matx(const Matx<_Tp, n, m>& a, Matx_TOp);
|
||||||
|
|
||||||
|
_Tp val[m*n]; ///< matrix elements
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef Matx<float, 1, 2> Matx12f;
|
||||||
|
typedef Matx<double, 1, 2> Matx12d;
|
||||||
|
typedef Matx<float, 1, 3> Matx13f;
|
||||||
|
typedef Matx<double, 1, 3> Matx13d;
|
||||||
|
typedef Matx<float, 1, 4> Matx14f;
|
||||||
|
typedef Matx<double, 1, 4> Matx14d;
|
||||||
|
typedef Matx<float, 1, 6> Matx16f;
|
||||||
|
typedef Matx<double, 1, 6> Matx16d;
|
||||||
|
|
||||||
|
typedef Matx<float, 2, 1> Matx21f;
|
||||||
|
typedef Matx<double, 2, 1> Matx21d;
|
||||||
|
typedef Matx<float, 3, 1> Matx31f;
|
||||||
|
typedef Matx<double, 3, 1> Matx31d;
|
||||||
|
typedef Matx<float, 4, 1> Matx41f;
|
||||||
|
typedef Matx<double, 4, 1> Matx41d;
|
||||||
|
typedef Matx<float, 6, 1> Matx61f;
|
||||||
|
typedef Matx<double, 6, 1> Matx61d;
|
||||||
|
|
||||||
|
typedef Matx<float, 2, 2> Matx22f;
|
||||||
|
typedef Matx<double, 2, 2> Matx22d;
|
||||||
|
typedef Matx<float, 2, 3> Matx23f;
|
||||||
|
typedef Matx<double, 2, 3> Matx23d;
|
||||||
|
typedef Matx<float, 3, 2> Matx32f;
|
||||||
|
typedef Matx<double, 3, 2> Matx32d;
|
||||||
|
|
||||||
|
typedef Matx<float, 3, 3> Matx33f;
|
||||||
|
typedef Matx<double, 3, 3> Matx33d;
|
||||||
|
|
||||||
|
typedef Matx<float, 3, 4> Matx34f;
|
||||||
|
typedef Matx<double, 3, 4> Matx34d;
|
||||||
|
typedef Matx<float, 4, 3> Matx43f;
|
||||||
|
typedef Matx<double, 4, 3> Matx43d;
|
||||||
|
|
||||||
|
typedef Matx<float, 4, 4> Matx44f;
|
||||||
|
typedef Matx<double, 4, 4> Matx44d;
|
||||||
|
typedef Matx<float, 6, 6> Matx66f;
|
||||||
|
typedef Matx<double, 6, 6> Matx66d;
|
||||||
|
|
||||||
|
template<typename _Tp, int m> static inline
|
||||||
|
double determinant(const Matx<_Tp, m, m>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
double trace(const Matx<_Tp, m, n>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
double norm(const Matx<_Tp, m, n>& M);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
double norm(const Matx<_Tp, m, n>& M, int normType);
|
||||||
|
|
||||||
|
template<typename _Tp1, typename _Tp2, int m, int n> static inline
|
||||||
|
Matx<_Tp1, m, n>& operator += (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp1, typename _Tp2, int m, int n> static inline
|
||||||
|
Matx<_Tp1, m, n>& operator -= (Matx<_Tp1, m, n>& a, const Matx<_Tp2, m, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator + (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n>& operator *= (Matx<_Tp, m, n>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (const Matx<_Tp, m, n>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (int alpha, const Matx<_Tp, m, n>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (float alpha, const Matx<_Tp, m, n>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n, int l> static inline
|
||||||
|
Matx<_Tp, m, n> operator * (const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int m, int n> static inline
|
||||||
|
bool operator != (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b);
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////// Vec (used as element of multi-channel images /////////////////////
|
||||||
|
|
||||||
|
/** @brief Template class for short numerical vectors, a partial case of Matx
|
||||||
|
|
||||||
|
This template class represents short numerical vectors (of 1, 2, 3, 4 ... elements) on which you
|
||||||
|
can perform basic arithmetical operations, access individual elements using [] operator etc. The
|
||||||
|
vectors are allocated on stack, as opposite to std::valarray, std::vector, cv::Mat etc., which
|
||||||
|
elements are dynamically allocated in the heap.
|
||||||
|
|
||||||
|
The template takes 2 parameters:
|
||||||
|
@tparam _Tp element type
|
||||||
|
@tparam cn the number of elements
|
||||||
|
|
||||||
|
In addition to the universal notation like Vec<float, 3>, you can use shorter aliases
|
||||||
|
for the most popular specialized variants of Vec, e.g. Vec3f ~ Vec<float, 3>.
|
||||||
|
|
||||||
|
It is possible to convert Vec\<T,2\> to/from Point_, Vec\<T,3\> to/from Point3_ , and Vec\<T,4\>
|
||||||
|
to CvScalar or Scalar_. Use operator[] to access the elements of Vec.
|
||||||
|
|
||||||
|
All the expected vector operations are also implemented:
|
||||||
|
- v1 = v2 + v3
|
||||||
|
- v1 = v2 - v3
|
||||||
|
- v1 = v2 \* scale
|
||||||
|
- v1 = scale \* v2
|
||||||
|
- v1 = -v2
|
||||||
|
- v1 += v2 and other augmenting operations
|
||||||
|
- v1 == v2, v1 != v2
|
||||||
|
- norm(v1) (euclidean norm)
|
||||||
|
The Vec class is commonly used to describe pixel types of multi-channel arrays. See Mat for details.
|
||||||
|
*/
|
||||||
|
template<typename _Tp, int cn> class Vec : public Matx<_Tp, cn, 1>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef _Tp value_type;
|
||||||
|
enum {
|
||||||
|
channels = cn,
|
||||||
|
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
|
||||||
|
depth = Matx<_Tp, cn, 1>::depth,
|
||||||
|
type = CV_MAKETYPE(depth, channels),
|
||||||
|
#endif
|
||||||
|
_dummy_enum_finalizer = 0
|
||||||
|
};
|
||||||
|
|
||||||
|
//! default constructor
|
||||||
|
Vec();
|
||||||
|
|
||||||
|
Vec(_Tp v0); //!< 1-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1); //!< 2-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2); //!< 3-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3); //!< 4-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4); //!< 5-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5); //!< 6-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6); //!< 7-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7); //!< 8-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8); //!< 9-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9); //!< 10-element vector constructor
|
||||||
|
Vec(_Tp v0, _Tp v1, _Tp v2, _Tp v3, _Tp v4, _Tp v5, _Tp v6, _Tp v7, _Tp v8, _Tp v9, _Tp v10, _Tp v11, _Tp v12, _Tp v13); //!< 14-element vector constructor
|
||||||
|
explicit Vec(const _Tp* values);
|
||||||
|
|
||||||
|
Vec(std::initializer_list<_Tp>);
|
||||||
|
|
||||||
|
Vec(const Vec<_Tp, cn>& v);
|
||||||
|
|
||||||
|
static Vec all(_Tp alpha);
|
||||||
|
static Vec ones();
|
||||||
|
static Vec randn(_Tp a, _Tp b);
|
||||||
|
static Vec randu(_Tp a, _Tp b);
|
||||||
|
static Vec zeros();
|
||||||
|
static Vec diag(_Tp alpha) = delete;
|
||||||
|
static Vec eye() = delete;
|
||||||
|
|
||||||
|
//! per-element multiplication
|
||||||
|
Vec mul(const Vec<_Tp, cn>& v) const;
|
||||||
|
|
||||||
|
//! conjugation (makes sense for complex numbers and quaternions)
|
||||||
|
Vec conj() const;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
cross product of the two 3D vectors.
|
||||||
|
|
||||||
|
For other dimensionalities the exception is raised
|
||||||
|
*/
|
||||||
|
Vec cross(const Vec& v) const;
|
||||||
|
//! conversion to another data type
|
||||||
|
template<typename T2> operator Vec<T2, cn>() const;
|
||||||
|
|
||||||
|
/*! element access */
|
||||||
|
const _Tp& operator [](int i) const;
|
||||||
|
_Tp& operator[](int i);
|
||||||
|
const _Tp& operator ()(int i) const;
|
||||||
|
_Tp& operator ()(int i);
|
||||||
|
|
||||||
|
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
|
||||||
|
|
||||||
|
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
|
||||||
|
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
|
||||||
|
template<typename _T2> Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp);
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @name Shorter aliases for the most popular specializations of Vec<T,n>
|
||||||
|
@{
|
||||||
|
*/
|
||||||
|
typedef Vec<uchar, 2> Vec2b;
|
||||||
|
typedef Vec<uchar, 3> Vec3b;
|
||||||
|
typedef Vec<uchar, 4> Vec4b;
|
||||||
|
|
||||||
|
typedef Vec<short, 2> Vec2s;
|
||||||
|
typedef Vec<short, 3> Vec3s;
|
||||||
|
typedef Vec<short, 4> Vec4s;
|
||||||
|
|
||||||
|
typedef Vec<ushort, 2> Vec2w;
|
||||||
|
typedef Vec<ushort, 3> Vec3w;
|
||||||
|
typedef Vec<ushort, 4> Vec4w;
|
||||||
|
|
||||||
|
typedef Vec<int, 2> Vec2i;
|
||||||
|
typedef Vec<int, 3> Vec3i;
|
||||||
|
typedef Vec<int, 4> Vec4i;
|
||||||
|
typedef Vec<int, 6> Vec6i;
|
||||||
|
typedef Vec<int, 8> Vec8i;
|
||||||
|
|
||||||
|
typedef Vec<float, 2> Vec2f;
|
||||||
|
typedef Vec<float, 3> Vec3f;
|
||||||
|
typedef Vec<float, 4> Vec4f;
|
||||||
|
typedef Vec<float, 6> Vec6f;
|
||||||
|
|
||||||
|
typedef Vec<double, 2> Vec2d;
|
||||||
|
typedef Vec<double, 3> Vec3d;
|
||||||
|
typedef Vec<double, 4> Vec4d;
|
||||||
|
typedef Vec<double, 6> Vec6d;
|
||||||
|
/** @} */
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> inline
|
||||||
|
Vec<_Tp, cn> normalize(const Vec<_Tp, cn>& v);
|
||||||
|
|
||||||
|
template<typename _Tp1, typename _Tp2, int cn> static inline
|
||||||
|
Vec<_Tp1, cn>& operator += (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b);
|
||||||
|
|
||||||
|
template<typename _Tp1, typename _Tp2, int cn> static inline
|
||||||
|
Vec<_Tp1, cn>& operator -= (Vec<_Tp1, cn>& a, const Vec<_Tp2, cn>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator + (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a, const Vec<_Tp, cn>& b);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator *= (Vec<_Tp, cn>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn>& operator /= (Vec<_Tp, cn>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (int alpha, const Vec<_Tp, cn>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (float alpha, const Vec<_Tp, cn>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (const Vec<_Tp, cn>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator * (double alpha, const Vec<_Tp, cn>& a);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, int alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, float alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator / (const Vec<_Tp, cn>& a, double alpha);
|
||||||
|
|
||||||
|
template<typename _Tp, int cn> static inline
|
||||||
|
Vec<_Tp, cn> operator - (const Vec<_Tp, cn>& a);
|
||||||
|
|
||||||
|
template<typename _Tp> inline
|
||||||
|
Vec<_Tp, 4> operator * (const Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2);
|
||||||
|
|
||||||
|
template<typename _Tp> inline
|
||||||
|
Vec<_Tp, 4>& operator *= (Vec<_Tp, 4>& v1, const Vec<_Tp, 4>& v2);
|
||||||
|
|
||||||
|
//! @} core_basic
|
||||||
|
|
||||||
|
} // cv
|
||||||
|
|
||||||
|
#include "opencv2/core/matx.inl.hpp"
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_MATX_HPP
|
||||||
1115
3rdpart/OpenCV/include/opencv2/core/matx.inl.hpp
Normal file
1115
3rdpart/OpenCV/include/opencv2/core/matx.inl.hpp
Normal file
File diff suppressed because it is too large
Load Diff
128
3rdpart/OpenCV/include/opencv2/core/neon_utils.hpp
Normal file
128
3rdpart/OpenCV/include/opencv2/core/neon_utils.hpp
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_HAL_NEON_UTILS_HPP
|
||||||
|
#define OPENCV_HAL_NEON_UTILS_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/cvdef.h"
|
||||||
|
|
||||||
|
//! @addtogroup core_utils_neon
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
#if CV_NEON
|
||||||
|
|
||||||
|
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
|
||||||
|
{
|
||||||
|
static int32x2_t v_sign = vdup_n_s32(1 << 31),
|
||||||
|
v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
|
||||||
|
|
||||||
|
int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
|
||||||
|
return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
|
||||||
|
{
|
||||||
|
static int32x4_t v_sign = vdupq_n_s32(1 << 31),
|
||||||
|
v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
|
||||||
|
|
||||||
|
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
|
||||||
|
return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
|
||||||
|
{
|
||||||
|
static float32x2_t v_05 = vdup_n_f32(0.5f);
|
||||||
|
return vcvt_u32_f32(vadd_f32(v, v_05));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
|
||||||
|
{
|
||||||
|
static float32x4_t v_05 = vdupq_n_f32(0.5f);
|
||||||
|
return vcvtq_u32_f32(vaddq_f32(v, v_05));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
|
||||||
|
{
|
||||||
|
float32x4_t reciprocal = vrecpeq_f32(val);
|
||||||
|
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
|
||||||
|
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
|
||||||
|
return reciprocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x2_t cv_vrecp_f32(float32x2_t val)
|
||||||
|
{
|
||||||
|
float32x2_t reciprocal = vrecpe_f32(val);
|
||||||
|
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
|
||||||
|
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
|
||||||
|
return reciprocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
|
||||||
|
{
|
||||||
|
float32x4_t e = vrsqrteq_f32(val);
|
||||||
|
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
|
||||||
|
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
|
||||||
|
{
|
||||||
|
float32x2_t e = vrsqrte_f32(val);
|
||||||
|
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
|
||||||
|
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
|
||||||
|
{
|
||||||
|
return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
|
||||||
|
{
|
||||||
|
return cv_vrecp_f32(cv_vrsqrt_f32(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
#endif // OPENCV_HAL_NEON_UTILS_HPP
|
||||||
923
3rdpart/OpenCV/include/opencv2/core/ocl.hpp
Normal file
923
3rdpart/OpenCV/include/opencv2/core/ocl.hpp
Normal file
@@ -0,0 +1,923 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_OPENCL_HPP
|
||||||
|
#define OPENCV_OPENCL_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core.hpp"
|
||||||
|
#include <typeinfo>
|
||||||
|
#include <typeindex>
|
||||||
|
|
||||||
|
namespace cv { namespace ocl {
|
||||||
|
|
||||||
|
//! @addtogroup core_opencl
|
||||||
|
//! @{
|
||||||
|
|
||||||
|
CV_EXPORTS_W bool haveOpenCL();
|
||||||
|
CV_EXPORTS_W bool useOpenCL();
|
||||||
|
CV_EXPORTS_W bool haveAmdBlas();
|
||||||
|
CV_EXPORTS_W bool haveAmdFft();
|
||||||
|
CV_EXPORTS_W void setUseOpenCL(bool flag);
|
||||||
|
CV_EXPORTS_W void finish();
|
||||||
|
|
||||||
|
CV_EXPORTS bool haveSVM();
|
||||||
|
|
||||||
|
class CV_EXPORTS Context;
|
||||||
|
class CV_EXPORTS_W_SIMPLE Device;
|
||||||
|
class CV_EXPORTS Kernel;
|
||||||
|
class CV_EXPORTS Program;
|
||||||
|
class CV_EXPORTS ProgramSource;
|
||||||
|
class CV_EXPORTS Queue;
|
||||||
|
class CV_EXPORTS PlatformInfo;
|
||||||
|
class CV_EXPORTS Image2D;
|
||||||
|
|
||||||
|
class CV_EXPORTS_W_SIMPLE Device
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CV_WRAP Device() CV_NOEXCEPT;
|
||||||
|
explicit Device(void* d);
|
||||||
|
Device(const Device& d);
|
||||||
|
Device& operator = (const Device& d);
|
||||||
|
Device(Device&& d) CV_NOEXCEPT;
|
||||||
|
Device& operator = (Device&& d) CV_NOEXCEPT;
|
||||||
|
CV_WRAP ~Device();
|
||||||
|
|
||||||
|
void set(void* d);
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TYPE_DEFAULT = (1 << 0),
|
||||||
|
TYPE_CPU = (1 << 1),
|
||||||
|
TYPE_GPU = (1 << 2),
|
||||||
|
TYPE_ACCELERATOR = (1 << 3),
|
||||||
|
TYPE_DGPU = TYPE_GPU + (1 << 16),
|
||||||
|
TYPE_IGPU = TYPE_GPU + (1 << 17),
|
||||||
|
TYPE_ALL = 0xFFFFFFFF
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_WRAP String name() const;
|
||||||
|
CV_WRAP String extensions() const;
|
||||||
|
CV_WRAP bool isExtensionSupported(const String& extensionName) const;
|
||||||
|
CV_WRAP String version() const;
|
||||||
|
CV_WRAP String vendorName() const;
|
||||||
|
CV_WRAP String OpenCL_C_Version() const;
|
||||||
|
CV_WRAP String OpenCLVersion() const;
|
||||||
|
CV_WRAP int deviceVersionMajor() const;
|
||||||
|
CV_WRAP int deviceVersionMinor() const;
|
||||||
|
CV_WRAP String driverVersion() const;
|
||||||
|
void* ptr() const;
|
||||||
|
|
||||||
|
CV_WRAP int type() const;
|
||||||
|
|
||||||
|
CV_WRAP int addressBits() const;
|
||||||
|
CV_WRAP bool available() const;
|
||||||
|
CV_WRAP bool compilerAvailable() const;
|
||||||
|
CV_WRAP bool linkerAvailable() const;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
FP_DENORM=(1 << 0),
|
||||||
|
FP_INF_NAN=(1 << 1),
|
||||||
|
FP_ROUND_TO_NEAREST=(1 << 2),
|
||||||
|
FP_ROUND_TO_ZERO=(1 << 3),
|
||||||
|
FP_ROUND_TO_INF=(1 << 4),
|
||||||
|
FP_FMA=(1 << 5),
|
||||||
|
FP_SOFT_FLOAT=(1 << 6),
|
||||||
|
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
|
||||||
|
};
|
||||||
|
CV_WRAP int doubleFPConfig() const;
|
||||||
|
CV_WRAP int singleFPConfig() const;
|
||||||
|
CV_WRAP int halfFPConfig() const;
|
||||||
|
|
||||||
|
/// true if 'cl_khr_fp64' extension is available
|
||||||
|
CV_WRAP bool hasFP64() const;
|
||||||
|
/// true if 'cl_khr_fp16' extension is available
|
||||||
|
CV_WRAP bool hasFP16() const;
|
||||||
|
|
||||||
|
CV_WRAP bool endianLittle() const;
|
||||||
|
CV_WRAP bool errorCorrectionSupport() const;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
EXEC_KERNEL=(1 << 0),
|
||||||
|
EXEC_NATIVE_KERNEL=(1 << 1)
|
||||||
|
};
|
||||||
|
CV_WRAP int executionCapabilities() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t globalMemCacheSize() const;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
NO_CACHE=0,
|
||||||
|
READ_ONLY_CACHE=1,
|
||||||
|
READ_WRITE_CACHE=2
|
||||||
|
};
|
||||||
|
CV_WRAP int globalMemCacheType() const;
|
||||||
|
CV_WRAP int globalMemCacheLineSize() const;
|
||||||
|
CV_WRAP size_t globalMemSize() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t localMemSize() const;
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
NO_LOCAL_MEM=0,
|
||||||
|
LOCAL_IS_LOCAL=1,
|
||||||
|
LOCAL_IS_GLOBAL=2
|
||||||
|
};
|
||||||
|
CV_WRAP int localMemType() const;
|
||||||
|
CV_WRAP bool hostUnifiedMemory() const;
|
||||||
|
|
||||||
|
CV_WRAP bool imageSupport() const;
|
||||||
|
|
||||||
|
CV_WRAP bool imageFromBufferSupport() const;
|
||||||
|
uint imagePitchAlignment() const;
|
||||||
|
uint imageBaseAddressAlignment() const;
|
||||||
|
|
||||||
|
/// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
|
||||||
|
CV_WRAP bool intelSubgroupsSupport() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t image2DMaxWidth() const;
|
||||||
|
CV_WRAP size_t image2DMaxHeight() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t image3DMaxWidth() const;
|
||||||
|
CV_WRAP size_t image3DMaxHeight() const;
|
||||||
|
CV_WRAP size_t image3DMaxDepth() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t imageMaxBufferSize() const;
|
||||||
|
CV_WRAP size_t imageMaxArraySize() const;
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
UNKNOWN_VENDOR=0,
|
||||||
|
VENDOR_AMD=1,
|
||||||
|
VENDOR_INTEL=2,
|
||||||
|
VENDOR_NVIDIA=3
|
||||||
|
};
|
||||||
|
CV_WRAP int vendorID() const;
|
||||||
|
// FIXIT
|
||||||
|
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
|
||||||
|
// This method should use platform name instead of vendor name.
|
||||||
|
// After fix restore code in arithm.cpp: ocl_compare()
|
||||||
|
CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
|
||||||
|
CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
|
||||||
|
CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
|
||||||
|
|
||||||
|
CV_WRAP int maxClockFrequency() const;
|
||||||
|
CV_WRAP int maxComputeUnits() const;
|
||||||
|
CV_WRAP int maxConstantArgs() const;
|
||||||
|
CV_WRAP size_t maxConstantBufferSize() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t maxMemAllocSize() const;
|
||||||
|
CV_WRAP size_t maxParameterSize() const;
|
||||||
|
|
||||||
|
CV_WRAP int maxReadImageArgs() const;
|
||||||
|
CV_WRAP int maxWriteImageArgs() const;
|
||||||
|
CV_WRAP int maxSamplers() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t maxWorkGroupSize() const;
|
||||||
|
CV_WRAP int maxWorkItemDims() const;
|
||||||
|
void maxWorkItemSizes(size_t*) const;
|
||||||
|
|
||||||
|
CV_WRAP int memBaseAddrAlign() const;
|
||||||
|
|
||||||
|
CV_WRAP int nativeVectorWidthChar() const;
|
||||||
|
CV_WRAP int nativeVectorWidthShort() const;
|
||||||
|
CV_WRAP int nativeVectorWidthInt() const;
|
||||||
|
CV_WRAP int nativeVectorWidthLong() const;
|
||||||
|
CV_WRAP int nativeVectorWidthFloat() const;
|
||||||
|
CV_WRAP int nativeVectorWidthDouble() const;
|
||||||
|
CV_WRAP int nativeVectorWidthHalf() const;
|
||||||
|
|
||||||
|
CV_WRAP int preferredVectorWidthChar() const;
|
||||||
|
CV_WRAP int preferredVectorWidthShort() const;
|
||||||
|
CV_WRAP int preferredVectorWidthInt() const;
|
||||||
|
CV_WRAP int preferredVectorWidthLong() const;
|
||||||
|
CV_WRAP int preferredVectorWidthFloat() const;
|
||||||
|
CV_WRAP int preferredVectorWidthDouble() const;
|
||||||
|
CV_WRAP int preferredVectorWidthHalf() const;
|
||||||
|
|
||||||
|
CV_WRAP size_t printfBufferSize() const;
|
||||||
|
CV_WRAP size_t profilingTimerResolution() const;
|
||||||
|
|
||||||
|
CV_WRAP static const Device& getDefault();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success.
|
||||||
|
*
|
||||||
|
* @note Ownership of the passed device is passed to OpenCV on success.
|
||||||
|
* The caller should additionally call `clRetainDevice` on it if it intends
|
||||||
|
* to continue using the device.
|
||||||
|
*/
|
||||||
|
static Device fromHandle(void* d);
|
||||||
|
|
||||||
|
struct Impl;
|
||||||
|
inline Impl* getImpl() const { return (Impl*)p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CV_EXPORTS Context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Context() CV_NOEXCEPT;
|
||||||
|
explicit Context(int dtype); //!< @deprecated
|
||||||
|
~Context();
|
||||||
|
Context(const Context& c);
|
||||||
|
Context& operator= (const Context& c);
|
||||||
|
Context(Context&& c) CV_NOEXCEPT;
|
||||||
|
Context& operator = (Context&& c) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
/** @deprecated */
|
||||||
|
bool create();
|
||||||
|
/** @deprecated */
|
||||||
|
bool create(int dtype);
|
||||||
|
|
||||||
|
size_t ndevices() const;
|
||||||
|
Device& device(size_t idx) const;
|
||||||
|
Program getProg(const ProgramSource& prog,
|
||||||
|
const String& buildopt, String& errmsg);
|
||||||
|
void unloadProg(Program& prog);
|
||||||
|
|
||||||
|
|
||||||
|
/** Get thread-local OpenCL context (initialize if necessary) */
|
||||||
|
#if 0 // OpenCV 5.0
|
||||||
|
static Context& getDefault();
|
||||||
|
#else
|
||||||
|
static Context& getDefault(bool initialize = true);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** @returns cl_context value */
|
||||||
|
void* ptr() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get OpenCL context property specified on context creation
|
||||||
|
* @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type)
|
||||||
|
* @returns Property value if property was specified on clCreateContext, or NULL if context created without the property
|
||||||
|
*/
|
||||||
|
void* getOpenCLContextProperty(int propertyId) const;
|
||||||
|
|
||||||
|
bool useSVM() const;
|
||||||
|
void setUseSVM(bool enabled);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param context OpenCL handle (cl_context). clRetainContext() is called on success
|
||||||
|
*/
|
||||||
|
static Context fromHandle(void* context);
|
||||||
|
static Context fromDevice(const ocl::Device& device);
|
||||||
|
static Context create(const std::string& configuration);
|
||||||
|
|
||||||
|
void release();
|
||||||
|
|
||||||
|
class CV_EXPORTS UserContext {
|
||||||
|
public:
|
||||||
|
virtual ~UserContext();
|
||||||
|
};
|
||||||
|
template <typename T>
|
||||||
|
inline void setUserContext(const std::shared_ptr<T>& userContext) {
|
||||||
|
setUserContext(typeid(T), userContext);
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
inline std::shared_ptr<T> getUserContext() {
|
||||||
|
return std::dynamic_pointer_cast<T>(getUserContext(typeid(T)));
|
||||||
|
}
|
||||||
|
void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext);
|
||||||
|
std::shared_ptr<UserContext> getUserContext(std::type_index typeId);
|
||||||
|
|
||||||
|
struct Impl;
|
||||||
|
inline Impl* getImpl() const { return (Impl*)p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
// TODO OpenCV 5.0
|
||||||
|
//protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @deprecated */
|
||||||
|
class CV_EXPORTS Platform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Platform() CV_NOEXCEPT;
|
||||||
|
~Platform();
|
||||||
|
Platform(const Platform& p);
|
||||||
|
Platform& operator = (const Platform& p);
|
||||||
|
Platform(Platform&& p) CV_NOEXCEPT;
|
||||||
|
Platform& operator = (Platform&& p) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
void* ptr() const;
|
||||||
|
|
||||||
|
/** @deprecated */
|
||||||
|
static Platform& getDefault();
|
||||||
|
|
||||||
|
struct Impl;
|
||||||
|
inline Impl* getImpl() const { return (Impl*)p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @brief Attaches OpenCL context to OpenCV
|
||||||
|
@note
|
||||||
|
OpenCV will check if available OpenCL platform has platformName name, then assign context to
|
||||||
|
OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and
|
||||||
|
new command queue will be created.
|
||||||
|
@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
|
||||||
|
@param platformID ID of platform attached context was created for
|
||||||
|
@param context OpenCL context to be attached to OpenCV
|
||||||
|
@param deviceID ID of device, must be created from attached context
|
||||||
|
*/
|
||||||
|
CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
|
||||||
|
|
||||||
|
/** @brief Convert OpenCL buffer to UMat
|
||||||
|
@note
|
||||||
|
OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory
|
||||||
|
content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and
|
||||||
|
`clRetainMemObject` is called.
|
||||||
|
@param cl_mem_buffer source clBuffer handle
|
||||||
|
@param step num of bytes in single row
|
||||||
|
@param rows number of rows
|
||||||
|
@param cols number of cols
|
||||||
|
@param type OpenCV type of image
|
||||||
|
@param dst destination UMat
|
||||||
|
*/
|
||||||
|
CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
|
||||||
|
|
||||||
|
/** @brief Convert OpenCL image2d_t to UMat
|
||||||
|
@note
|
||||||
|
OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content
|
||||||
|
is copied from image to UMat with `clEnqueueCopyImageToBuffer` function.
|
||||||
|
@param cl_mem_image source image2d_t handle
|
||||||
|
@param dst destination UMat
|
||||||
|
*/
|
||||||
|
CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
|
||||||
|
|
||||||
|
// TODO Move to internal header
|
||||||
|
/// @deprecated
|
||||||
|
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
|
||||||
|
|
||||||
|
class CV_EXPORTS Queue
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Queue() CV_NOEXCEPT;
|
||||||
|
explicit Queue(const Context& c, const Device& d=Device());
|
||||||
|
~Queue();
|
||||||
|
Queue(const Queue& q);
|
||||||
|
Queue& operator = (const Queue& q);
|
||||||
|
Queue(Queue&& q) CV_NOEXCEPT;
|
||||||
|
Queue& operator = (Queue&& q) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
bool create(const Context& c=Context(), const Device& d=Device());
|
||||||
|
void finish();
|
||||||
|
void* ptr() const;
|
||||||
|
static Queue& getDefault();
|
||||||
|
|
||||||
|
/// @brief Returns OpenCL command queue with enable profiling mode support
|
||||||
|
const Queue& getProfilingQueue() const;
|
||||||
|
|
||||||
|
struct Impl; friend struct Impl;
|
||||||
|
inline Impl* getImpl() const { return p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CV_EXPORTS KernelArg
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
|
||||||
|
KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
|
||||||
|
KernelArg() CV_NOEXCEPT;
|
||||||
|
|
||||||
|
static KernelArg Local(size_t localMemSize)
|
||||||
|
{ return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); }
|
||||||
|
static KernelArg PtrWriteOnly(const UMat& m)
|
||||||
|
{ return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
|
||||||
|
static KernelArg PtrReadOnly(const UMat& m)
|
||||||
|
{ return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
|
||||||
|
static KernelArg PtrReadWrite(const UMat& m)
|
||||||
|
{ return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
|
||||||
|
static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
|
||||||
|
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
|
||||||
|
static KernelArg Constant(const Mat& m);
|
||||||
|
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
|
||||||
|
{ return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
|
||||||
|
|
||||||
|
int flags;
|
||||||
|
UMat* m;
|
||||||
|
const void* obj;
|
||||||
|
size_t sz;
|
||||||
|
int wscale, iwscale;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CV_EXPORTS Kernel
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Kernel() CV_NOEXCEPT;
|
||||||
|
Kernel(const char* kname, const Program& prog);
|
||||||
|
Kernel(const char* kname, const ProgramSource& prog,
|
||||||
|
const String& buildopts = String(), String* errmsg=0);
|
||||||
|
~Kernel();
|
||||||
|
Kernel(const Kernel& k);
|
||||||
|
Kernel& operator = (const Kernel& k);
|
||||||
|
Kernel(Kernel&& k) CV_NOEXCEPT;
|
||||||
|
Kernel& operator = (Kernel&& k) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
bool empty() const;
|
||||||
|
bool create(const char* kname, const Program& prog);
|
||||||
|
bool create(const char* kname, const ProgramSource& prog,
|
||||||
|
const String& buildopts, String* errmsg=0);
|
||||||
|
|
||||||
|
int set(int i, const void* value, size_t sz);
|
||||||
|
int set(int i, const Image2D& image2D);
|
||||||
|
int set(int i, const UMat& m);
|
||||||
|
int set(int i, const KernelArg& arg);
|
||||||
|
template<typename _Tp> int set(int i, const _Tp& value)
|
||||||
|
{ return set(i, &value, sizeof(value)); }
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
template<typename _Tp0> inline
|
||||||
|
int set_args_(int i, const _Tp0& a0) { return set(i, a0); }
|
||||||
|
template<typename _Tp0, typename... _Tps> inline
|
||||||
|
int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); }
|
||||||
|
public:
|
||||||
|
/** @brief Setup OpenCL Kernel arguments.
|
||||||
|
Avoid direct using of set(i, ...) methods.
|
||||||
|
@code
|
||||||
|
bool ok = kernel
|
||||||
|
.args(
|
||||||
|
srcUMat, dstUMat,
|
||||||
|
(float)some_float_param
|
||||||
|
).run(ndims, globalSize, localSize);
|
||||||
|
if (!ok) return false;
|
||||||
|
@endcode
|
||||||
|
*/
|
||||||
|
template<typename... _Tps> inline
|
||||||
|
Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; }
|
||||||
|
|
||||||
|
/** @brief Run the OpenCL kernel (globalsize value may be adjusted)
|
||||||
|
|
||||||
|
@param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
|
||||||
|
@param globalsize work items for each dimension. It is not the final globalsize passed to
|
||||||
|
OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
|
||||||
|
value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
|
||||||
|
adjusted values are greater than or equal to the original values.
|
||||||
|
@param localsize work-group size for each dimension.
|
||||||
|
@param sync specify whether to wait for OpenCL computation to finish before return.
|
||||||
|
@param q command queue
|
||||||
|
|
||||||
|
@note Use run_() if your kernel code doesn't support adjusted globalsize.
|
||||||
|
*/
|
||||||
|
bool run(int dims, size_t globalsize[],
|
||||||
|
size_t localsize[], bool sync, const Queue& q=Queue());
|
||||||
|
|
||||||
|
/** @brief Run the OpenCL kernel
|
||||||
|
*
|
||||||
|
* @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
|
||||||
|
* @param globalsize work items for each dimension. This value is passed to OpenCL without changes.
|
||||||
|
* @param localsize work-group size for each dimension.
|
||||||
|
* @param sync specify whether to wait for OpenCL computation to finish before return.
|
||||||
|
* @param q command queue
|
||||||
|
*/
|
||||||
|
bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue());
|
||||||
|
|
||||||
|
bool runTask(bool sync, const Queue& q=Queue());
|
||||||
|
|
||||||
|
/** @brief Similar to synchronized run_() call with returning of kernel execution time
|
||||||
|
*
|
||||||
|
* Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
|
||||||
|
* @return Execution time in nanoseconds or negative number on error
|
||||||
|
*/
|
||||||
|
int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
|
||||||
|
|
||||||
|
size_t workGroupSize() const;
|
||||||
|
size_t preferedWorkGroupSizeMultiple() const;
|
||||||
|
bool compileWorkGroupSize(size_t wsz[]) const;
|
||||||
|
size_t localMemSize() const;
|
||||||
|
|
||||||
|
void* ptr() const;
|
||||||
|
struct Impl;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS Program
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Program() CV_NOEXCEPT;
|
||||||
|
Program(const ProgramSource& src,
|
||||||
|
const String& buildflags, String& errmsg);
|
||||||
|
Program(const Program& prog);
|
||||||
|
Program& operator = (const Program& prog);
|
||||||
|
Program(Program&& prog) CV_NOEXCEPT;
|
||||||
|
Program& operator = (Program&& prog) CV_NOEXCEPT;
|
||||||
|
~Program();
|
||||||
|
|
||||||
|
bool create(const ProgramSource& src,
|
||||||
|
const String& buildflags, String& errmsg);
|
||||||
|
|
||||||
|
void* ptr() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Query device-specific program binary.
|
||||||
|
*
|
||||||
|
* Returns RAW OpenCL executable binary without additional attachments.
|
||||||
|
*
|
||||||
|
* @sa ProgramSource::fromBinary
|
||||||
|
*
|
||||||
|
* @param[out] binary output buffer
|
||||||
|
*/
|
||||||
|
void getBinary(std::vector<char>& binary) const;
|
||||||
|
|
||||||
|
struct Impl; friend struct Impl;
|
||||||
|
inline Impl* getImpl() const { return (Impl*)p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
public:
|
||||||
|
#ifndef OPENCV_REMOVE_DEPRECATED_API
|
||||||
|
// TODO Remove this
|
||||||
|
CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
|
||||||
|
CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
|
||||||
|
CV_DEPRECATED const ProgramSource& source() const; // implementation removed
|
||||||
|
CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
|
||||||
|
CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class CV_EXPORTS ProgramSource
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef uint64 hash_t; // deprecated
|
||||||
|
|
||||||
|
ProgramSource() CV_NOEXCEPT;
|
||||||
|
explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
|
||||||
|
explicit ProgramSource(const String& prog); // deprecated
|
||||||
|
explicit ProgramSource(const char* prog); // deprecated
|
||||||
|
~ProgramSource();
|
||||||
|
ProgramSource(const ProgramSource& prog);
|
||||||
|
ProgramSource& operator = (const ProgramSource& prog);
|
||||||
|
ProgramSource(ProgramSource&& prog) CV_NOEXCEPT;
|
||||||
|
ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
const String& source() const; // deprecated
|
||||||
|
hash_t hash() const; // deprecated
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Describe OpenCL program binary.
|
||||||
|
* Do not call clCreateProgramWithBinary() and/or clBuildProgram().
|
||||||
|
*
|
||||||
|
* Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies).
|
||||||
|
*
|
||||||
|
* This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version.
|
||||||
|
*
|
||||||
|
* @param module name of program owner module
|
||||||
|
* @param name unique name of program (module+name is used as key for OpenCL program caching)
|
||||||
|
* @param binary buffer address. See buffer lifetime requirement in description.
|
||||||
|
* @param size buffer size
|
||||||
|
* @param buildOptions additional program-related build options passed to clBuildProgram()
|
||||||
|
* @return created ProgramSource object
|
||||||
|
*/
|
||||||
|
static ProgramSource fromBinary(const String& module, const String& name,
|
||||||
|
const unsigned char* binary, const size_t size,
|
||||||
|
const cv::String& buildOptions = cv::String());
|
||||||
|
|
||||||
|
/** @brief Describe OpenCL program in SPIR format.
|
||||||
|
* Do not call clCreateProgramWithBinary() and/or clBuildProgram().
|
||||||
|
*
|
||||||
|
* Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
|
||||||
|
*
|
||||||
|
* Caller should guarantee binary buffer lifetime greater than ProgramSource object (and any of its copies).
|
||||||
|
*
|
||||||
|
* Programs in this format are portable between OpenCL implementations with 'khr_spir' extension:
|
||||||
|
* https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
|
||||||
|
* (but they are not portable between different platforms: 32-bit / 64-bit)
|
||||||
|
*
|
||||||
|
* Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'.
|
||||||
|
*
|
||||||
|
* @param module name of program owner module
|
||||||
|
* @param name unique name of program (module+name is used as key for OpenCL program caching)
|
||||||
|
* @param binary buffer address. See buffer lifetime requirement in description.
|
||||||
|
* @param size buffer size
|
||||||
|
* @param buildOptions additional program-related build options passed to clBuildProgram()
|
||||||
|
* (these options are added automatically: '-x spir' and '-spir-std=1.2')
|
||||||
|
* @return created ProgramSource object.
|
||||||
|
*/
|
||||||
|
static ProgramSource fromSPIR(const String& module, const String& name,
|
||||||
|
const unsigned char* binary, const size_t size,
|
||||||
|
const cv::String& buildOptions = cv::String());
|
||||||
|
|
||||||
|
//OpenCL 2.1+ only
|
||||||
|
//static Program fromSPIRV(const String& module, const String& name,
|
||||||
|
// const unsigned char* binary, const size_t size,
|
||||||
|
// const cv::String& buildOptions = cv::String());
|
||||||
|
|
||||||
|
struct Impl; friend struct Impl;
|
||||||
|
inline Impl* getImpl() const { return (Impl*)p; }
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS PlatformInfo
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
PlatformInfo() CV_NOEXCEPT;
|
||||||
|
/**
|
||||||
|
* @param id pointer cl_platform_id (cl_platform_id*)
|
||||||
|
*/
|
||||||
|
explicit PlatformInfo(void* id);
|
||||||
|
~PlatformInfo();
|
||||||
|
|
||||||
|
PlatformInfo(const PlatformInfo& i);
|
||||||
|
PlatformInfo& operator =(const PlatformInfo& i);
|
||||||
|
PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT;
|
||||||
|
PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
String name() const;
|
||||||
|
String vendor() const;
|
||||||
|
|
||||||
|
/// See CL_PLATFORM_VERSION
|
||||||
|
String version() const;
|
||||||
|
int versionMajor() const;
|
||||||
|
int versionMinor() const;
|
||||||
|
|
||||||
|
int deviceNumber() const;
|
||||||
|
void getDevice(Device& device, int d) const;
|
||||||
|
|
||||||
|
struct Impl;
|
||||||
|
bool empty() const { return !p; }
|
||||||
|
protected:
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_EXPORTS CV_DEPRECATED const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
|
||||||
|
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf, size_t buf_size);
|
||||||
|
CV_EXPORTS const char* typeToStr(int t);
|
||||||
|
CV_EXPORTS const char* memopTypeToStr(int t);
|
||||||
|
CV_EXPORTS const char* vecopTypeToStr(int t);
|
||||||
|
CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
|
||||||
|
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
|
||||||
|
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
|
||||||
|
|
||||||
|
|
||||||
|
enum OclVectorStrategy
|
||||||
|
{
|
||||||
|
// all matrices have its own vector width
|
||||||
|
OCL_VECTOR_OWN = 0,
|
||||||
|
// all matrices have maximal vector width among all matrices
|
||||||
|
// (useful for cases when matrices have different data types)
|
||||||
|
OCL_VECTOR_MAX = 1,
|
||||||
|
|
||||||
|
// default strategy
|
||||||
|
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||||
|
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||||
|
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||||
|
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||||
|
|
||||||
|
CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
|
||||||
|
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||||
|
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||||
|
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||||
|
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||||
|
|
||||||
|
// with OCL_VECTOR_MAX strategy
|
||||||
|
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||||
|
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||||
|
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
|
||||||
|
|
||||||
|
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
|
||||||
|
|
||||||
|
class CV_EXPORTS Image2D
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Image2D() CV_NOEXCEPT;
|
||||||
|
|
||||||
|
/**
|
||||||
|
@param src UMat object from which to get image properties and data
|
||||||
|
@param norm flag to enable the use of normalized channel data types
|
||||||
|
@param alias flag indicating that the image should alias the src UMat. If true, changes to the
|
||||||
|
image or src will be reflected in both objects.
|
||||||
|
*/
|
||||||
|
explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
|
||||||
|
Image2D(const Image2D & i);
|
||||||
|
~Image2D();
|
||||||
|
|
||||||
|
Image2D & operator = (const Image2D & i);
|
||||||
|
Image2D(Image2D &&) CV_NOEXCEPT;
|
||||||
|
Image2D &operator=(Image2D &&) CV_NOEXCEPT;
|
||||||
|
|
||||||
|
/** Indicates if creating an aliased image should succeed.
|
||||||
|
Depends on the underlying platform and the dimensions of the UMat.
|
||||||
|
*/
|
||||||
|
static bool canCreateAlias(const UMat &u);
|
||||||
|
|
||||||
|
/** Indicates if the image format is supported.
|
||||||
|
*/
|
||||||
|
static bool isFormatSupported(int depth, int cn, bool norm);
|
||||||
|
|
||||||
|
void* ptr() const;
|
||||||
|
protected:
|
||||||
|
struct Impl;
|
||||||
|
Impl* p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CV_EXPORTS Timer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Timer(const Queue& q);
|
||||||
|
~Timer();
|
||||||
|
void start();
|
||||||
|
void stop();
|
||||||
|
|
||||||
|
uint64 durationNS() const; ///< duration in nanoseconds
|
||||||
|
|
||||||
|
protected:
|
||||||
|
struct Impl;
|
||||||
|
Impl* const p;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Timer(const Timer&); // disabled
|
||||||
|
Timer& operator=(const Timer&); // disabled
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_EXPORTS MatAllocator* getOpenCLAllocator();
|
||||||
|
|
||||||
|
|
||||||
|
class CV_EXPORTS_W OpenCLExecutionContext
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
OpenCLExecutionContext() = default;
|
||||||
|
~OpenCLExecutionContext() = default;
|
||||||
|
|
||||||
|
OpenCLExecutionContext(const OpenCLExecutionContext&) = default;
|
||||||
|
OpenCLExecutionContext(OpenCLExecutionContext&&) = default;
|
||||||
|
|
||||||
|
OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default;
|
||||||
|
OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default;
|
||||||
|
|
||||||
|
/** Get associated ocl::Context */
|
||||||
|
Context& getContext() const;
|
||||||
|
/** Get the single default associated ocl::Device */
|
||||||
|
Device& getDevice() const;
|
||||||
|
/** Get the single ocl::Queue that is associated with the ocl::Context and
|
||||||
|
* the single default ocl::Device
|
||||||
|
*/
|
||||||
|
Queue& getQueue() const;
|
||||||
|
|
||||||
|
bool useOpenCL() const;
|
||||||
|
void setUseOpenCL(bool flag);
|
||||||
|
|
||||||
|
/** Get OpenCL execution context of current thread.
|
||||||
|
*
|
||||||
|
* Initialize OpenCL execution context if it is empty
|
||||||
|
* - create new
|
||||||
|
* - reuse context of the main thread (threadID = 0)
|
||||||
|
*/
|
||||||
|
static OpenCLExecutionContext& getCurrent();
|
||||||
|
|
||||||
|
/** Get OpenCL execution context of current thread (can be empty) */
|
||||||
|
static OpenCLExecutionContext& getCurrentRef();
|
||||||
|
|
||||||
|
/** Bind this OpenCL execution context to current thread.
|
||||||
|
*
|
||||||
|
* Context can't be empty.
|
||||||
|
*
|
||||||
|
* @note clFinish is not called for queue of previous execution context
|
||||||
|
*/
|
||||||
|
void bind() const;
|
||||||
|
|
||||||
|
/** Creates new execution context with same OpenCV context and device
|
||||||
|
*
|
||||||
|
* @param q OpenCL queue
|
||||||
|
*/
|
||||||
|
OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const;
|
||||||
|
/** @overload */
|
||||||
|
OpenCLExecutionContext cloneWithNewQueue() const;
|
||||||
|
|
||||||
|
/** @brief Creates OpenCL execution context
|
||||||
|
* OpenCV will check if available OpenCL platform has platformName name,
|
||||||
|
* then assign context to OpenCV.
|
||||||
|
* The deviceID device will be used as target device and a new command queue will be created.
|
||||||
|
*
|
||||||
|
* @note On success, ownership of one reference of the context and device is taken.
|
||||||
|
* The caller should additionally call `clRetainContext` and/or `clRetainDevice`
|
||||||
|
* to increase the reference count if it wishes to continue using them.
|
||||||
|
*
|
||||||
|
* @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
|
||||||
|
* @param platformID ID of platform attached context was created for (cl_platform_id)
|
||||||
|
* @param context OpenCL context to be attached to OpenCV (cl_context)
|
||||||
|
* @param deviceID OpenCL device (cl_device_id)
|
||||||
|
*/
|
||||||
|
static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID);
|
||||||
|
|
||||||
|
/** @brief Creates OpenCL execution context
|
||||||
|
*
|
||||||
|
* @param context non-empty OpenCL context
|
||||||
|
* @param device non-empty OpenCL device (must be a part of context)
|
||||||
|
* @param queue non-empty OpenCL queue for provided context and device
|
||||||
|
*/
|
||||||
|
static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue);
|
||||||
|
/** @overload */
|
||||||
|
static OpenCLExecutionContext create(const Context& context, const Device& device);
|
||||||
|
|
||||||
|
struct Impl;
|
||||||
|
inline bool empty() const { return !p; }
|
||||||
|
void release();
|
||||||
|
protected:
|
||||||
|
std::shared_ptr<Impl> p;
|
||||||
|
};
|
||||||
|
|
||||||
|
class OpenCLExecutionContextScope
|
||||||
|
{
|
||||||
|
OpenCLExecutionContext ctx_;
|
||||||
|
public:
|
||||||
|
inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx)
|
||||||
|
{
|
||||||
|
CV_Assert(!ctx.empty());
|
||||||
|
ctx_ = OpenCLExecutionContext::getCurrentRef();
|
||||||
|
ctx.bind();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ~OpenCLExecutionContextScope()
|
||||||
|
{
|
||||||
|
if (!ctx_.empty())
|
||||||
|
{
|
||||||
|
ctx_.bind();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __OPENCV_BUILD
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
CV_EXPORTS bool isOpenCLForced();
|
||||||
|
#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
|
||||||
|
|
||||||
|
CV_EXPORTS bool isPerformanceCheckBypassed();
|
||||||
|
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
|
||||||
|
|
||||||
|
CV_EXPORTS bool isCLBuffer(UMat& u);
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
}}
|
||||||
|
|
||||||
|
#endif
|
||||||
69
3rdpart/OpenCV/include/opencv2/core/ocl_genbase.hpp
Normal file
69
3rdpart/OpenCV/include/opencv2/core/ocl_genbase.hpp
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_OPENCL_GENBASE_HPP
|
||||||
|
#define OPENCV_OPENCL_GENBASE_HPP
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
namespace ocl {
|
||||||
|
|
||||||
|
class ProgramSource;
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
struct CV_EXPORTS ProgramEntry
|
||||||
|
{
|
||||||
|
const char* module;
|
||||||
|
const char* name;
|
||||||
|
const char* programCode;
|
||||||
|
const char* programHash;
|
||||||
|
ProgramSource* pProgramSource;
|
||||||
|
|
||||||
|
operator ProgramSource& () const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} } } // namespace
|
||||||
|
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
#endif
|
||||||
82
3rdpart/OpenCV/include/opencv2/core/opencl/ocl_defs.hpp
Normal file
82
3rdpart/OpenCV/include/opencv2/core/opencl/ocl_defs.hpp
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
|
||||||
|
#define OPENCV_CORE_OPENCL_DEFS_HPP
|
||||||
|
|
||||||
|
#include "opencv2/core/utility.hpp"
|
||||||
|
#include "cvconfig.h"
|
||||||
|
|
||||||
|
namespace cv { namespace ocl {
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
/// Call is similar to useOpenCL() but doesn't try to load OpenCL runtime or create OpenCL context
|
||||||
|
CV_EXPORTS bool isOpenCLActivated();
|
||||||
|
#else
|
||||||
|
static inline bool isOpenCLActivated() { return false; }
|
||||||
|
#endif
|
||||||
|
}} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
//#define CV_OPENCL_RUN_ASSERT
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
#ifdef CV_OPENCL_RUN_VERBOSE
|
||||||
|
#define CV_OCL_RUN_(condition, func, ...) \
|
||||||
|
{ \
|
||||||
|
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
|
||||||
|
{ \
|
||||||
|
printf("%s: OpenCL implementation is running\n", CV_Func); \
|
||||||
|
fflush(stdout); \
|
||||||
|
CV_IMPL_ADD(CV_IMPL_OCL); \
|
||||||
|
return __VA_ARGS__; \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
printf("%s: Plain implementation is running\n", CV_Func); \
|
||||||
|
fflush(stdout); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#elif defined CV_OPENCL_RUN_ASSERT
|
||||||
|
#define CV_OCL_RUN_(condition, func, ...) \
|
||||||
|
{ \
|
||||||
|
if (cv::ocl::isOpenCLActivated() && (condition)) \
|
||||||
|
{ \
|
||||||
|
if(func) \
|
||||||
|
{ \
|
||||||
|
CV_IMPL_ADD(CV_IMPL_OCL); \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
CV_Error(cv::Error::StsAssert, #func); \
|
||||||
|
} \
|
||||||
|
return __VA_ARGS__; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define CV_OCL_RUN_(condition, func, ...) \
|
||||||
|
try \
|
||||||
|
{ \
|
||||||
|
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
|
||||||
|
{ \
|
||||||
|
CV_IMPL_ADD(CV_IMPL_OCL); \
|
||||||
|
return __VA_ARGS__; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
catch (const cv::Exception& e) \
|
||||||
|
{ \
|
||||||
|
CV_UNUSED(e); /* TODO: Add some logging here */ \
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define CV_OCL_RUN_(condition, func, ...)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_OPENCL_DEFS_HPP
|
||||||
213
3rdpart/OpenCV/include/opencv2/core/opencl/opencl_info.hpp
Normal file
213
3rdpart/OpenCV/include/opencv2/core/opencl/opencl_info.hpp
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include <opencv2/core.hpp>
|
||||||
|
#include <opencv2/core/ocl.hpp>
|
||||||
|
|
||||||
|
#ifndef DUMP_CONFIG_PROPERTY
|
||||||
|
#define DUMP_CONFIG_PROPERTY(...)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef DUMP_MESSAGE_STDOUT
|
||||||
|
#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace cv {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
static std::string bytesToStringRepr(size_t value)
|
||||||
|
{
|
||||||
|
size_t b = value % 1024;
|
||||||
|
value /= 1024;
|
||||||
|
|
||||||
|
size_t kb = value % 1024;
|
||||||
|
value /= 1024;
|
||||||
|
|
||||||
|
size_t mb = value % 1024;
|
||||||
|
value /= 1024;
|
||||||
|
|
||||||
|
size_t gb = value;
|
||||||
|
|
||||||
|
std::ostringstream stream;
|
||||||
|
|
||||||
|
if (gb > 0)
|
||||||
|
stream << gb << " GB ";
|
||||||
|
if (mb > 0)
|
||||||
|
stream << mb << " MB ";
|
||||||
|
if (kb > 0)
|
||||||
|
stream << kb << " KB ";
|
||||||
|
if (b > 0)
|
||||||
|
stream << b << " B";
|
||||||
|
|
||||||
|
std::string s = stream.str();
|
||||||
|
if (s[s.size() - 1] == ' ')
|
||||||
|
s = s.substr(0, s.size() - 1);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getDeviceTypeString(const cv::ocl::Device& device)
|
||||||
|
{
|
||||||
|
if (device.type() == cv::ocl::Device::TYPE_CPU) {
|
||||||
|
return "CPU";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (device.type() == cv::ocl::Device::TYPE_GPU) {
|
||||||
|
if (device.hostUnifiedMemory()) {
|
||||||
|
return "iGPU";
|
||||||
|
} else {
|
||||||
|
return "dGPU";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "unknown";
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
static void dumpOpenCLInformation()
|
||||||
|
{
|
||||||
|
using namespace cv::ocl;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if (!haveOpenCL() || !useOpenCL())
|
||||||
|
{
|
||||||
|
DUMP_MESSAGE_STDOUT("OpenCL is disabled");
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl", "disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<PlatformInfo> platforms;
|
||||||
|
cv::ocl::getPlatfomsInfo(platforms);
|
||||||
|
if (platforms.empty())
|
||||||
|
{
|
||||||
|
DUMP_MESSAGE_STDOUT("OpenCL is not available");
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
|
||||||
|
for (size_t i = 0; i < platforms.size(); i++)
|
||||||
|
{
|
||||||
|
const PlatformInfo* platform = &platforms[i];
|
||||||
|
DUMP_MESSAGE_STDOUT(" " << platform->name());
|
||||||
|
Device current_device;
|
||||||
|
for (int j = 0; j < platform->deviceNumber(); j++)
|
||||||
|
{
|
||||||
|
platform->getDevice(current_device, j);
|
||||||
|
String deviceTypeStr = getDeviceTypeString(current_device);
|
||||||
|
DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")");
|
||||||
|
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ),
|
||||||
|
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
|
||||||
|
platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const Device& device = Device::getDefault();
|
||||||
|
if (!device.available())
|
||||||
|
CV_Error(Error::OpenCLInitError, "OpenCL device is not available");
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
|
||||||
|
|
||||||
|
String deviceTypeStr = getDeviceTypeString(device);
|
||||||
|
DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Name = " << device.name());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Version = " << device.version());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Driver version = " << device.driverVersion());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Address bits = " << device.addressBits());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Compute units = " << device.maxComputeUnits());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Max work group size = " << device.maxWorkGroupSize());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize());
|
||||||
|
|
||||||
|
std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize());
|
||||||
|
DUMP_MESSAGE_STDOUT(" Local memory size = " << localMemorySizeStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize());
|
||||||
|
|
||||||
|
std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize());
|
||||||
|
DUMP_MESSAGE_STDOUT(" Max memory allocation size = " << maxMemAllocSizeStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize());
|
||||||
|
|
||||||
|
const char* doubleSupportStr = device.hasFP64() ? "Yes" : "No";
|
||||||
|
DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.hasFP64());
|
||||||
|
|
||||||
|
const char* halfSupportStr = device.hasFP16() ? "Yes" : "No";
|
||||||
|
DUMP_MESSAGE_STDOUT(" Half support = " << halfSupportStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.hasFP16());
|
||||||
|
|
||||||
|
const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
|
||||||
|
DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Device extensions:");
|
||||||
|
String extensionsStr = device.extensions();
|
||||||
|
size_t pos = 0;
|
||||||
|
while (pos < extensionsStr.size())
|
||||||
|
{
|
||||||
|
size_t pos2 = extensionsStr.find(' ', pos);
|
||||||
|
if (pos2 == String::npos)
|
||||||
|
pos2 = extensionsStr.size();
|
||||||
|
if (pos2 > pos)
|
||||||
|
{
|
||||||
|
String extensionName = extensionsStr.substr(pos, pos2 - pos);
|
||||||
|
DUMP_MESSAGE_STDOUT(" " << extensionName);
|
||||||
|
}
|
||||||
|
pos = pos2 + 1;
|
||||||
|
}
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr);
|
||||||
|
|
||||||
|
const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
|
||||||
|
DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas());
|
||||||
|
|
||||||
|
const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No";
|
||||||
|
DUMP_MESSAGE_STDOUT(" Has AMD Fft = " << haveAmdFftStr);
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft());
|
||||||
|
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width char = " << device.preferredVectorWidthChar());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width short = " << device.preferredVectorWidthShort());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width int = " << device.preferredVectorWidthInt());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width long = " << device.preferredVectorWidthLong());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width float = " << device.preferredVectorWidthFloat());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
|
||||||
|
|
||||||
|
DUMP_MESSAGE_STDOUT(" Preferred vector width half = " << device.preferredVectorWidthHalf());
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf());
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info");
|
||||||
|
DUMP_MESSAGE_STDOUT("OpenCL device not available");
|
||||||
|
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef DUMP_MESSAGE_STDOUT
|
||||||
|
#undef DUMP_CONFIG_PROPERTY
|
||||||
|
|
||||||
|
} // namespace
|
||||||
81
3rdpart/OpenCV/include/opencv2/core/opencl/opencl_svm.hpp
Normal file
81
3rdpart/OpenCV/include/opencv2/core/opencl/opencl_svm.hpp
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
/* See LICENSE file in the root OpenCV directory */
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_OPENCL_SVM_HPP
|
||||||
|
#define OPENCV_CORE_OPENCL_SVM_HPP
|
||||||
|
|
||||||
|
//
|
||||||
|
// Internal usage only (binary compatibility is not guaranteed)
|
||||||
|
//
|
||||||
|
#ifndef __OPENCV_BUILD
|
||||||
|
#error Internal header file
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
|
||||||
|
#include "runtime/opencl_core.hpp"
|
||||||
|
#include "runtime/opencl_svm_20.hpp"
|
||||||
|
#include "runtime/opencl_svm_hsa_extension.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace ocl { namespace svm {
|
||||||
|
|
||||||
|
struct SVMCapabilities
|
||||||
|
{
|
||||||
|
enum Value
|
||||||
|
{
|
||||||
|
SVM_COARSE_GRAIN_BUFFER = (1 << 0),
|
||||||
|
SVM_FINE_GRAIN_BUFFER = (1 << 1),
|
||||||
|
SVM_FINE_GRAIN_SYSTEM = (1 << 2),
|
||||||
|
SVM_ATOMICS = (1 << 3),
|
||||||
|
};
|
||||||
|
int value_;
|
||||||
|
|
||||||
|
SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
|
||||||
|
operator int() const { return value_; }
|
||||||
|
|
||||||
|
inline bool isNoSVMSupport() const { return value_ == 0; }
|
||||||
|
inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; }
|
||||||
|
inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; }
|
||||||
|
inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; }
|
||||||
|
inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; }
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
|
||||||
|
|
||||||
|
struct SVMFunctions
|
||||||
|
{
|
||||||
|
clSVMAllocAMD_fn fn_clSVMAlloc;
|
||||||
|
clSVMFreeAMD_fn fn_clSVMFree;
|
||||||
|
clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
|
||||||
|
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
|
||||||
|
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
|
||||||
|
clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
|
||||||
|
clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
|
||||||
|
clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
|
||||||
|
clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
|
||||||
|
|
||||||
|
inline SVMFunctions()
|
||||||
|
: fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
|
||||||
|
fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
|
||||||
|
/*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
|
||||||
|
fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool isValid() const
|
||||||
|
{
|
||||||
|
return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
|
||||||
|
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
|
||||||
|
fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
|
||||||
|
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
|
||||||
|
|
||||||
|
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
|
||||||
|
|
||||||
|
}}} //namespace cv::ocl::svm
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_OPENCL_SVM_HPP
|
||||||
|
/* End of file. */
|
||||||
@@ -0,0 +1,602 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_clblas.py
|
||||||
|
#define clblasCaxpy clblasCaxpy_
|
||||||
|
#define clblasCcopy clblasCcopy_
|
||||||
|
#define clblasCdotc clblasCdotc_
|
||||||
|
#define clblasCdotu clblasCdotu_
|
||||||
|
#define clblasCgbmv clblasCgbmv_
|
||||||
|
#define clblasCgemm clblasCgemm_
|
||||||
|
#define clblasCgemv clblasCgemv_
|
||||||
|
#define clblasCgerc clblasCgerc_
|
||||||
|
#define clblasCgeru clblasCgeru_
|
||||||
|
#define clblasChbmv clblasChbmv_
|
||||||
|
#define clblasChemm clblasChemm_
|
||||||
|
#define clblasChemv clblasChemv_
|
||||||
|
#define clblasCher clblasCher_
|
||||||
|
#define clblasCher2 clblasCher2_
|
||||||
|
#define clblasCher2k clblasCher2k_
|
||||||
|
#define clblasCherk clblasCherk_
|
||||||
|
#define clblasChpmv clblasChpmv_
|
||||||
|
#define clblasChpr clblasChpr_
|
||||||
|
#define clblasChpr2 clblasChpr2_
|
||||||
|
#define clblasCrotg clblasCrotg_
|
||||||
|
#define clblasCscal clblasCscal_
|
||||||
|
#define clblasCsrot clblasCsrot_
|
||||||
|
#define clblasCsscal clblasCsscal_
|
||||||
|
#define clblasCswap clblasCswap_
|
||||||
|
#define clblasCsymm clblasCsymm_
|
||||||
|
#define clblasCsyr2k clblasCsyr2k_
|
||||||
|
#define clblasCsyrk clblasCsyrk_
|
||||||
|
#define clblasCtbmv clblasCtbmv_
|
||||||
|
#define clblasCtbsv clblasCtbsv_
|
||||||
|
#define clblasCtpmv clblasCtpmv_
|
||||||
|
#define clblasCtpsv clblasCtpsv_
|
||||||
|
#define clblasCtrmm clblasCtrmm_
|
||||||
|
#define clblasCtrmv clblasCtrmv_
|
||||||
|
#define clblasCtrsm clblasCtrsm_
|
||||||
|
#define clblasCtrsv clblasCtrsv_
|
||||||
|
#define clblasDasum clblasDasum_
|
||||||
|
#define clblasDaxpy clblasDaxpy_
|
||||||
|
#define clblasDcopy clblasDcopy_
|
||||||
|
#define clblasDdot clblasDdot_
|
||||||
|
#define clblasDgbmv clblasDgbmv_
|
||||||
|
#define clblasDgemm clblasDgemm_
|
||||||
|
#define clblasDgemv clblasDgemv_
|
||||||
|
#define clblasDger clblasDger_
|
||||||
|
#define clblasDnrm2 clblasDnrm2_
|
||||||
|
#define clblasDrot clblasDrot_
|
||||||
|
#define clblasDrotg clblasDrotg_
|
||||||
|
#define clblasDrotm clblasDrotm_
|
||||||
|
#define clblasDrotmg clblasDrotmg_
|
||||||
|
#define clblasDsbmv clblasDsbmv_
|
||||||
|
#define clblasDscal clblasDscal_
|
||||||
|
#define clblasDspmv clblasDspmv_
|
||||||
|
#define clblasDspr clblasDspr_
|
||||||
|
#define clblasDspr2 clblasDspr2_
|
||||||
|
#define clblasDswap clblasDswap_
|
||||||
|
#define clblasDsymm clblasDsymm_
|
||||||
|
#define clblasDsymv clblasDsymv_
|
||||||
|
#define clblasDsyr clblasDsyr_
|
||||||
|
#define clblasDsyr2 clblasDsyr2_
|
||||||
|
#define clblasDsyr2k clblasDsyr2k_
|
||||||
|
#define clblasDsyrk clblasDsyrk_
|
||||||
|
#define clblasDtbmv clblasDtbmv_
|
||||||
|
#define clblasDtbsv clblasDtbsv_
|
||||||
|
#define clblasDtpmv clblasDtpmv_
|
||||||
|
#define clblasDtpsv clblasDtpsv_
|
||||||
|
#define clblasDtrmm clblasDtrmm_
|
||||||
|
#define clblasDtrmv clblasDtrmv_
|
||||||
|
#define clblasDtrsm clblasDtrsm_
|
||||||
|
#define clblasDtrsv clblasDtrsv_
|
||||||
|
#define clblasDzasum clblasDzasum_
|
||||||
|
#define clblasDznrm2 clblasDznrm2_
|
||||||
|
#define clblasGetVersion clblasGetVersion_
|
||||||
|
#define clblasSasum clblasSasum_
|
||||||
|
#define clblasSaxpy clblasSaxpy_
|
||||||
|
#define clblasScasum clblasScasum_
|
||||||
|
#define clblasScnrm2 clblasScnrm2_
|
||||||
|
#define clblasScopy clblasScopy_
|
||||||
|
#define clblasSdot clblasSdot_
|
||||||
|
#define clblasSetup clblasSetup_
|
||||||
|
#define clblasSgbmv clblasSgbmv_
|
||||||
|
#define clblasSgemm clblasSgemm_
|
||||||
|
#define clblasSgemv clblasSgemv_
|
||||||
|
#define clblasSger clblasSger_
|
||||||
|
#define clblasSnrm2 clblasSnrm2_
|
||||||
|
#define clblasSrot clblasSrot_
|
||||||
|
#define clblasSrotg clblasSrotg_
|
||||||
|
#define clblasSrotm clblasSrotm_
|
||||||
|
#define clblasSrotmg clblasSrotmg_
|
||||||
|
#define clblasSsbmv clblasSsbmv_
|
||||||
|
#define clblasSscal clblasSscal_
|
||||||
|
#define clblasSspmv clblasSspmv_
|
||||||
|
#define clblasSspr clblasSspr_
|
||||||
|
#define clblasSspr2 clblasSspr2_
|
||||||
|
#define clblasSswap clblasSswap_
|
||||||
|
#define clblasSsymm clblasSsymm_
|
||||||
|
#define clblasSsymv clblasSsymv_
|
||||||
|
#define clblasSsyr clblasSsyr_
|
||||||
|
#define clblasSsyr2 clblasSsyr2_
|
||||||
|
#define clblasSsyr2k clblasSsyr2k_
|
||||||
|
#define clblasSsyrk clblasSsyrk_
|
||||||
|
#define clblasStbmv clblasStbmv_
|
||||||
|
#define clblasStbsv clblasStbsv_
|
||||||
|
#define clblasStpmv clblasStpmv_
|
||||||
|
#define clblasStpsv clblasStpsv_
|
||||||
|
#define clblasStrmm clblasStrmm_
|
||||||
|
#define clblasStrmv clblasStrmv_
|
||||||
|
#define clblasStrsm clblasStrsm_
|
||||||
|
#define clblasStrsv clblasStrsv_
|
||||||
|
#define clblasTeardown clblasTeardown_
|
||||||
|
#define clblasZaxpy clblasZaxpy_
|
||||||
|
#define clblasZcopy clblasZcopy_
|
||||||
|
#define clblasZdotc clblasZdotc_
|
||||||
|
#define clblasZdotu clblasZdotu_
|
||||||
|
#define clblasZdrot clblasZdrot_
|
||||||
|
#define clblasZdscal clblasZdscal_
|
||||||
|
#define clblasZgbmv clblasZgbmv_
|
||||||
|
#define clblasZgemm clblasZgemm_
|
||||||
|
#define clblasZgemv clblasZgemv_
|
||||||
|
#define clblasZgerc clblasZgerc_
|
||||||
|
#define clblasZgeru clblasZgeru_
|
||||||
|
#define clblasZhbmv clblasZhbmv_
|
||||||
|
#define clblasZhemm clblasZhemm_
|
||||||
|
#define clblasZhemv clblasZhemv_
|
||||||
|
#define clblasZher clblasZher_
|
||||||
|
#define clblasZher2 clblasZher2_
|
||||||
|
#define clblasZher2k clblasZher2k_
|
||||||
|
#define clblasZherk clblasZherk_
|
||||||
|
#define clblasZhpmv clblasZhpmv_
|
||||||
|
#define clblasZhpr clblasZhpr_
|
||||||
|
#define clblasZhpr2 clblasZhpr2_
|
||||||
|
#define clblasZrotg clblasZrotg_
|
||||||
|
#define clblasZscal clblasZscal_
|
||||||
|
#define clblasZswap clblasZswap_
|
||||||
|
#define clblasZsymm clblasZsymm_
|
||||||
|
#define clblasZsyr2k clblasZsyr2k_
|
||||||
|
#define clblasZsyrk clblasZsyrk_
|
||||||
|
#define clblasZtbmv clblasZtbmv_
|
||||||
|
#define clblasZtbsv clblasZtbsv_
|
||||||
|
#define clblasZtpmv clblasZtpmv_
|
||||||
|
#define clblasZtpsv clblasZtpsv_
|
||||||
|
#define clblasZtrmm clblasZtrmm_
|
||||||
|
#define clblasZtrmv clblasZtrmv_
|
||||||
|
#define clblasZtrsm clblasZtrsm_
|
||||||
|
#define clblasZtrsv clblasZtrsv_
|
||||||
|
#define clblasiCamax clblasiCamax_
|
||||||
|
#define clblasiDamax clblasiDamax_
|
||||||
|
#define clblasiSamax clblasiSamax_
|
||||||
|
#define clblasiZamax clblasiZamax_
|
||||||
|
|
||||||
|
#include <clBLAS.h>
|
||||||
|
|
||||||
|
// generated by parser_clblas.py
|
||||||
|
#undef clblasCaxpy
|
||||||
|
//#define clblasCaxpy clblasCaxpy_pfn
|
||||||
|
#undef clblasCcopy
|
||||||
|
//#define clblasCcopy clblasCcopy_pfn
|
||||||
|
#undef clblasCdotc
|
||||||
|
//#define clblasCdotc clblasCdotc_pfn
|
||||||
|
#undef clblasCdotu
|
||||||
|
//#define clblasCdotu clblasCdotu_pfn
|
||||||
|
#undef clblasCgbmv
|
||||||
|
//#define clblasCgbmv clblasCgbmv_pfn
|
||||||
|
#undef clblasCgemm
|
||||||
|
#define clblasCgemm clblasCgemm_pfn
|
||||||
|
#undef clblasCgemv
|
||||||
|
//#define clblasCgemv clblasCgemv_pfn
|
||||||
|
#undef clblasCgerc
|
||||||
|
//#define clblasCgerc clblasCgerc_pfn
|
||||||
|
#undef clblasCgeru
|
||||||
|
//#define clblasCgeru clblasCgeru_pfn
|
||||||
|
#undef clblasChbmv
|
||||||
|
//#define clblasChbmv clblasChbmv_pfn
|
||||||
|
#undef clblasChemm
|
||||||
|
//#define clblasChemm clblasChemm_pfn
|
||||||
|
#undef clblasChemv
|
||||||
|
//#define clblasChemv clblasChemv_pfn
|
||||||
|
#undef clblasCher
|
||||||
|
//#define clblasCher clblasCher_pfn
|
||||||
|
#undef clblasCher2
|
||||||
|
//#define clblasCher2 clblasCher2_pfn
|
||||||
|
#undef clblasCher2k
|
||||||
|
//#define clblasCher2k clblasCher2k_pfn
|
||||||
|
#undef clblasCherk
|
||||||
|
//#define clblasCherk clblasCherk_pfn
|
||||||
|
#undef clblasChpmv
|
||||||
|
//#define clblasChpmv clblasChpmv_pfn
|
||||||
|
#undef clblasChpr
|
||||||
|
//#define clblasChpr clblasChpr_pfn
|
||||||
|
#undef clblasChpr2
|
||||||
|
//#define clblasChpr2 clblasChpr2_pfn
|
||||||
|
#undef clblasCrotg
|
||||||
|
//#define clblasCrotg clblasCrotg_pfn
|
||||||
|
#undef clblasCscal
|
||||||
|
//#define clblasCscal clblasCscal_pfn
|
||||||
|
#undef clblasCsrot
|
||||||
|
//#define clblasCsrot clblasCsrot_pfn
|
||||||
|
#undef clblasCsscal
|
||||||
|
//#define clblasCsscal clblasCsscal_pfn
|
||||||
|
#undef clblasCswap
|
||||||
|
//#define clblasCswap clblasCswap_pfn
|
||||||
|
#undef clblasCsymm
|
||||||
|
//#define clblasCsymm clblasCsymm_pfn
|
||||||
|
#undef clblasCsyr2k
|
||||||
|
//#define clblasCsyr2k clblasCsyr2k_pfn
|
||||||
|
#undef clblasCsyrk
|
||||||
|
//#define clblasCsyrk clblasCsyrk_pfn
|
||||||
|
#undef clblasCtbmv
|
||||||
|
//#define clblasCtbmv clblasCtbmv_pfn
|
||||||
|
#undef clblasCtbsv
|
||||||
|
//#define clblasCtbsv clblasCtbsv_pfn
|
||||||
|
#undef clblasCtpmv
|
||||||
|
//#define clblasCtpmv clblasCtpmv_pfn
|
||||||
|
#undef clblasCtpsv
|
||||||
|
//#define clblasCtpsv clblasCtpsv_pfn
|
||||||
|
#undef clblasCtrmm
|
||||||
|
//#define clblasCtrmm clblasCtrmm_pfn
|
||||||
|
#undef clblasCtrmv
|
||||||
|
//#define clblasCtrmv clblasCtrmv_pfn
|
||||||
|
#undef clblasCtrsm
|
||||||
|
//#define clblasCtrsm clblasCtrsm_pfn
|
||||||
|
#undef clblasCtrsv
|
||||||
|
//#define clblasCtrsv clblasCtrsv_pfn
|
||||||
|
#undef clblasDasum
|
||||||
|
//#define clblasDasum clblasDasum_pfn
|
||||||
|
#undef clblasDaxpy
|
||||||
|
//#define clblasDaxpy clblasDaxpy_pfn
|
||||||
|
#undef clblasDcopy
|
||||||
|
//#define clblasDcopy clblasDcopy_pfn
|
||||||
|
#undef clblasDdot
|
||||||
|
//#define clblasDdot clblasDdot_pfn
|
||||||
|
#undef clblasDgbmv
|
||||||
|
//#define clblasDgbmv clblasDgbmv_pfn
|
||||||
|
#undef clblasDgemm
|
||||||
|
#define clblasDgemm clblasDgemm_pfn
|
||||||
|
#undef clblasDgemv
|
||||||
|
//#define clblasDgemv clblasDgemv_pfn
|
||||||
|
#undef clblasDger
|
||||||
|
//#define clblasDger clblasDger_pfn
|
||||||
|
#undef clblasDnrm2
|
||||||
|
//#define clblasDnrm2 clblasDnrm2_pfn
|
||||||
|
#undef clblasDrot
|
||||||
|
//#define clblasDrot clblasDrot_pfn
|
||||||
|
#undef clblasDrotg
|
||||||
|
//#define clblasDrotg clblasDrotg_pfn
|
||||||
|
#undef clblasDrotm
|
||||||
|
//#define clblasDrotm clblasDrotm_pfn
|
||||||
|
#undef clblasDrotmg
|
||||||
|
//#define clblasDrotmg clblasDrotmg_pfn
|
||||||
|
#undef clblasDsbmv
|
||||||
|
//#define clblasDsbmv clblasDsbmv_pfn
|
||||||
|
#undef clblasDscal
|
||||||
|
//#define clblasDscal clblasDscal_pfn
|
||||||
|
#undef clblasDspmv
|
||||||
|
//#define clblasDspmv clblasDspmv_pfn
|
||||||
|
#undef clblasDspr
|
||||||
|
//#define clblasDspr clblasDspr_pfn
|
||||||
|
#undef clblasDspr2
|
||||||
|
//#define clblasDspr2 clblasDspr2_pfn
|
||||||
|
#undef clblasDswap
|
||||||
|
//#define clblasDswap clblasDswap_pfn
|
||||||
|
#undef clblasDsymm
|
||||||
|
//#define clblasDsymm clblasDsymm_pfn
|
||||||
|
#undef clblasDsymv
|
||||||
|
//#define clblasDsymv clblasDsymv_pfn
|
||||||
|
#undef clblasDsyr
|
||||||
|
//#define clblasDsyr clblasDsyr_pfn
|
||||||
|
#undef clblasDsyr2
|
||||||
|
//#define clblasDsyr2 clblasDsyr2_pfn
|
||||||
|
#undef clblasDsyr2k
|
||||||
|
//#define clblasDsyr2k clblasDsyr2k_pfn
|
||||||
|
#undef clblasDsyrk
|
||||||
|
//#define clblasDsyrk clblasDsyrk_pfn
|
||||||
|
#undef clblasDtbmv
|
||||||
|
//#define clblasDtbmv clblasDtbmv_pfn
|
||||||
|
#undef clblasDtbsv
|
||||||
|
//#define clblasDtbsv clblasDtbsv_pfn
|
||||||
|
#undef clblasDtpmv
|
||||||
|
//#define clblasDtpmv clblasDtpmv_pfn
|
||||||
|
#undef clblasDtpsv
|
||||||
|
//#define clblasDtpsv clblasDtpsv_pfn
|
||||||
|
#undef clblasDtrmm
|
||||||
|
//#define clblasDtrmm clblasDtrmm_pfn
|
||||||
|
#undef clblasDtrmv
|
||||||
|
//#define clblasDtrmv clblasDtrmv_pfn
|
||||||
|
#undef clblasDtrsm
|
||||||
|
//#define clblasDtrsm clblasDtrsm_pfn
|
||||||
|
#undef clblasDtrsv
|
||||||
|
//#define clblasDtrsv clblasDtrsv_pfn
|
||||||
|
#undef clblasDzasum
|
||||||
|
//#define clblasDzasum clblasDzasum_pfn
|
||||||
|
#undef clblasDznrm2
|
||||||
|
//#define clblasDznrm2 clblasDznrm2_pfn
|
||||||
|
#undef clblasGetVersion
|
||||||
|
//#define clblasGetVersion clblasGetVersion_pfn
|
||||||
|
#undef clblasSasum
|
||||||
|
//#define clblasSasum clblasSasum_pfn
|
||||||
|
#undef clblasSaxpy
|
||||||
|
//#define clblasSaxpy clblasSaxpy_pfn
|
||||||
|
#undef clblasScasum
|
||||||
|
//#define clblasScasum clblasScasum_pfn
|
||||||
|
#undef clblasScnrm2
|
||||||
|
//#define clblasScnrm2 clblasScnrm2_pfn
|
||||||
|
#undef clblasScopy
|
||||||
|
//#define clblasScopy clblasScopy_pfn
|
||||||
|
#undef clblasSdot
|
||||||
|
//#define clblasSdot clblasSdot_pfn
|
||||||
|
#undef clblasSetup
|
||||||
|
#define clblasSetup clblasSetup_pfn
|
||||||
|
#undef clblasSgbmv
|
||||||
|
//#define clblasSgbmv clblasSgbmv_pfn
|
||||||
|
#undef clblasSgemm
|
||||||
|
#define clblasSgemm clblasSgemm_pfn
|
||||||
|
#undef clblasSgemv
|
||||||
|
//#define clblasSgemv clblasSgemv_pfn
|
||||||
|
#undef clblasSger
|
||||||
|
//#define clblasSger clblasSger_pfn
|
||||||
|
#undef clblasSnrm2
|
||||||
|
//#define clblasSnrm2 clblasSnrm2_pfn
|
||||||
|
#undef clblasSrot
|
||||||
|
//#define clblasSrot clblasSrot_pfn
|
||||||
|
#undef clblasSrotg
|
||||||
|
//#define clblasSrotg clblasSrotg_pfn
|
||||||
|
#undef clblasSrotm
|
||||||
|
//#define clblasSrotm clblasSrotm_pfn
|
||||||
|
#undef clblasSrotmg
|
||||||
|
//#define clblasSrotmg clblasSrotmg_pfn
|
||||||
|
#undef clblasSsbmv
|
||||||
|
//#define clblasSsbmv clblasSsbmv_pfn
|
||||||
|
#undef clblasSscal
|
||||||
|
//#define clblasSscal clblasSscal_pfn
|
||||||
|
#undef clblasSspmv
|
||||||
|
//#define clblasSspmv clblasSspmv_pfn
|
||||||
|
#undef clblasSspr
|
||||||
|
//#define clblasSspr clblasSspr_pfn
|
||||||
|
#undef clblasSspr2
|
||||||
|
//#define clblasSspr2 clblasSspr2_pfn
|
||||||
|
#undef clblasSswap
|
||||||
|
//#define clblasSswap clblasSswap_pfn
|
||||||
|
#undef clblasSsymm
|
||||||
|
//#define clblasSsymm clblasSsymm_pfn
|
||||||
|
#undef clblasSsymv
|
||||||
|
//#define clblasSsymv clblasSsymv_pfn
|
||||||
|
#undef clblasSsyr
|
||||||
|
//#define clblasSsyr clblasSsyr_pfn
|
||||||
|
#undef clblasSsyr2
|
||||||
|
//#define clblasSsyr2 clblasSsyr2_pfn
|
||||||
|
#undef clblasSsyr2k
|
||||||
|
//#define clblasSsyr2k clblasSsyr2k_pfn
|
||||||
|
#undef clblasSsyrk
|
||||||
|
//#define clblasSsyrk clblasSsyrk_pfn
|
||||||
|
#undef clblasStbmv
|
||||||
|
//#define clblasStbmv clblasStbmv_pfn
|
||||||
|
#undef clblasStbsv
|
||||||
|
//#define clblasStbsv clblasStbsv_pfn
|
||||||
|
#undef clblasStpmv
|
||||||
|
//#define clblasStpmv clblasStpmv_pfn
|
||||||
|
#undef clblasStpsv
|
||||||
|
//#define clblasStpsv clblasStpsv_pfn
|
||||||
|
#undef clblasStrmm
|
||||||
|
//#define clblasStrmm clblasStrmm_pfn
|
||||||
|
#undef clblasStrmv
|
||||||
|
//#define clblasStrmv clblasStrmv_pfn
|
||||||
|
#undef clblasStrsm
|
||||||
|
//#define clblasStrsm clblasStrsm_pfn
|
||||||
|
#undef clblasStrsv
|
||||||
|
//#define clblasStrsv clblasStrsv_pfn
|
||||||
|
#undef clblasTeardown
|
||||||
|
#define clblasTeardown clblasTeardown_pfn
|
||||||
|
#undef clblasZaxpy
|
||||||
|
//#define clblasZaxpy clblasZaxpy_pfn
|
||||||
|
#undef clblasZcopy
|
||||||
|
//#define clblasZcopy clblasZcopy_pfn
|
||||||
|
#undef clblasZdotc
|
||||||
|
//#define clblasZdotc clblasZdotc_pfn
|
||||||
|
#undef clblasZdotu
|
||||||
|
//#define clblasZdotu clblasZdotu_pfn
|
||||||
|
#undef clblasZdrot
|
||||||
|
//#define clblasZdrot clblasZdrot_pfn
|
||||||
|
#undef clblasZdscal
|
||||||
|
//#define clblasZdscal clblasZdscal_pfn
|
||||||
|
#undef clblasZgbmv
|
||||||
|
//#define clblasZgbmv clblasZgbmv_pfn
|
||||||
|
#undef clblasZgemm
|
||||||
|
#define clblasZgemm clblasZgemm_pfn
|
||||||
|
#undef clblasZgemv
|
||||||
|
//#define clblasZgemv clblasZgemv_pfn
|
||||||
|
#undef clblasZgerc
|
||||||
|
//#define clblasZgerc clblasZgerc_pfn
|
||||||
|
#undef clblasZgeru
|
||||||
|
//#define clblasZgeru clblasZgeru_pfn
|
||||||
|
#undef clblasZhbmv
|
||||||
|
//#define clblasZhbmv clblasZhbmv_pfn
|
||||||
|
#undef clblasZhemm
|
||||||
|
//#define clblasZhemm clblasZhemm_pfn
|
||||||
|
#undef clblasZhemv
|
||||||
|
//#define clblasZhemv clblasZhemv_pfn
|
||||||
|
#undef clblasZher
|
||||||
|
//#define clblasZher clblasZher_pfn
|
||||||
|
#undef clblasZher2
|
||||||
|
//#define clblasZher2 clblasZher2_pfn
|
||||||
|
#undef clblasZher2k
|
||||||
|
//#define clblasZher2k clblasZher2k_pfn
|
||||||
|
#undef clblasZherk
|
||||||
|
//#define clblasZherk clblasZherk_pfn
|
||||||
|
#undef clblasZhpmv
|
||||||
|
//#define clblasZhpmv clblasZhpmv_pfn
|
||||||
|
#undef clblasZhpr
|
||||||
|
//#define clblasZhpr clblasZhpr_pfn
|
||||||
|
#undef clblasZhpr2
|
||||||
|
//#define clblasZhpr2 clblasZhpr2_pfn
|
||||||
|
#undef clblasZrotg
|
||||||
|
//#define clblasZrotg clblasZrotg_pfn
|
||||||
|
#undef clblasZscal
|
||||||
|
//#define clblasZscal clblasZscal_pfn
|
||||||
|
#undef clblasZswap
|
||||||
|
//#define clblasZswap clblasZswap_pfn
|
||||||
|
#undef clblasZsymm
|
||||||
|
//#define clblasZsymm clblasZsymm_pfn
|
||||||
|
#undef clblasZsyr2k
|
||||||
|
//#define clblasZsyr2k clblasZsyr2k_pfn
|
||||||
|
#undef clblasZsyrk
|
||||||
|
//#define clblasZsyrk clblasZsyrk_pfn
|
||||||
|
#undef clblasZtbmv
|
||||||
|
//#define clblasZtbmv clblasZtbmv_pfn
|
||||||
|
#undef clblasZtbsv
|
||||||
|
//#define clblasZtbsv clblasZtbsv_pfn
|
||||||
|
#undef clblasZtpmv
|
||||||
|
//#define clblasZtpmv clblasZtpmv_pfn
|
||||||
|
#undef clblasZtpsv
|
||||||
|
//#define clblasZtpsv clblasZtpsv_pfn
|
||||||
|
#undef clblasZtrmm
|
||||||
|
//#define clblasZtrmm clblasZtrmm_pfn
|
||||||
|
#undef clblasZtrmv
|
||||||
|
//#define clblasZtrmv clblasZtrmv_pfn
|
||||||
|
#undef clblasZtrsm
|
||||||
|
//#define clblasZtrsm clblasZtrsm_pfn
|
||||||
|
#undef clblasZtrsv
|
||||||
|
//#define clblasZtrsv clblasZtrsv_pfn
|
||||||
|
#undef clblasiCamax
|
||||||
|
//#define clblasiCamax clblasiCamax_pfn
|
||||||
|
#undef clblasiDamax
|
||||||
|
//#define clblasiDamax clblasiDamax_pfn
|
||||||
|
#undef clblasiSamax
|
||||||
|
//#define clblasiSamax clblasiSamax_pfn
|
||||||
|
#undef clblasiZamax
|
||||||
|
//#define clblasiZamax clblasiZamax_pfn
|
||||||
|
|
||||||
|
// generated by parser_clblas.py
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgerc)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCgeru)(clblasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChemv)(clblasOrder order, clblasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasChpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasCtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDger)(clblasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT clblasStatus (*clblasSetup)();
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSger)(clblasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSspr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsymv)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasSsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasStrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT void (*clblasTeardown)();
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgbmv)(clblasOrder order, clblasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemm)(clblasOrder order, clblasTranspose transA, clblasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgemv)(clblasOrder order, clblasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgerc)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZgeru)(clblasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhbmv)(clblasOrder order, clblasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhemv)(clblasOrder order, clblasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZher2k)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZherk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpmv)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr)(clblasOrder order, clblasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZhpr2)(clblasOrder order, clblasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsymm)(clblasOrder order, clblasSide side, clblasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyr2k)(clblasOrder order, clblasUplo uplo, clblasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZsyrk)(clblasOrder order, clblasUplo uplo, clblasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtbsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtpsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrmv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsm)(clblasOrder order, clblasSide side, clblasUplo uplo, clblasTranspose transA, clblasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasZtrsv)(clblasOrder order, clblasUplo uplo, clblasTranspose trans, clblasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
|
//extern CL_RUNTIME_EXPORT clblasStatus (*clblasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_clfft.py
|
||||||
|
#define clfftBakePlan clfftBakePlan_
|
||||||
|
#define clfftCopyPlan clfftCopyPlan_
|
||||||
|
#define clfftCreateDefaultPlan clfftCreateDefaultPlan_
|
||||||
|
#define clfftDestroyPlan clfftDestroyPlan_
|
||||||
|
#define clfftEnqueueTransform clfftEnqueueTransform_
|
||||||
|
#define clfftGetLayout clfftGetLayout_
|
||||||
|
#define clfftGetPlanBatchSize clfftGetPlanBatchSize_
|
||||||
|
#define clfftGetPlanContext clfftGetPlanContext_
|
||||||
|
#define clfftGetPlanDim clfftGetPlanDim_
|
||||||
|
#define clfftGetPlanDistance clfftGetPlanDistance_
|
||||||
|
#define clfftGetPlanInStride clfftGetPlanInStride_
|
||||||
|
#define clfftGetPlanLength clfftGetPlanLength_
|
||||||
|
#define clfftGetPlanOutStride clfftGetPlanOutStride_
|
||||||
|
#define clfftGetPlanPrecision clfftGetPlanPrecision_
|
||||||
|
#define clfftGetPlanScale clfftGetPlanScale_
|
||||||
|
#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_
|
||||||
|
#define clfftGetResultLocation clfftGetResultLocation_
|
||||||
|
#define clfftGetTmpBufSize clfftGetTmpBufSize_
|
||||||
|
#define clfftGetVersion clfftGetVersion_
|
||||||
|
#define clfftSetLayout clfftSetLayout_
|
||||||
|
#define clfftSetPlanBatchSize clfftSetPlanBatchSize_
|
||||||
|
#define clfftSetPlanCallback clfftSetPlanCallback_
|
||||||
|
#define clfftSetPlanDim clfftSetPlanDim_
|
||||||
|
#define clfftSetPlanDistance clfftSetPlanDistance_
|
||||||
|
#define clfftSetPlanInStride clfftSetPlanInStride_
|
||||||
|
#define clfftSetPlanLength clfftSetPlanLength_
|
||||||
|
#define clfftSetPlanOutStride clfftSetPlanOutStride_
|
||||||
|
#define clfftSetPlanPrecision clfftSetPlanPrecision_
|
||||||
|
#define clfftSetPlanScale clfftSetPlanScale_
|
||||||
|
#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_
|
||||||
|
#define clfftSetResultLocation clfftSetResultLocation_
|
||||||
|
#define clfftSetup clfftSetup_
|
||||||
|
#define clfftTeardown clfftTeardown_
|
||||||
|
|
||||||
|
#include <clFFT.h>
|
||||||
|
|
||||||
|
// generated by parser_clfft.py
|
||||||
|
#undef clfftBakePlan
|
||||||
|
#define clfftBakePlan clfftBakePlan_pfn
|
||||||
|
#undef clfftCopyPlan
|
||||||
|
//#define clfftCopyPlan clfftCopyPlan_pfn
|
||||||
|
#undef clfftCreateDefaultPlan
|
||||||
|
#define clfftCreateDefaultPlan clfftCreateDefaultPlan_pfn
|
||||||
|
#undef clfftDestroyPlan
|
||||||
|
#define clfftDestroyPlan clfftDestroyPlan_pfn
|
||||||
|
#undef clfftEnqueueTransform
|
||||||
|
#define clfftEnqueueTransform clfftEnqueueTransform_pfn
|
||||||
|
#undef clfftGetLayout
|
||||||
|
//#define clfftGetLayout clfftGetLayout_pfn
|
||||||
|
#undef clfftGetPlanBatchSize
|
||||||
|
//#define clfftGetPlanBatchSize clfftGetPlanBatchSize_pfn
|
||||||
|
#undef clfftGetPlanContext
|
||||||
|
//#define clfftGetPlanContext clfftGetPlanContext_pfn
|
||||||
|
#undef clfftGetPlanDim
|
||||||
|
//#define clfftGetPlanDim clfftGetPlanDim_pfn
|
||||||
|
#undef clfftGetPlanDistance
|
||||||
|
//#define clfftGetPlanDistance clfftGetPlanDistance_pfn
|
||||||
|
#undef clfftGetPlanInStride
|
||||||
|
//#define clfftGetPlanInStride clfftGetPlanInStride_pfn
|
||||||
|
#undef clfftGetPlanLength
|
||||||
|
//#define clfftGetPlanLength clfftGetPlanLength_pfn
|
||||||
|
#undef clfftGetPlanOutStride
|
||||||
|
//#define clfftGetPlanOutStride clfftGetPlanOutStride_pfn
|
||||||
|
#undef clfftGetPlanPrecision
|
||||||
|
//#define clfftGetPlanPrecision clfftGetPlanPrecision_pfn
|
||||||
|
#undef clfftGetPlanScale
|
||||||
|
//#define clfftGetPlanScale clfftGetPlanScale_pfn
|
||||||
|
#undef clfftGetPlanTransposeResult
|
||||||
|
//#define clfftGetPlanTransposeResult clfftGetPlanTransposeResult_pfn
|
||||||
|
#undef clfftGetResultLocation
|
||||||
|
//#define clfftGetResultLocation clfftGetResultLocation_pfn
|
||||||
|
#undef clfftGetTmpBufSize
|
||||||
|
#define clfftGetTmpBufSize clfftGetTmpBufSize_pfn
|
||||||
|
#undef clfftGetVersion
|
||||||
|
#define clfftGetVersion clfftGetVersion_pfn
|
||||||
|
#undef clfftSetLayout
|
||||||
|
#define clfftSetLayout clfftSetLayout_pfn
|
||||||
|
#undef clfftSetPlanBatchSize
|
||||||
|
#define clfftSetPlanBatchSize clfftSetPlanBatchSize_pfn
|
||||||
|
#undef clfftSetPlanCallback
|
||||||
|
//#define clfftSetPlanCallback clfftSetPlanCallback_pfn
|
||||||
|
#undef clfftSetPlanDim
|
||||||
|
//#define clfftSetPlanDim clfftSetPlanDim_pfn
|
||||||
|
#undef clfftSetPlanDistance
|
||||||
|
#define clfftSetPlanDistance clfftSetPlanDistance_pfn
|
||||||
|
#undef clfftSetPlanInStride
|
||||||
|
#define clfftSetPlanInStride clfftSetPlanInStride_pfn
|
||||||
|
#undef clfftSetPlanLength
|
||||||
|
//#define clfftSetPlanLength clfftSetPlanLength_pfn
|
||||||
|
#undef clfftSetPlanOutStride
|
||||||
|
#define clfftSetPlanOutStride clfftSetPlanOutStride_pfn
|
||||||
|
#undef clfftSetPlanPrecision
|
||||||
|
#define clfftSetPlanPrecision clfftSetPlanPrecision_pfn
|
||||||
|
#undef clfftSetPlanScale
|
||||||
|
#define clfftSetPlanScale clfftSetPlanScale_pfn
|
||||||
|
#undef clfftSetPlanTransposeResult
|
||||||
|
//#define clfftSetPlanTransposeResult clfftSetPlanTransposeResult_pfn
|
||||||
|
#undef clfftSetResultLocation
|
||||||
|
#define clfftSetResultLocation clfftSetResultLocation_pfn
|
||||||
|
#undef clfftSetup
|
||||||
|
#define clfftSetup clfftSetup_pfn
|
||||||
|
#undef clfftTeardown
|
||||||
|
#define clfftTeardown clfftTeardown_pfn
|
||||||
|
|
||||||
|
// generated by parser_clfft.py
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftBakePlan)(clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clfftPlanHandle plHandle, void* user_data), void* user_data);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftCopyPlan)(clfftPlanHandle* out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftCreateDefaultPlan)(clfftPlanHandle* plHandle, cl_context context, const clfftDim dim, const size_t* clLengths);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftDestroyPlan)(clfftPlanHandle* plHandle);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftEnqueueTransform)(clfftPlanHandle plHandle, clfftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetLayout)(const clfftPlanHandle plHandle, clfftLayout* iLayout, clfftLayout* oLayout);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanBatchSize)(const clfftPlanHandle plHandle, size_t* batchSize);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanContext)(const clfftPlanHandle plHandle, cl_context* context);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDim)(const clfftPlanHandle plHandle, clfftDim* dim, cl_uint* size);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanDistance)(const clfftPlanHandle plHandle, size_t* iDist, size_t* oDist);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanInStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanLength)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clLengths);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanOutStride)(const clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanPrecision)(const clfftPlanHandle plHandle, clfftPrecision* precision);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanScale)(const clfftPlanHandle plHandle, clfftDirection dir, cl_float* scale);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetPlanTransposeResult)(const clfftPlanHandle plHandle, clfftResultTransposed* transposed);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetResultLocation)(const clfftPlanHandle plHandle, clfftResultLocation* placeness);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetTmpBufSize)(const clfftPlanHandle plHandle, size_t* buffersize);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetLayout)(clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanBatchSize)(clfftPlanHandle plHandle, size_t batchSize);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanCallback)(clfftPlanHandle plHandle, const char* funcName, const char* funcString, int localMemSize, clfftCallbackType callbackType, cl_mem* userdata, int numUserdataBuffers);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDim)(clfftPlanHandle plHandle, const clfftDim dim);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanDistance)(clfftPlanHandle plHandle, size_t iDist, size_t oDist);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanInStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanLength)(clfftPlanHandle plHandle, const clfftDim dim, const size_t* clLengths);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanOutStride)(clfftPlanHandle plHandle, const clfftDim dim, size_t* clStrides);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanPrecision)(clfftPlanHandle plHandle, clfftPrecision precision);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanScale)(clfftPlanHandle plHandle, clfftDirection dir, cl_float scale);
|
||||||
|
//extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetPlanTransposeResult)(clfftPlanHandle plHandle, clfftResultTransposed transposed);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetResultLocation)(clfftPlanHandle plHandle, clfftResultLocation placeness);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftSetup)(const clfftSetupData* setupData);
|
||||||
|
extern CL_RUNTIME_EXPORT clfftStatus (*clfftTeardown)();
|
||||||
@@ -0,0 +1,371 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#define clBuildProgram clBuildProgram_
|
||||||
|
#define clCompileProgram clCompileProgram_
|
||||||
|
#define clCreateBuffer clCreateBuffer_
|
||||||
|
#define clCreateCommandQueue clCreateCommandQueue_
|
||||||
|
#define clCreateContext clCreateContext_
|
||||||
|
#define clCreateContextFromType clCreateContextFromType_
|
||||||
|
#define clCreateImage clCreateImage_
|
||||||
|
#define clCreateImage2D clCreateImage2D_
|
||||||
|
#define clCreateImage3D clCreateImage3D_
|
||||||
|
#define clCreateKernel clCreateKernel_
|
||||||
|
#define clCreateKernelsInProgram clCreateKernelsInProgram_
|
||||||
|
#define clCreateProgramWithBinary clCreateProgramWithBinary_
|
||||||
|
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_
|
||||||
|
#define clCreateProgramWithSource clCreateProgramWithSource_
|
||||||
|
#define clCreateSampler clCreateSampler_
|
||||||
|
#define clCreateSubBuffer clCreateSubBuffer_
|
||||||
|
#define clCreateSubDevices clCreateSubDevices_
|
||||||
|
#define clCreateUserEvent clCreateUserEvent_
|
||||||
|
#define clEnqueueBarrier clEnqueueBarrier_
|
||||||
|
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_
|
||||||
|
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_
|
||||||
|
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_
|
||||||
|
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_
|
||||||
|
#define clEnqueueCopyImage clEnqueueCopyImage_
|
||||||
|
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_
|
||||||
|
#define clEnqueueFillBuffer clEnqueueFillBuffer_
|
||||||
|
#define clEnqueueFillImage clEnqueueFillImage_
|
||||||
|
#define clEnqueueMapBuffer clEnqueueMapBuffer_
|
||||||
|
#define clEnqueueMapImage clEnqueueMapImage_
|
||||||
|
#define clEnqueueMarker clEnqueueMarker_
|
||||||
|
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_
|
||||||
|
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_
|
||||||
|
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_
|
||||||
|
#define clEnqueueNativeKernel clEnqueueNativeKernel_
|
||||||
|
#define clEnqueueReadBuffer clEnqueueReadBuffer_
|
||||||
|
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_
|
||||||
|
#define clEnqueueReadImage clEnqueueReadImage_
|
||||||
|
#define clEnqueueTask clEnqueueTask_
|
||||||
|
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_
|
||||||
|
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_
|
||||||
|
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_
|
||||||
|
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_
|
||||||
|
#define clEnqueueWriteImage clEnqueueWriteImage_
|
||||||
|
#define clFinish clFinish_
|
||||||
|
#define clFlush clFlush_
|
||||||
|
#define clGetCommandQueueInfo clGetCommandQueueInfo_
|
||||||
|
#define clGetContextInfo clGetContextInfo_
|
||||||
|
#define clGetDeviceIDs clGetDeviceIDs_
|
||||||
|
#define clGetDeviceInfo clGetDeviceInfo_
|
||||||
|
#define clGetEventInfo clGetEventInfo_
|
||||||
|
#define clGetEventProfilingInfo clGetEventProfilingInfo_
|
||||||
|
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_
|
||||||
|
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_
|
||||||
|
#define clGetImageInfo clGetImageInfo_
|
||||||
|
#define clGetKernelArgInfo clGetKernelArgInfo_
|
||||||
|
#define clGetKernelInfo clGetKernelInfo_
|
||||||
|
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_
|
||||||
|
#define clGetMemObjectInfo clGetMemObjectInfo_
|
||||||
|
#define clGetPlatformIDs clGetPlatformIDs_
|
||||||
|
#define clGetPlatformInfo clGetPlatformInfo_
|
||||||
|
#define clGetProgramBuildInfo clGetProgramBuildInfo_
|
||||||
|
#define clGetProgramInfo clGetProgramInfo_
|
||||||
|
#define clGetSamplerInfo clGetSamplerInfo_
|
||||||
|
#define clGetSupportedImageFormats clGetSupportedImageFormats_
|
||||||
|
#define clLinkProgram clLinkProgram_
|
||||||
|
#define clReleaseCommandQueue clReleaseCommandQueue_
|
||||||
|
#define clReleaseContext clReleaseContext_
|
||||||
|
#define clReleaseDevice clReleaseDevice_
|
||||||
|
#define clReleaseEvent clReleaseEvent_
|
||||||
|
#define clReleaseKernel clReleaseKernel_
|
||||||
|
#define clReleaseMemObject clReleaseMemObject_
|
||||||
|
#define clReleaseProgram clReleaseProgram_
|
||||||
|
#define clReleaseSampler clReleaseSampler_
|
||||||
|
#define clRetainCommandQueue clRetainCommandQueue_
|
||||||
|
#define clRetainContext clRetainContext_
|
||||||
|
#define clRetainDevice clRetainDevice_
|
||||||
|
#define clRetainEvent clRetainEvent_
|
||||||
|
#define clRetainKernel clRetainKernel_
|
||||||
|
#define clRetainMemObject clRetainMemObject_
|
||||||
|
#define clRetainProgram clRetainProgram_
|
||||||
|
#define clRetainSampler clRetainSampler_
|
||||||
|
#define clSetEventCallback clSetEventCallback_
|
||||||
|
#define clSetKernelArg clSetKernelArg_
|
||||||
|
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_
|
||||||
|
#define clSetUserEventStatus clSetUserEventStatus_
|
||||||
|
#define clUnloadCompiler clUnloadCompiler_
|
||||||
|
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_
|
||||||
|
#define clWaitForEvents clWaitForEvents_
|
||||||
|
|
||||||
|
#if defined __APPLE__
|
||||||
|
#define CL_SILENCE_DEPRECATION
|
||||||
|
#include <OpenCL/cl.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#undef clBuildProgram
|
||||||
|
#define clBuildProgram clBuildProgram_pfn
|
||||||
|
#undef clCompileProgram
|
||||||
|
#define clCompileProgram clCompileProgram_pfn
|
||||||
|
#undef clCreateBuffer
|
||||||
|
#define clCreateBuffer clCreateBuffer_pfn
|
||||||
|
#undef clCreateCommandQueue
|
||||||
|
#define clCreateCommandQueue clCreateCommandQueue_pfn
|
||||||
|
#undef clCreateContext
|
||||||
|
#define clCreateContext clCreateContext_pfn
|
||||||
|
#undef clCreateContextFromType
|
||||||
|
#define clCreateContextFromType clCreateContextFromType_pfn
|
||||||
|
#undef clCreateImage
|
||||||
|
#define clCreateImage clCreateImage_pfn
|
||||||
|
#undef clCreateImage2D
|
||||||
|
#define clCreateImage2D clCreateImage2D_pfn
|
||||||
|
#undef clCreateImage3D
|
||||||
|
#define clCreateImage3D clCreateImage3D_pfn
|
||||||
|
#undef clCreateKernel
|
||||||
|
#define clCreateKernel clCreateKernel_pfn
|
||||||
|
#undef clCreateKernelsInProgram
|
||||||
|
#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn
|
||||||
|
#undef clCreateProgramWithBinary
|
||||||
|
#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn
|
||||||
|
#undef clCreateProgramWithBuiltInKernels
|
||||||
|
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn
|
||||||
|
#undef clCreateProgramWithSource
|
||||||
|
#define clCreateProgramWithSource clCreateProgramWithSource_pfn
|
||||||
|
#undef clCreateSampler
|
||||||
|
#define clCreateSampler clCreateSampler_pfn
|
||||||
|
#undef clCreateSubBuffer
|
||||||
|
#define clCreateSubBuffer clCreateSubBuffer_pfn
|
||||||
|
#undef clCreateSubDevices
|
||||||
|
#define clCreateSubDevices clCreateSubDevices_pfn
|
||||||
|
#undef clCreateUserEvent
|
||||||
|
#define clCreateUserEvent clCreateUserEvent_pfn
|
||||||
|
#undef clEnqueueBarrier
|
||||||
|
#define clEnqueueBarrier clEnqueueBarrier_pfn
|
||||||
|
#undef clEnqueueBarrierWithWaitList
|
||||||
|
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn
|
||||||
|
#undef clEnqueueCopyBuffer
|
||||||
|
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn
|
||||||
|
#undef clEnqueueCopyBufferRect
|
||||||
|
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn
|
||||||
|
#undef clEnqueueCopyBufferToImage
|
||||||
|
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn
|
||||||
|
#undef clEnqueueCopyImage
|
||||||
|
#define clEnqueueCopyImage clEnqueueCopyImage_pfn
|
||||||
|
#undef clEnqueueCopyImageToBuffer
|
||||||
|
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn
|
||||||
|
#undef clEnqueueFillBuffer
|
||||||
|
#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn
|
||||||
|
#undef clEnqueueFillImage
|
||||||
|
#define clEnqueueFillImage clEnqueueFillImage_pfn
|
||||||
|
#undef clEnqueueMapBuffer
|
||||||
|
#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn
|
||||||
|
#undef clEnqueueMapImage
|
||||||
|
#define clEnqueueMapImage clEnqueueMapImage_pfn
|
||||||
|
#undef clEnqueueMarker
|
||||||
|
#define clEnqueueMarker clEnqueueMarker_pfn
|
||||||
|
#undef clEnqueueMarkerWithWaitList
|
||||||
|
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn
|
||||||
|
#undef clEnqueueMigrateMemObjects
|
||||||
|
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn
|
||||||
|
#undef clEnqueueNDRangeKernel
|
||||||
|
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn
|
||||||
|
#undef clEnqueueNativeKernel
|
||||||
|
#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn
|
||||||
|
#undef clEnqueueReadBuffer
|
||||||
|
#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn
|
||||||
|
#undef clEnqueueReadBufferRect
|
||||||
|
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn
|
||||||
|
#undef clEnqueueReadImage
|
||||||
|
#define clEnqueueReadImage clEnqueueReadImage_pfn
|
||||||
|
#undef clEnqueueTask
|
||||||
|
#define clEnqueueTask clEnqueueTask_pfn
|
||||||
|
#undef clEnqueueUnmapMemObject
|
||||||
|
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn
|
||||||
|
#undef clEnqueueWaitForEvents
|
||||||
|
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn
|
||||||
|
#undef clEnqueueWriteBuffer
|
||||||
|
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn
|
||||||
|
#undef clEnqueueWriteBufferRect
|
||||||
|
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn
|
||||||
|
#undef clEnqueueWriteImage
|
||||||
|
#define clEnqueueWriteImage clEnqueueWriteImage_pfn
|
||||||
|
#undef clFinish
|
||||||
|
#define clFinish clFinish_pfn
|
||||||
|
#undef clFlush
|
||||||
|
#define clFlush clFlush_pfn
|
||||||
|
#undef clGetCommandQueueInfo
|
||||||
|
#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn
|
||||||
|
#undef clGetContextInfo
|
||||||
|
#define clGetContextInfo clGetContextInfo_pfn
|
||||||
|
#undef clGetDeviceIDs
|
||||||
|
#define clGetDeviceIDs clGetDeviceIDs_pfn
|
||||||
|
#undef clGetDeviceInfo
|
||||||
|
#define clGetDeviceInfo clGetDeviceInfo_pfn
|
||||||
|
#undef clGetEventInfo
|
||||||
|
#define clGetEventInfo clGetEventInfo_pfn
|
||||||
|
#undef clGetEventProfilingInfo
|
||||||
|
#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn
|
||||||
|
#undef clGetExtensionFunctionAddress
|
||||||
|
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn
|
||||||
|
#undef clGetExtensionFunctionAddressForPlatform
|
||||||
|
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn
|
||||||
|
#undef clGetImageInfo
|
||||||
|
#define clGetImageInfo clGetImageInfo_pfn
|
||||||
|
#undef clGetKernelArgInfo
|
||||||
|
#define clGetKernelArgInfo clGetKernelArgInfo_pfn
|
||||||
|
#undef clGetKernelInfo
|
||||||
|
#define clGetKernelInfo clGetKernelInfo_pfn
|
||||||
|
#undef clGetKernelWorkGroupInfo
|
||||||
|
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn
|
||||||
|
#undef clGetMemObjectInfo
|
||||||
|
#define clGetMemObjectInfo clGetMemObjectInfo_pfn
|
||||||
|
#undef clGetPlatformIDs
|
||||||
|
#define clGetPlatformIDs clGetPlatformIDs_pfn
|
||||||
|
#undef clGetPlatformInfo
|
||||||
|
#define clGetPlatformInfo clGetPlatformInfo_pfn
|
||||||
|
#undef clGetProgramBuildInfo
|
||||||
|
#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn
|
||||||
|
#undef clGetProgramInfo
|
||||||
|
#define clGetProgramInfo clGetProgramInfo_pfn
|
||||||
|
#undef clGetSamplerInfo
|
||||||
|
#define clGetSamplerInfo clGetSamplerInfo_pfn
|
||||||
|
#undef clGetSupportedImageFormats
|
||||||
|
#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn
|
||||||
|
#undef clLinkProgram
|
||||||
|
#define clLinkProgram clLinkProgram_pfn
|
||||||
|
#undef clReleaseCommandQueue
|
||||||
|
#define clReleaseCommandQueue clReleaseCommandQueue_pfn
|
||||||
|
#undef clReleaseContext
|
||||||
|
#define clReleaseContext clReleaseContext_pfn
|
||||||
|
#undef clReleaseDevice
|
||||||
|
#define clReleaseDevice clReleaseDevice_pfn
|
||||||
|
#undef clReleaseEvent
|
||||||
|
#define clReleaseEvent clReleaseEvent_pfn
|
||||||
|
#undef clReleaseKernel
|
||||||
|
#define clReleaseKernel clReleaseKernel_pfn
|
||||||
|
#undef clReleaseMemObject
|
||||||
|
#define clReleaseMemObject clReleaseMemObject_pfn
|
||||||
|
#undef clReleaseProgram
|
||||||
|
#define clReleaseProgram clReleaseProgram_pfn
|
||||||
|
#undef clReleaseSampler
|
||||||
|
#define clReleaseSampler clReleaseSampler_pfn
|
||||||
|
#undef clRetainCommandQueue
|
||||||
|
#define clRetainCommandQueue clRetainCommandQueue_pfn
|
||||||
|
#undef clRetainContext
|
||||||
|
#define clRetainContext clRetainContext_pfn
|
||||||
|
#undef clRetainDevice
|
||||||
|
#define clRetainDevice clRetainDevice_pfn
|
||||||
|
#undef clRetainEvent
|
||||||
|
#define clRetainEvent clRetainEvent_pfn
|
||||||
|
#undef clRetainKernel
|
||||||
|
#define clRetainKernel clRetainKernel_pfn
|
||||||
|
#undef clRetainMemObject
|
||||||
|
#define clRetainMemObject clRetainMemObject_pfn
|
||||||
|
#undef clRetainProgram
|
||||||
|
#define clRetainProgram clRetainProgram_pfn
|
||||||
|
#undef clRetainSampler
|
||||||
|
#define clRetainSampler clRetainSampler_pfn
|
||||||
|
#undef clSetEventCallback
|
||||||
|
#define clSetEventCallback clSetEventCallback_pfn
|
||||||
|
#undef clSetKernelArg
|
||||||
|
#define clSetKernelArg clSetKernelArg_pfn
|
||||||
|
#undef clSetMemObjectDestructorCallback
|
||||||
|
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn
|
||||||
|
#undef clSetUserEventStatus
|
||||||
|
#define clSetUserEventStatus clSetUserEventStatus_pfn
|
||||||
|
#undef clUnloadCompiler
|
||||||
|
#define clUnloadCompiler clUnloadCompiler_pfn
|
||||||
|
#undef clUnloadPlatformCompiler
|
||||||
|
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn
|
||||||
|
#undef clWaitForEvents
|
||||||
|
#define clWaitForEvents clWaitForEvents_pfn
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*);
|
||||||
|
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)();
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*);
|
||||||
@@ -0,0 +1,272 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#undef clBuildProgram
|
||||||
|
#define clBuildProgram clBuildProgram_fn
|
||||||
|
inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clCompileProgram
|
||||||
|
#define clCompileProgram clCompileProgram_fn
|
||||||
|
inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clCreateBuffer
|
||||||
|
#define clCreateBuffer clCreateBuffer_fn
|
||||||
|
inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateCommandQueue
|
||||||
|
#define clCreateCommandQueue clCreateCommandQueue_fn
|
||||||
|
inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clCreateContext
|
||||||
|
#define clCreateContext clCreateContext_fn
|
||||||
|
inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clCreateContextFromType
|
||||||
|
#define clCreateContextFromType clCreateContextFromType_fn
|
||||||
|
inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateImage
|
||||||
|
#define clCreateImage clCreateImage_fn
|
||||||
|
inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clCreateImage2D
|
||||||
|
#define clCreateImage2D clCreateImage2D_fn
|
||||||
|
inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
|
||||||
|
#undef clCreateImage3D
|
||||||
|
#define clCreateImage3D clCreateImage3D_fn
|
||||||
|
inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
|
||||||
|
#undef clCreateKernel
|
||||||
|
#define clCreateKernel clCreateKernel_fn
|
||||||
|
inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); }
|
||||||
|
#undef clCreateKernelsInProgram
|
||||||
|
#define clCreateKernelsInProgram clCreateKernelsInProgram_fn
|
||||||
|
inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clCreateProgramWithBinary
|
||||||
|
#define clCreateProgramWithBinary clCreateProgramWithBinary_fn
|
||||||
|
inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); }
|
||||||
|
#undef clCreateProgramWithBuiltInKernels
|
||||||
|
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn
|
||||||
|
inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateProgramWithSource
|
||||||
|
#define clCreateProgramWithSource clCreateProgramWithSource_fn
|
||||||
|
inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateSampler
|
||||||
|
#define clCreateSampler clCreateSampler_fn
|
||||||
|
inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateSubBuffer
|
||||||
|
#define clCreateSubBuffer clCreateSubBuffer_fn
|
||||||
|
inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateSubDevices
|
||||||
|
#define clCreateSubDevices clCreateSubDevices_fn
|
||||||
|
inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clCreateUserEvent
|
||||||
|
#define clCreateUserEvent clCreateUserEvent_fn
|
||||||
|
inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); }
|
||||||
|
#undef clEnqueueBarrier
|
||||||
|
#define clEnqueueBarrier clEnqueueBarrier_fn
|
||||||
|
inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); }
|
||||||
|
#undef clEnqueueBarrierWithWaitList
|
||||||
|
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn
|
||||||
|
inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clEnqueueCopyBuffer
|
||||||
|
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn
|
||||||
|
inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueCopyBufferRect
|
||||||
|
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn
|
||||||
|
inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); }
|
||||||
|
#undef clEnqueueCopyBufferToImage
|
||||||
|
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn
|
||||||
|
inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueCopyImage
|
||||||
|
#define clEnqueueCopyImage clEnqueueCopyImage_fn
|
||||||
|
inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueCopyImageToBuffer
|
||||||
|
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn
|
||||||
|
inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueFillBuffer
|
||||||
|
#define clEnqueueFillBuffer clEnqueueFillBuffer_fn
|
||||||
|
inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueFillImage
|
||||||
|
#define clEnqueueFillImage clEnqueueFillImage_fn
|
||||||
|
inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
|
||||||
|
#undef clEnqueueMapBuffer
|
||||||
|
#define clEnqueueMapBuffer clEnqueueMapBuffer_fn
|
||||||
|
inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
|
||||||
|
#undef clEnqueueMapImage
|
||||||
|
#define clEnqueueMapImage clEnqueueMapImage_fn
|
||||||
|
inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); }
|
||||||
|
#undef clEnqueueMarker
|
||||||
|
#define clEnqueueMarker clEnqueueMarker_fn
|
||||||
|
inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); }
|
||||||
|
#undef clEnqueueMarkerWithWaitList
|
||||||
|
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn
|
||||||
|
inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clEnqueueMigrateMemObjects
|
||||||
|
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn
|
||||||
|
inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); }
|
||||||
|
#undef clEnqueueNDRangeKernel
|
||||||
|
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn
|
||||||
|
inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueNativeKernel
|
||||||
|
#define clEnqueueNativeKernel clEnqueueNativeKernel_fn
|
||||||
|
inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
|
||||||
|
#undef clEnqueueReadBuffer
|
||||||
|
#define clEnqueueReadBuffer clEnqueueReadBuffer_fn
|
||||||
|
inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueReadBufferRect
|
||||||
|
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn
|
||||||
|
inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
|
||||||
|
#undef clEnqueueReadImage
|
||||||
|
#define clEnqueueReadImage clEnqueueReadImage_fn
|
||||||
|
inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
|
||||||
|
#undef clEnqueueTask
|
||||||
|
#define clEnqueueTask clEnqueueTask_fn
|
||||||
|
inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clEnqueueUnmapMemObject
|
||||||
|
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn
|
||||||
|
inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clEnqueueWaitForEvents
|
||||||
|
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn
|
||||||
|
inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); }
|
||||||
|
#undef clEnqueueWriteBuffer
|
||||||
|
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn
|
||||||
|
inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clEnqueueWriteBufferRect
|
||||||
|
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn
|
||||||
|
inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
|
||||||
|
#undef clEnqueueWriteImage
|
||||||
|
#define clEnqueueWriteImage clEnqueueWriteImage_fn
|
||||||
|
inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
|
||||||
|
#undef clFinish
|
||||||
|
#define clFinish clFinish_fn
|
||||||
|
inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); }
|
||||||
|
#undef clFlush
|
||||||
|
#define clFlush clFlush_fn
|
||||||
|
inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); }
|
||||||
|
#undef clGetCommandQueueInfo
|
||||||
|
#define clGetCommandQueueInfo clGetCommandQueueInfo_fn
|
||||||
|
inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetContextInfo
|
||||||
|
#define clGetContextInfo clGetContextInfo_fn
|
||||||
|
inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetDeviceIDs
|
||||||
|
#define clGetDeviceIDs clGetDeviceIDs_fn
|
||||||
|
inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetDeviceInfo
|
||||||
|
#define clGetDeviceInfo clGetDeviceInfo_fn
|
||||||
|
inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetEventInfo
|
||||||
|
#define clGetEventInfo clGetEventInfo_fn
|
||||||
|
inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetEventProfilingInfo
|
||||||
|
#define clGetEventProfilingInfo clGetEventProfilingInfo_fn
|
||||||
|
inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetExtensionFunctionAddress
|
||||||
|
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn
|
||||||
|
inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); }
|
||||||
|
#undef clGetExtensionFunctionAddressForPlatform
|
||||||
|
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn
|
||||||
|
inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); }
|
||||||
|
#undef clGetImageInfo
|
||||||
|
#define clGetImageInfo clGetImageInfo_fn
|
||||||
|
inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetKernelArgInfo
|
||||||
|
#define clGetKernelArgInfo clGetKernelArgInfo_fn
|
||||||
|
inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clGetKernelInfo
|
||||||
|
#define clGetKernelInfo clGetKernelInfo_fn
|
||||||
|
inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetKernelWorkGroupInfo
|
||||||
|
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn
|
||||||
|
inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clGetMemObjectInfo
|
||||||
|
#define clGetMemObjectInfo clGetMemObjectInfo_fn
|
||||||
|
inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetPlatformIDs
|
||||||
|
#define clGetPlatformIDs clGetPlatformIDs_fn
|
||||||
|
inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); }
|
||||||
|
#undef clGetPlatformInfo
|
||||||
|
#define clGetPlatformInfo clGetPlatformInfo_fn
|
||||||
|
inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetProgramBuildInfo
|
||||||
|
#define clGetProgramBuildInfo clGetProgramBuildInfo_fn
|
||||||
|
inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clGetProgramInfo
|
||||||
|
#define clGetProgramInfo clGetProgramInfo_fn
|
||||||
|
inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetSamplerInfo
|
||||||
|
#define clGetSamplerInfo clGetSamplerInfo_fn
|
||||||
|
inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetSupportedImageFormats
|
||||||
|
#define clGetSupportedImageFormats clGetSupportedImageFormats_fn
|
||||||
|
inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clLinkProgram
|
||||||
|
#define clLinkProgram clLinkProgram_fn
|
||||||
|
inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
|
||||||
|
#undef clReleaseCommandQueue
|
||||||
|
#define clReleaseCommandQueue clReleaseCommandQueue_fn
|
||||||
|
inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); }
|
||||||
|
#undef clReleaseContext
|
||||||
|
#define clReleaseContext clReleaseContext_fn
|
||||||
|
inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); }
|
||||||
|
#undef clReleaseDevice
|
||||||
|
#define clReleaseDevice clReleaseDevice_fn
|
||||||
|
inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); }
|
||||||
|
#undef clReleaseEvent
|
||||||
|
#define clReleaseEvent clReleaseEvent_fn
|
||||||
|
inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); }
|
||||||
|
#undef clReleaseKernel
|
||||||
|
#define clReleaseKernel clReleaseKernel_fn
|
||||||
|
inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); }
|
||||||
|
#undef clReleaseMemObject
|
||||||
|
#define clReleaseMemObject clReleaseMemObject_fn
|
||||||
|
inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); }
|
||||||
|
#undef clReleaseProgram
|
||||||
|
#define clReleaseProgram clReleaseProgram_fn
|
||||||
|
inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); }
|
||||||
|
#undef clReleaseSampler
|
||||||
|
#define clReleaseSampler clReleaseSampler_fn
|
||||||
|
inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); }
|
||||||
|
#undef clRetainCommandQueue
|
||||||
|
#define clRetainCommandQueue clRetainCommandQueue_fn
|
||||||
|
inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); }
|
||||||
|
#undef clRetainContext
|
||||||
|
#define clRetainContext clRetainContext_fn
|
||||||
|
inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); }
|
||||||
|
#undef clRetainDevice
|
||||||
|
#define clRetainDevice clRetainDevice_fn
|
||||||
|
inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); }
|
||||||
|
#undef clRetainEvent
|
||||||
|
#define clRetainEvent clRetainEvent_fn
|
||||||
|
inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); }
|
||||||
|
#undef clRetainKernel
|
||||||
|
#define clRetainKernel clRetainKernel_fn
|
||||||
|
inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); }
|
||||||
|
#undef clRetainMemObject
|
||||||
|
#define clRetainMemObject clRetainMemObject_fn
|
||||||
|
inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); }
|
||||||
|
#undef clRetainProgram
|
||||||
|
#define clRetainProgram clRetainProgram_fn
|
||||||
|
inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); }
|
||||||
|
#undef clRetainSampler
|
||||||
|
#define clRetainSampler clRetainSampler_fn
|
||||||
|
inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); }
|
||||||
|
#undef clSetEventCallback
|
||||||
|
#define clSetEventCallback clSetEventCallback_fn
|
||||||
|
inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clSetKernelArg
|
||||||
|
#define clSetKernelArg clSetKernelArg_fn
|
||||||
|
inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clSetMemObjectDestructorCallback
|
||||||
|
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn
|
||||||
|
inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); }
|
||||||
|
#undef clSetUserEventStatus
|
||||||
|
#define clSetUserEventStatus clSetUserEventStatus_fn
|
||||||
|
inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); }
|
||||||
|
#undef clUnloadCompiler
|
||||||
|
#define clUnloadCompiler clUnloadCompiler_fn
|
||||||
|
inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); }
|
||||||
|
#undef clUnloadPlatformCompiler
|
||||||
|
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn
|
||||||
|
inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); }
|
||||||
|
#undef clWaitForEvents
|
||||||
|
#define clWaitForEvents clWaitForEvents_fn
|
||||||
|
inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); }
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#define clCreateFromGLBuffer clCreateFromGLBuffer_
|
||||||
|
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_
|
||||||
|
#define clCreateFromGLTexture clCreateFromGLTexture_
|
||||||
|
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_
|
||||||
|
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_
|
||||||
|
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_
|
||||||
|
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_
|
||||||
|
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_
|
||||||
|
#define clGetGLObjectInfo clGetGLObjectInfo_
|
||||||
|
#define clGetGLTextureInfo clGetGLTextureInfo_
|
||||||
|
|
||||||
|
#if defined __APPLE__
|
||||||
|
#include <OpenCL/cl_gl.h>
|
||||||
|
#else
|
||||||
|
#include <CL/cl_gl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#undef clCreateFromGLBuffer
|
||||||
|
#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn
|
||||||
|
#undef clCreateFromGLRenderbuffer
|
||||||
|
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn
|
||||||
|
#undef clCreateFromGLTexture
|
||||||
|
#define clCreateFromGLTexture clCreateFromGLTexture_pfn
|
||||||
|
#undef clCreateFromGLTexture2D
|
||||||
|
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn
|
||||||
|
#undef clCreateFromGLTexture3D
|
||||||
|
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn
|
||||||
|
#undef clEnqueueAcquireGLObjects
|
||||||
|
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn
|
||||||
|
#undef clEnqueueReleaseGLObjects
|
||||||
|
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn
|
||||||
|
#undef clGetGLContextInfoKHR
|
||||||
|
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn
|
||||||
|
#undef clGetGLObjectInfo
|
||||||
|
#define clGetGLObjectInfo clGetGLObjectInfo_pfn
|
||||||
|
#undef clGetGLTextureInfo
|
||||||
|
#define clGetGLTextureInfo clGetGLTextureInfo_pfn
|
||||||
|
|
||||||
|
#ifdef cl_khr_gl_sharing
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*);
|
||||||
|
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*);
|
||||||
|
|
||||||
|
#endif // cl_khr_gl_sharing
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
//
|
||||||
|
// AUTOGENERATED, DO NOT EDIT
|
||||||
|
//
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
|
||||||
|
#error "Invalid usage"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef cl_khr_gl_sharing
|
||||||
|
|
||||||
|
// generated by parser_cl.py
|
||||||
|
#undef clCreateFromGLBuffer
|
||||||
|
#define clCreateFromGLBuffer clCreateFromGLBuffer_fn
|
||||||
|
inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clCreateFromGLRenderbuffer
|
||||||
|
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn
|
||||||
|
inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); }
|
||||||
|
#undef clCreateFromGLTexture
|
||||||
|
#define clCreateFromGLTexture clCreateFromGLTexture_fn
|
||||||
|
inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clCreateFromGLTexture2D
|
||||||
|
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn
|
||||||
|
inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clCreateFromGLTexture3D
|
||||||
|
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn
|
||||||
|
inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clEnqueueAcquireGLObjects
|
||||||
|
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn
|
||||||
|
inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clEnqueueReleaseGLObjects
|
||||||
|
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn
|
||||||
|
inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
|
||||||
|
#undef clGetGLContextInfoKHR
|
||||||
|
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn
|
||||||
|
inline cl_int clGetGLContextInfoKHR(const cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
#undef clGetGLObjectInfo
|
||||||
|
#define clGetGLObjectInfo clGetGLObjectInfo_fn
|
||||||
|
inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); }
|
||||||
|
#undef clGetGLTextureInfo
|
||||||
|
#define clGetGLTextureInfo clGetGLTextureInfo_fn
|
||||||
|
inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); }
|
||||||
|
|
||||||
|
#endif // cl_khr_gl_sharing
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
|
||||||
|
#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
|
||||||
|
|
||||||
|
#ifdef HAVE_CLAMDBLAS
|
||||||
|
|
||||||
|
#include "opencl_core.hpp"
|
||||||
|
|
||||||
|
#include "autogenerated/opencl_clblas.hpp"
|
||||||
|
|
||||||
|
#endif // HAVE_CLAMDBLAS
|
||||||
|
|
||||||
|
#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user