Modules
	Private implementation helpers

Classes
struct	cv::v_reg< _Tp, n >

Typedefs
typedef v_reg< float, 4 >	cv::v_float32x4
	Four 32-bit floating point values (single precision) More...

typedef v_reg< double, 2 >	cv::v_float64x2
	Two 64-bit floating point values (double precision) More...

typedef v_reg< short, 8 >	cv::v_int16x8
	Eight 16-bit signed integer values. More...

typedef v_reg< int, 4 >	cv::v_int32x4
	Four 32-bit signed integer values. More...

typedef v_reg< int64, 2 >	cv::v_int64x2
	Two 64-bit signed integer values. More...

typedef v_reg< schar, 16 >	cv::v_int8x16
	Sixteen 8-bit signed integer values. More...

typedef v_reg< ushort, 8 >	cv::v_uint16x8
	Eight 16-bit unsigned integer values. More...

typedef v_reg< unsigned, 4 >	cv::v_uint32x4
	Four 32-bit unsigned integer values. More...

typedef v_reg< uint64, 2 >	cv::v_uint64x2
	Two 64-bit unsigned integer values. More...

typedef v_reg< uchar, 16 >	cv::v_uint8x16
	Sixteen 8-bit unsigned integer values. More...

Functions
template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator & (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise AND. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator &= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator!= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Not equal comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator* (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Multiply values. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator*= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator+ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Add values. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator+= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator- (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Subtract values. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator-= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator/ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Divide values. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator/= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator< (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Less-than comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator<< (const v_reg< _Tp, n > &a, int imm)
	Bitwise shift left. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator<= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Less-than or equal comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator== (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Equal comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator> (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Greater-than comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator>= (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Greater-than or equal comparison. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator>> (const v_reg< _Tp, n > &a, int imm)
	Bitwise shift right. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator^ (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise XOR. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator^= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator| (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise OR. More...

template<typename _Tp , int n>
v_reg< _Tp, n > &	cv::operator|= (v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::operator~ (const v_reg< _Tp, n > &a)
	Bitwise NOT. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_abs (const v_reg< _Tp, n > &a)
	Absolute value of elements. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Absolute difference. More...

v_float32x4	cv::v_absdiff (const v_float32x4 &a, const v_float32x4 &b)

v_float64x2	cv::v_absdiff (const v_float64x2 &a, const v_float64x2 &b)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_absdiffs (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Saturating absolute difference. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_add_wrap (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Add values without saturation. More...

template<int i, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_broadcast_element (const v_reg< _Tp, n > &a)
	Broadcast i-th element of vector. More...

v_reg< int, 4 >	cv::v_ceil (const v_reg< float, 4 > &a)
	Ceil elements. More...

v_reg< int, 4 >	cv::v_ceil (const v_reg< double, 2 > &a)

template<int n>
v_reg< int, n >	cv::v_ceil (const v_reg< float, n > &a)
	Ceil. More...

template<int n>
v_reg< int, n *2 >	cv::v_ceil (const v_reg< double, n > &a)

template<typename _Tp , int n>
bool	cv::v_check_all (const v_reg< _Tp, n > &a)
	Check if all packed values are less than zero. More...

template<typename _Tp , int n>
bool	cv::v_check_any (const v_reg< _Tp, n > &a)
	Check if any of packed values is less than zero. More...

void	cv::v_cleanup ()

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from last elements of two vectors. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from first elements of two vectors. More...

template<int n>
v_reg< float, n >	cv::v_cvt_f32 (const v_reg< int, n > &a)
	Convert to float. More...

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a)

template<int n>
v_reg< float, n *2 >	cv::v_cvt_f32 (const v_reg< double, n > &a, const v_reg< double, n > &b)

v_reg< double, 2 >	cv::v_cvt_f64 (const v_reg< int, 4 > &a)
	Convert to double. More...

v_reg< double, 2 >	cv::v_cvt_f64 (const v_reg< float, 4 > &a)
	Convert to double. More...

v_reg< double, 2 >	cv::v_cvt_f64 (const v_reg< int64, 2 > &a)
	Convert to double. More...

v_reg< double, 2 >	cv::v_cvt_f64_high (const v_reg< int, 4 > &a)
	Convert to double high part of vector. More...

v_reg< double, 2 >	cv::v_cvt_f64_high (const v_reg< float, 4 > &a)
	Convert to double high part of vector. More...

v_reg< double, 2 >	cv::v_cvt_f64_high (const v_reg< int64, 2 > &a)
	Convert to double high part of vector. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Dot product of elements. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	Dot product of elements. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Dot product of elements and expand. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	Dot product of elements. More...

v_float64x2	cv::v_dotprod_expand (const v_int32x4 &a, const v_int32x4 &b)

v_float64x2	cv::v_dotprod_expand (const v_int32x4 &a, const v_int32x4 &b, const v_float64x2 &c)

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Fast Dot product of elements and expand. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 >	cv::v_dotprod_expand_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &c)
	Fast Dot product of elements. More...

v_float64x2	cv::v_dotprod_expand_fast (const v_int32x4 &a, const v_int32x4 &b)

v_float64x2	cv::v_dotprod_expand_fast (const v_int32x4 &a, const v_int32x4 &b, const v_float64x2 &c)

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Fast Dot product of elements. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_dotprod_fast (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c)
	Fast Dot product of elements. More...

template<typename _Tp , int n>
void	cv::v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
	Expand values to the wider pack type. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_high (const v_reg< _Tp, n > &a)
	Expand higher values to the wider pack type. More...

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 >	cv::v_expand_low (const v_reg< _Tp, n > &a)
	Expand lower values to the wider pack type. More...

template<int s, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Vector extract. More...

template<int s, typename _Tp , int n>
_Tp	cv::v_extract_n (const v_reg< _Tp, n > &v)
	Vector extract. More...

v_reg< int, 4 >	cv::v_floor (const v_reg< float, 4 > &a)
	Floor elements. More...

v_reg< int, 4 >	cv::v_floor (const v_reg< double, 2 > &a)

template<int n>
v_reg< int, n >	cv::v_floor (const v_reg< float, n > &a)
	Floor. More...

template<int n>
v_reg< int, n *2 >	cv::v_floor (const v_reg< double, n > &a)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_fma (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	Multiply and add. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_pairs (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_interleave_quads (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_invsqrt (const v_reg< _Tp, n > &a)
	Inverse square root. More...

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_load (const _Tp *ptr)
	Load register contents from memory. More...

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_load_aligned (const _Tp *ptr)
	Load register contents from memory (aligned) More...

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b)
	Load and deinterleave (2 channels) More...

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c)
	Load and deinterleave (3 channels) More...

template<typename _Tp , int n>
void	cv::v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d)
	Load and deinterleave (4 channels) More...

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::w_type, V_TypeTraits< _Tp >::nlanes128/2 >	cv::v_load_expand (const _Tp *ptr)
	Load register contents from memory with double expand. More...

v_reg< float, V_TypeTraits< float >::nlanes128 >	cv::v_load_expand (const float16_t *ptr)

template<typename _Tp >
v_reg< typename V_TypeTraits< _Tp >::q_type, V_TypeTraits< _Tp >::nlanes128/4 >	cv::v_load_expand_q (const _Tp *ptr)
	Load register contents from memory with quad expand. More...

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_load_halves (const _Tp *loptr, const _Tp *hiptr)
	Load register contents from two memory blocks. More...

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_load_low (const _Tp *ptr)
	Load 64 bits of data into the lower part (the high part is undefined). More...

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_lut (const _Tp *tab, const int *idx)

template<int n>
v_reg< int, n >	cv::v_lut (const int *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< unsigned, n >	cv::v_lut (const unsigned *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< float, n >	cv::v_lut (const float *tab, const v_reg< int, n > &idx)

template<int n>
v_reg< double, n >	cv::v_lut (const double *tab, const v_reg< int, n *2 > &idx)

v_int32x4	cv::v_lut (const int *tab, const v_int32x4 &idxvec)

v_uint32x4	cv::v_lut (const unsigned *tab, const v_int32x4 &idxvec)

v_float32x4	cv::v_lut (const float *tab, const v_int32x4 &idxvec)

v_float64x2	cv::v_lut (const double *tab, const v_int32x4 &idxvec)

template<int n>
void	cv::v_lut_deinterleave (const float *tab, const v_reg< int, n > &idx, v_reg< float, n > &x, v_reg< float, n > &y)

template<int n>
void	cv::v_lut_deinterleave (const double *tab, const v_reg< int, n *2 > &idx, v_reg< double, n > &x, v_reg< double, n > &y)

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_lut_pairs (const _Tp *tab, const int *idx)

template<typename _Tp >
v_reg< _Tp, V_TypeTraits< _Tp >::nlanes128 >	cv::v_lut_quads (const _Tp *tab, const int *idx)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Magnitude. More...

v_float32x4	cv::v_matmul (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3)
	Matrix multiplication. More...

v_float32x4	cv::v_matmuladd (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3)
	Matrix multiplication and add. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_max (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Choose max values for each pair. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_min (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Choose min values for each pair. More...

template<typename _Tp , int n>
void	cv::v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
	Multiply and expand. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_mul_hi (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Multiply and extract high part. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_mul_wrap (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Multiply values without saturation. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	A synonym for v_fma. More...

template<int n>
v_reg< float, n >	cv::v_not_nan (const v_reg< float, n > &a)

template<int n>
v_reg< double, n >	cv::v_not_nan (const v_reg< double, n > &a)

void	cv::v_pack_store (float16_t *ptr, const v_reg< float, V_TypeTraits< float >::nlanes128 > &v)

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_pack_triplets (const v_reg< _Tp, n > &vec)

template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	cv::v_popcount (const v_reg< _Tp, n > &a)
	Count the 1 bits in the vector lanes and return result as corresponding unsigned type. More...

template<typename _Tp , int n>
void	cv::v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
	Combine two vectors from lower and higher parts of two other vectors. More...

template<typename _Tp , int n>
_Tp	cv::v_reduce_max (const v_reg< _Tp, n > &a)
	Find one max value. More...

template<typename _Tp , int n>
_Tp	cv::v_reduce_min (const v_reg< _Tp, n > &a)
	Find one min value. More...

template<typename _Tp , int n>
V_TypeTraits< typename V_TypeTraits< _Tp >::abs_type >::sum_type	cv::v_reduce_sad (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Sum absolute differences of values. More...

template<typename _Tp , int n>
V_TypeTraits< _Tp >::sum_type	cv::v_reduce_sum (const v_reg< _Tp, n > &a)
	Sum packed values. More...

v_float32x4	cv::v_reduce_sum4 (const v_float32x4 &a, const v_float32x4 &b, const v_float32x4 &c, const v_float32x4 &d)
	Sums all elements of each input vector, returns the vector of sums. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_reverse (const v_reg< _Tp, n > &a)
	Vector reverse order. More...

template<int imm, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_rotate_left (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

template<int imm, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_rotate_left (const v_reg< _Tp, n > &a)
	Element shift left among vector. More...

template<int imm, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_rotate_right (const v_reg< _Tp, n > &a)
	Element shift right among vector. More...

template<int imm, typename _Tp , int n>
v_reg< _Tp, n >	cv::v_rotate_right (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)

v_reg< int, 4 >	cv::v_round (const v_reg< double, 2 > &a)

v_reg< int, 4 >	cv::v_round (const v_reg< float, 4 > &a)
	Round elements. More...

template<int n>
v_reg< int, n >	cv::v_round (const v_reg< float, n > &a)
	Round. More...

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a, const v_reg< double, n > &b)

template<int n>
v_reg< int, n *2 >	cv::v_round (const v_reg< double, n > &a)

template<typename _Tp , int n>
int	cv::v_scan_forward (const v_reg< _Tp, n > &a)
	Get first negative lane index. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Per-element select (blend operation) More...

template<typename _Tp , int n>
int	cv::v_signmask (const v_reg< _Tp, n > &a)
	Get negative values mask. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Square of the magnitude. More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_sqrt (const v_reg< _Tp, n > &a)
	Square root of elements. More...

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory. More...

template<typename _Tp , int n>
void	cv::v_store (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (aligned) More...

template<typename _Tp , int n>
void	cv::v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a, hal::StoreMode)

template<typename _Tp , int n>
void	cv::v_store_aligned_nocache (_Tp *ptr, const v_reg< _Tp, n > &a)

template<typename _Tp , int n>
void	cv::v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (higher half) More...

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (2 channels) More...

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (3 channels) More...

template<typename _Tp , int n>
void	cv::v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d, hal::StoreMode=hal::STORE_UNALIGNED)
	Interleave and store (4 channels) More...

template<typename _Tp , int n>
void	cv::v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (lower half) More...

template<typename _Tp , int n>
v_reg< _Tp, n >	cv::v_sub_wrap (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Subtract values without saturation. More...

template<typename _Tp >
void	cv::v_transpose4x4 (v_reg< _Tp, 4 > &a0, const v_reg< _Tp, 4 > &a1, const v_reg< _Tp, 4 > &a2, const v_reg< _Tp, 4 > &a3, v_reg< _Tp, 4 > &b0, v_reg< _Tp, 4 > &b1, v_reg< _Tp, 4 > &b2, v_reg< _Tp, 4 > &b3)
	Transpose 4x4 matrix. More...

v_reg< int, 4 >	cv::v_trunc (const v_reg< double, 2 > &a)

v_reg< int, 4 >	cv::v_trunc (const v_reg< float, 4 > &a)
	Truncate elements. More...

template<int n>
v_reg< int, n >	cv::v_trunc (const v_reg< float, n > &a)
	Trunc. More...

template<int n>
v_reg< int, n *2 >	cv::v_trunc (const v_reg< double, n > &a)

template<typename _Tp , int n>
void	cv::v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
	Interleave two vectors. More...

Variables
static const unsigned char	cv::popCountTable []

Init with zero
Create new vector with zero elements
v_uint8x16	cv::v_setzero_u8 ()

v_int8x16	cv::v_setzero_s8 ()

v_uint16x8	cv::v_setzero_u16 ()

v_int16x8	cv::v_setzero_s16 ()

v_uint32x4	cv::v_setzero_u32 ()

v_int32x4	cv::v_setzero_s32 ()

v_float32x4	cv::v_setzero_f32 ()

v_float64x2	cv::v_setzero_f64 ()

v_uint64x2	cv::v_setzero_u64 ()

v_int64x2	cv::v_setzero_s64 ()

Init with value
Create new vector with elements set to a specific value
v_uint8x16	cv::v_setall_u8 (uchar val)

v_int8x16	cv::v_setall_s8 (schar val)

v_uint16x8	cv::v_setall_u16 (ushort val)

v_int16x8	cv::v_setall_s16 (short val)

v_uint32x4	cv::v_setall_u32 (unsigned val)

v_int32x4	cv::v_setall_s32 (int val)

v_float32x4	cv::v_setall_f32 (float val)

v_float64x2	cv::v_setall_f64 (double val)

v_uint64x2	cv::v_setall_u64 (uint64 val)

v_int64x2	cv::v_setall_s64 (int64 val)

Reinterpret
Convert vector to different type without modifying underlying data.
template<typename _Tp0 , int n0>
v_uint8x16	cv::v_reinterpret_as_u8 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_int8x16	cv::v_reinterpret_as_s8 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_uint16x8	cv::v_reinterpret_as_u16 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_int16x8	cv::v_reinterpret_as_s16 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_uint32x4	cv::v_reinterpret_as_u32 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_int32x4	cv::v_reinterpret_as_s32 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_float32x4	cv::v_reinterpret_as_f32 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_float64x2	cv::v_reinterpret_as_f64 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_uint64x2	cv::v_reinterpret_as_u64 (const v_reg< _Tp0, n0 > &a)

template<typename _Tp0 , int n0>
v_int64x2	cv::v_reinterpret_as_s64 (const v_reg< _Tp0, n0 > &a)

Left shift
Shift left
template<int n>
v_uint16x8	cv::v_shl (const v_uint16x8 &a)

template<int n>
v_int16x8	cv::v_shl (const v_int16x8 &a)

template<int n>
v_uint32x4	cv::v_shl (const v_uint32x4 &a)

template<int n>
v_int32x4	cv::v_shl (const v_int32x4 &a)

template<int n>
v_uint64x2	cv::v_shl (const v_uint64x2 &a)

template<int n>
v_int64x2	cv::v_shl (const v_int64x2 &a)

Right shift
Shift right
template<int n>
v_uint16x8	cv::v_shr (const v_uint16x8 &a)

template<int n>
v_int16x8	cv::v_shr (const v_int16x8 &a)

template<int n>
v_uint32x4	cv::v_shr (const v_uint32x4 &a)

template<int n>
v_int32x4	cv::v_shr (const v_int32x4 &a)

template<int n>
v_uint64x2	cv::v_shr (const v_uint64x2 &a)

template<int n>
v_int64x2	cv::v_shr (const v_int64x2 &a)

Rounding shift
Rounding shift right
template<int n>
v_uint16x8	cv::v_rshr (const v_uint16x8 &a)

template<int n>
v_int16x8	cv::v_rshr (const v_int16x8 &a)

template<int n>
v_uint32x4	cv::v_rshr (const v_uint32x4 &a)

template<int n>
v_int32x4	cv::v_rshr (const v_int32x4 &a)

template<int n>
v_uint64x2	cv::v_rshr (const v_uint64x2 &a)

template<int n>
v_int64x2	cv::v_rshr (const v_int64x2 &a)

Pack
Pack values from two vectors into one. The returned vector type has twice as many elements as the input vector types. The variant with the u suffix also converts to the corresponding unsigned type. pack: for 16-, 32- and 64-bit integer input types; pack_u: for 16- and 32-bit signed integer input types. Note: All variants except 64-bit use saturation.
v_uint8x16	cv::v_pack (const v_uint16x8 &a, const v_uint16x8 &b)

v_int8x16	cv::v_pack (const v_int16x8 &a, const v_int16x8 &b)

v_uint16x8	cv::v_pack (const v_uint32x4 &a, const v_uint32x4 &b)

v_int16x8	cv::v_pack (const v_int32x4 &a, const v_int32x4 &b)

v_uint32x4	cv::v_pack (const v_uint64x2 &a, const v_uint64x2 &b)

v_int32x4	cv::v_pack (const v_int64x2 &a, const v_int64x2 &b)

v_uint8x16	cv::v_pack_u (const v_int16x8 &a, const v_int16x8 &b)

v_uint16x8	cv::v_pack_u (const v_int32x4 &a, const v_int32x4 &b)

Pack with rounding shift
Pack values from two vectors into one with a rounding shift. Values from the input vectors will be shifted right by n bits with rounding, converted to a narrower type and returned in the result vector. The variant with the u suffix converts to an unsigned type. pack: for 16-, 32- and 64-bit integer input types; pack_u: for 16- and 32-bit signed integer input types. Note: All variants except 64-bit use saturation.
template<int n>
v_uint8x16	cv::v_rshr_pack (const v_uint16x8 &a, const v_uint16x8 &b)

template<int n>
v_int8x16	cv::v_rshr_pack (const v_int16x8 &a, const v_int16x8 &b)

template<int n>
v_uint16x8	cv::v_rshr_pack (const v_uint32x4 &a, const v_uint32x4 &b)

template<int n>
v_int16x8	cv::v_rshr_pack (const v_int32x4 &a, const v_int32x4 &b)

template<int n>
v_uint32x4	cv::v_rshr_pack (const v_uint64x2 &a, const v_uint64x2 &b)

template<int n>
v_int32x4	cv::v_rshr_pack (const v_int64x2 &a, const v_int64x2 &b)

template<int n>
v_uint8x16	cv::v_rshr_pack_u (const v_int16x8 &a, const v_int16x8 &b)

template<int n>
v_uint16x8	cv::v_rshr_pack_u (const v_int32x4 &a, const v_int32x4 &b)

Pack and store
Store values from the input vector into memory with packing. Values will be stored into memory with conversion to a narrower type. The variant with the u suffix converts to the corresponding unsigned type. pack: for 16-, 32- and 64-bit integer input types; pack_u: for 16- and 32-bit signed integer input types. Note: All variants except 64-bit use saturation.
void	cv::v_pack_store (uchar *ptr, const v_uint16x8 &a)

void	cv::v_pack_store (schar *ptr, const v_int16x8 &a)

void	cv::v_pack_store (ushort *ptr, const v_uint32x4 &a)

void	cv::v_pack_store (short *ptr, const v_int32x4 &a)

void	cv::v_pack_store (unsigned *ptr, const v_uint64x2 &a)

void	cv::v_pack_store (int *ptr, const v_int64x2 &a)

void	cv::v_pack_u_store (uchar *ptr, const v_int16x8 &a)

void	cv::v_pack_u_store (ushort *ptr, const v_int32x4 &a)

Pack and store with rounding shift
Store values from the input vector into memory with packing. Values will be shifted n bits right with rounding, converted to a narrower type and stored into memory. The variant with the u suffix converts to an unsigned type. pack: for 16-, 32- and 64-bit integer input types; pack_u: for 16- and 32-bit signed integer input types. Note: All variants except 64-bit use saturation.
template<int n>
void	cv::v_rshr_pack_store (uchar *ptr, const v_uint16x8 &a)

template<int n>
void	cv::v_rshr_pack_store (schar *ptr, const v_int16x8 &a)

template<int n>
void	cv::v_rshr_pack_store (ushort *ptr, const v_uint32x4 &a)

template<int n>
void	cv::v_rshr_pack_store (short *ptr, const v_int32x4 &a)

template<int n>
void	cv::v_rshr_pack_store (unsigned *ptr, const v_uint64x2 &a)

template<int n>
void	cv::v_rshr_pack_store (int *ptr, const v_int64x2 &a)

template<int n>
void	cv::v_rshr_pack_u_store (uchar *ptr, const v_int16x8 &a)

template<int n>
void	cv::v_rshr_pack_u_store (ushort *ptr, const v_int32x4 &a)

Pack boolean values
Pack boolean values from multiple vectors into one unsigned 8-bit integer vector. Note: Valid boolean values must be provided to guarantee the same result on all architectures.
v_uint8x16	cv::v_pack_b (const v_uint16x8 &a, const v_uint16x8 &b)
	For 16-bit boolean values. More...

v_uint8x16	cv::v_pack_b (const v_uint32x4 &a, const v_uint32x4 &b, const v_uint32x4 &c, const v_uint32x4 &d)

v_uint8x16	cv::v_pack_b (const v_uint64x2 &a, const v_uint64x2 &b, const v_uint64x2 &c, const v_uint64x2 &d, const v_uint64x2 &e, const v_uint64x2 &f, const v_uint64x2 &g, const v_uint64x2 &h)

Detailed Description

"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on different platforms. Currently there are two supported SIMD extensions: SSE/SSE2 on x86 architectures and NEON on ARM architectures; both allow working with 128-bit registers containing packed values of different types. If no SIMD extension is available during compilation, a fallback C++ implementation of the intrinsics will be chosen and the code will work as expected, although it could be slower.

Types

There are several types representing a 128-bit register as a vector of packed values; each type is implemented as a structure based on one SIMD register.

cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
cv::v_float32x4: four 32-bit floating point values (signed) - float
cv::v_float64x2: two 64-bit floating point values (signed) - double

Note: cv::v_float64x2 is not implemented in the NEON variant. If you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition:
#if CV_SIMD128_64F
//...
#endif

Load and store operations

These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.

Constructors: from memory, from two values, ...
Other create methods: v_setall_s8, v_setall_u8, ..., v_setzero_u8, v_setzero_s8, ...
Memory operations: v_load, v_load_aligned, v_load_low, v_load_halves, v_store, v_store_aligned, v_store_high, v_store_low

Value reordering

These operations allow reordering or recombining elements in one or multiple vectors.

Interleave, deinterleave (2, 3 and 4 channels): v_load_deinterleave, v_store_interleave
Expand: v_load_expand, v_load_expand_q, v_expand, v_expand_low, v_expand_high
Pack: v_pack, v_pack_u, v_pack_b, v_rshr_pack, v_rshr_pack_u, v_pack_store, v_pack_u_store, v_rshr_pack_store, v_rshr_pack_u_store
Recombine: v_zip, v_recombine, v_combine_low, v_combine_high
Reverse: v_reverse
Extract: v_extract

Arithmetic, bitwise and comparison operations

Element-wise binary and unary operations.

Arithmetics: +, -, *, /, v_mul_expand
Non-saturating arithmetics: v_add_wrap, v_sub_wrap
Bitwise shifts: <<, >>, v_shl, v_shr
Bitwise logic: &, |, ^, ~
Comparison: >, >=, <, <=, ==, !=
min/max: v_min, v_max

Reduce and mask

Most of these operations return only one value.

Reduce: v_reduce_min, v_reduce_max, v_reduce_sum, v_popcount
Mask: v_signmask, v_check_all, v_check_any, v_select

Other math

Some frequent operations: v_sqrt, v_invsqrt, v_magnitude, v_sqr_magnitude
Absolute values: v_abs, v_absdiff, v_absdiffs

Conversions

Different type conversions and casts:

Rounding: v_round, v_floor, v_ceil, v_trunc
To float: v_cvt_f32, v_cvt_f64
Reinterpret: v_reinterpret_as_u8, v_reinterpret_as_s8, ...

Matrix operations

In these operations vectors represent matrix rows/columns: v_dotprod, v_dotprod_fast, v_dotprod_expand, v_dotprod_expand_fast, v_matmul, v_transpose4x4

Usability

Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.

Regular integers:

Operations\Types	uint 8x16	int 8x16	uint 16x8	int 16x8	uint 32x4	int 32x4
load, store	x	x	x	x	x	x
interleave	x	x	x	x	x	x
expand	x	x	x	x	x	x
expand_low	x	x	x	x	x	x
expand_high	x	x	x	x	x	x
expand_q	x	x
add, sub	x	x	x	x	x	x
add_wrap, sub_wrap	x	x	x	x
mul_wrap	x	x	x	x
mul	x	x	x	x	x	x
mul_expand	x	x	x	x	x
compare	x	x	x	x	x	x
shift			x	x	x	x
dotprod				x		x
dotprod_fast				x		x
dotprod_expand	x	x	x	x		x
dotprod_expand_fast	x	x	x	x		x
logical	x	x	x	x	x	x
min, max	x	x	x	x	x	x
absdiff	x	x	x	x	x	x
absdiffs		x		x
reduce	x	x	x	x	x	x
mask	x	x	x	x	x	x
pack	x	x	x	x	x	x
pack_u	x		x
pack_b	x
unpack	x	x	x	x	x	x
extract	x	x	x	x	x	x
rotate (lanes)	x	x	x	x	x	x
cvt_flt32						x
cvt_flt64						x
transpose4x4					x	x
reverse	x	x	x	x	x	x
extract_n	x	x	x	x	x	x
broadcast_element					x	x

Big integers:

Operations\Types	uint 64x2	int 64x2
load, store	x	x
add, sub	x	x
shift	x	x
logical	x	x
reverse	x	x
extract	x	x
rotate (lanes)	x	x
cvt_flt64		x
extract_n	x	x

Floating point:

Operations\Types	float 32x4	float 64x2
load, store	x	x
interleave	x
add, sub	x	x
mul	x	x
div	x	x
compare	x	x
min, max	x	x
absdiff	x	x
reduce	x
mask	x	x
unpack	x	x
cvt_flt32		x
cvt_flt64	x
sqrt, abs	x	x
float math	x	x
transpose4x4	x
extract	x	x
rotate (lanes)	x	x
reverse	x	x
extract_n	x	x
broadcast_element	x

Typedef Documentation

◆ v_float32x4

typedef v_reg<float, 4> cv::v_float32x4

#include <opencv2/core/hal/intrin_cpp.hpp>

Four 32-bit floating point values (single precision)

◆ v_float64x2

typedef v_reg<double, 2> cv::v_float64x2

#include <opencv2/core/hal/intrin_cpp.hpp>

Two 64-bit floating point values (double precision)

◆ v_int16x8

typedef v_reg<short, 8> cv::v_int16x8

#include <opencv2/core/hal/intrin_cpp.hpp>

Eight 16-bit signed integer values.

◆ v_int32x4

typedef v_reg<int, 4> cv::v_int32x4

#include <opencv2/core/hal/intrin_cpp.hpp>

Four 32-bit signed integer values.

◆ v_int64x2

typedef v_reg<int64, 2> cv::v_int64x2

#include <opencv2/core/hal/intrin_cpp.hpp>

Two 64-bit signed integer values.

◆ v_int8x16

typedef v_reg<schar, 16> cv::v_int8x16

#include <opencv2/core/hal/intrin_cpp.hpp>

Sixteen 8-bit signed integer values.

◆ v_uint16x8

typedef v_reg<ushort, 8> cv::v_uint16x8

#include <opencv2/core/hal/intrin_cpp.hpp>

Eight 16-bit unsigned integer values.

◆ v_uint32x4

typedef v_reg<unsigned, 4> cv::v_uint32x4

#include <opencv2/core/hal/intrin_cpp.hpp>

Four 32-bit unsigned integer values.

◆ v_uint64x2

typedef v_reg<uint64, 2> cv::v_uint64x2

#include <opencv2/core/hal/intrin_cpp.hpp>

Two 64-bit unsigned integer values.

◆ v_uint8x16

typedef v_reg<uchar, 16> cv::v_uint8x16

#include <opencv2/core/hal/intrin_cpp.hpp>

Sixteen 8-bit unsigned integer values.

Function Documentation

◆ operator &()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator&	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise AND.

Only for integer types.

◆ operator &=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator&=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator!=()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator!=	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Not equal comparison.

For all types except 64-bit integer values.

◆ operator*()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator*	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Multiply values.

For 16- and 32-bit integer types and floating types.

◆ operator*=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator*=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator+()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator+	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Add values.

For all types.

◆ operator+=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator+=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator-()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator-	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Subtract values.

For all types.

◆ operator-=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator-=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator/()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator/	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Divide values.

For floating types only.

◆ operator/=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator/=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator<()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator<	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Less-than comparison.

For all types except 64-bit integer values.

◆ operator<<()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator<<	(	const v_reg< _Tp, n > &	a,
		int	imm
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise shift left.

For 16-, 32- and 64-bit integer values.

Examples: samples/cpp/filestorage.cpp.

◆ operator<=()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator<=	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Less-than or equal comparison.

For all types except 64-bit integer values.

◆ operator==()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator==	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Equal comparison.

For all types except 64-bit integer values.

◆ operator>()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator>	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Greater-than comparison.

For all types except 64-bit integer values.

◆ operator>=()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator>=	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Greater-than or equal comparison.

For all types except 64-bit integer values.

◆ operator>>()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator>>	(	const v_reg< _Tp, n > &	a,
		int	imm
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise shift right.

For 16-, 32- and 64-bit integer values.

◆ operator^()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator^	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise XOR.

Only for integer types.

◆ operator^=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator^=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator|()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator|	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise OR.

Only for integer types.

◆ operator|=()

template<typename _Tp , int n>

v_reg<_Tp, n>& cv::operator|=	(	v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ operator~()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::operator~ ( const v_reg< _Tp, n > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Bitwise NOT.

Only for integer types.

◆ v_abs()

template<typename _Tp , int n>

v_reg< typename V_TypeTraits<_Tp>::abs_type , n> cv::v_abs ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Absolute value of elements.

Only for floating point types.

◆ v_absdiff() [1/3]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::abs_type, n> cv::v_absdiff	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Absolute difference.

Returns \( |a - b| \) converted to corresponding unsigned type. Example:

v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}

v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}

For 8-, 16-, 32-bit integer source types.

◆ v_absdiff() [2/3]

v_float32x4 cv::v_absdiff	(	const v_float32x4 &	a,
		const v_float32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

For 32-bit floating point values

◆ v_absdiff() [3/3]

v_float64x2 cv::v_absdiff	(	const v_float64x2 &	a,
		const v_float64x2 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

For 64-bit floating point values

◆ v_absdiffs()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_absdiffs	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Saturating absolute difference.

Returns \( saturate(|a - b|) \) . For 8-, 16-bit signed integer source types.

◆ v_add_wrap()

template<typename _Tp , int n>

v_reg< _Tp , n> cv::v_add_wrap	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Add values without saturation.

For 8- and 16-bit integer values.

◆ v_broadcast_element()

template<int i, typename _Tp , int n>

v_reg<_Tp, n> cv::v_broadcast_element ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Broadcast i-th element of vector.

Scheme:

{ v[0] v[1] v[2] ... v[SZ] } => { v[i], v[i], v[i] ... v[i] }

Restriction: 0 <= i < nlanes. Supported types: 32-bit integers and floats (s32/u32/f32).

◆ v_ceil() [1/4]

v_reg<int, 4> cv::v_ceil ( const v_reg< float, 4 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Ceil elements.

Only for floating point types.

◆ v_ceil() [2/4]

v_reg<int, 4> cv::v_ceil ( const v_reg< double, 2 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_ceil() [3/4]

template<int n>

v_reg<int, n> cv::v_ceil ( const v_reg< float, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Ceil.

Ceil each value. Input type is float vector ==> output type is int vector.

◆ v_ceil() [4/4]

template<int n>

v_reg<int, n*2> cv::v_ceil ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ v_check_all()

template<typename _Tp , int n>

bool cv::v_check_all ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Check if all packed values are less than zero.

Unsigned values will be cast to signed: uchar 254 => char -2.

◆ v_check_any()

template<typename _Tp , int n>

bool cv::v_check_any ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Check if any of packed values is less than zero.

Unsigned values will be cast to signed: uchar 254 => char -2.

◆ v_cleanup()

void cv::v_cleanup ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_combine_high()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_combine_high	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Combine vector from last elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A3 A4 B3 B4}

For all types except 64-bit.

◆ v_combine_low()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_combine_low	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Combine vector from first elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 A2 B1 B2}

For all types except 64-bit.

◆ v_cvt_f32() [1/3]

template<int n>

v_reg<float, n> cv::v_cvt_f32 ( const v_reg< int, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to float.

Supported input type is cv::v_int32x4.

◆ v_cvt_f32() [2/3]

template<int n>

v_reg<float, n*2> cv::v_cvt_f32 ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_cvt_f32() [3/3]

template<int n>

v_reg<float, n*2> cv::v_cvt_f32	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_cvt_f64() [1/3]

v_reg<double, 2> cv::v_cvt_f64 ( const v_reg< int, 4 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double.

Supported input type is cv::v_int32x4.

◆ v_cvt_f64() [2/3]

v_reg<double, 2> cv::v_cvt_f64 ( const v_reg< float, 4 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double.

Supported input type is cv::v_float32x4.

◆ v_cvt_f64() [3/3]

v_reg<double, 2> cv::v_cvt_f64 ( const v_reg< int64, 2 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double.

Supported input type is cv::v_int64x2.

◆ v_cvt_f64_high() [1/3]

v_reg<double, 2> cv::v_cvt_f64_high ( const v_reg< int, 4 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double high part of vector.

Supported input type is cv::v_int32x4.

◆ v_cvt_f64_high() [2/3]

v_reg<double, 2> cv::v_cvt_f64_high ( const v_reg< float, 4 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double high part of vector.

Supported input type is cv::v_float32x4.

◆ v_cvt_f64_high() [3/3]

v_reg<double, 2> cv::v_cvt_f64_high ( const v_reg< int64, 2 > & a )

#include <opencv2/core/hal/intrin_cpp.hpp>

Convert to double high part of vector.

Supported input type is cv::v_int64x2.

◆ v_dotprod() [1/2]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Dot product of elements.

Multiply values in two registers and sum adjacent result pairs.

Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
{A1B1+A2B2 ...} // 32-bit

◆ v_dotprod() [2/2]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Dot product of elements.

Same as cv::v_dotprod, but add a third element to the sum of adjacent pairs. Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
  {A1B1+A2B2+C1 ...} // 32-bit

◆ v_dotprod_expand() [1/4]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::q_type, n/4> cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Dot product of elements and expand.

Multiply values in two registers and expand the sum of adjacent result pairs.

Scheme:

  {A1 A2 A3 A4 ...} // 8-bit
x {B1 B2 B3 B4 ...} // 8-bit
-------------
  {A1B1+A2B2+A3B3+A4B4 ...} // 32-bit

◆ v_dotprod_expand() [2/4]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::q_type, n/4> cv::v_dotprod_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Dot product of elements.

Same as cv::v_dotprod_expand, but add a third element to the sum of adjacent pairs. Scheme:

  {A1 A2 A3 A4 ...} // 8-bit
x {B1 B2 B3 B4 ...} // 8-bit
-------------
  {A1B1+A2B2+A3B3+A4B4+C1 ...} // 32-bit

◆ v_dotprod_expand() [3/4]

v_float64x2 cv::v_dotprod_expand	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_dotprod_expand() [4/4]

v_float64x2 cv::v_dotprod_expand	(	const v_int32x4 &	a,
		const v_int32x4 &	b,
		const v_float64x2 &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_dotprod_expand_fast() [1/4]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::q_type, n/4> cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Fast Dot product of elements and expand.

Multiply values in two registers and expand the sum of adjacent result pairs.

Same as cv::v_dotprod_expand, but it may sum the result pairs in a different order on some platforms. This intrinsic can be used if only the sum among all lanes matters, and it should yield better performance on the affected platforms.

◆ v_dotprod_expand_fast() [2/4]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::q_type, n/4> cv::v_dotprod_expand_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::q_type, n/4 > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Fast Dot product of elements.

Same as cv::v_dotprod_expand_fast, but add a third element to the sum of adjacent pairs.

◆ v_dotprod_expand_fast() [3/4]

v_float64x2 cv::v_dotprod_expand_fast	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_dotprod_expand_fast() [4/4]

v_float64x2 cv::v_dotprod_expand_fast	(	const v_int32x4 &	a,
		const v_int32x4 &	b,
		const v_float64x2 &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_dotprod_fast() [1/2]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Fast Dot product of elements.

Same as cv::v_dotprod, but it may sum the result pairs in a different order on some platforms. This intrinsic can be used if only the sum among all lanes matters, and it should yield better performance on the affected platforms.

◆ v_dotprod_fast() [2/2]

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod_fast	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Fast Dot product of elements.

Same as cv::v_dotprod_fast, but add a third element to the sum of adjacent pairs.

◆ v_expand()

template<typename _Tp , int n>

void cv::v_expand	(	const v_reg< _Tp, n > &	a,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b0,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b1
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Expand values to the wider pack type.

Copy contents of register to two registers with 2x wider pack type. Scheme:

int32x4 int64x2 int64x2

{A B C D} ==> {A B} , {C D}

◆ v_expand_high()

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_expand_high ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Expand higher values to the wider pack type.

Same as cv::v_expand_low, but expand higher half of the vector instead.

Scheme:

int32x4 int64x2

{A B C D} ==> {C D}

◆ v_expand_low()

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_expand_low ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Expand lower values to the wider pack type.

Same as cv::v_expand, but return lower half of the vector.

Scheme:

int32x4 int64x2

{A B C D} ==> {A B}

◆ v_extract()

template<int s, typename _Tp , int n>

v_reg<_Tp, n> cv::v_extract	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Vector extract.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
========================
shift = 1  {A2 A3 A4 B1}
shift = 2  {A3 A4 B1 B2}
shift = 3  {A4 B1 B2 B3}

Restriction: 0 <= shift < nlanes

Usage:

v_int32x4 a, b, c;

c = v_extract<2>(a, b);

For all types.

◆ v_extract_n()

template<int s, typename _Tp , int n>

_Tp cv::v_extract_n ( const v_reg< _Tp, n > & v )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Vector extract.

Scheme: Return the s-th element of v. Restriction: 0 <= s < nlanes

Usage:

v_int32x4 a;
int r;
r = v_extract_n<2>(a);

For all types.

◆ v_floor() [1/4]

v_reg<int, 4> cv::v_floor ( const v_reg< float, 4 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Floor elements.

Only for floating point types.

◆ v_floor() [2/4]

v_reg<int, 4> cv::v_floor ( const v_reg< double, 2 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_floor() [3/4]

template<int n>

v_reg<int, n> cv::v_floor ( const v_reg< float, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Floor.

Floor each value. Input type is float vector ==> output type is int vector.

◆ v_floor() [4/4]

template<int n>

v_reg<int, n*2> cv::v_floor ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ v_fma()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_fma	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Multiply and add.

Returns \( a*b + c \). For floating point types and signed 32-bit int only.

◆ v_interleave_pairs()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_interleave_pairs ( const v_reg< _Tp, n > & vec )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_interleave_quads()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_interleave_quads ( const v_reg< _Tp, n > & vec )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_invsqrt()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_invsqrt ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Inverse square root.

Returns \( 1/sqrt(a) \). For floating point types only.

◆ v_load()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_load ( const _Tp * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load register contents from memory.

Parameters

ptr	pointer to memory block with data

Returns: register object

Note: Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.; Alignment requirement: if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (sizeof(lane type) should be enough). Do not cast pointer types without runtime check for pointer alignment (like uchar* => int*).

◆ v_load_aligned()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_load_aligned ( const _Tp * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load register contents from memory (aligned)

Similar to cv::v_load, but the source memory block should be aligned (to a 16-byte boundary in case of SIMD128, 32-byte for SIMD256, etc.).

◆ v_load_deinterleave() [1/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load and deinterleave (2 channels)

Load data from memory, deinterleave and store to 2 registers. Scheme:

{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}

For all types except 64-bit.

◆ v_load_deinterleave() [2/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load and deinterleave (3 channels)

Load data from memory, deinterleave and store to 3 registers. Scheme:

{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}

For all types except 64-bit.

◆ v_load_deinterleave() [3/3]

template<typename _Tp , int n>

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c,
		v_reg< _Tp, n > &	d
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load and deinterleave (4 channels)

Load data from memory, deinterleave and store to 4 registers. Scheme:

{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}

For all types except 64-bit.

◆ v_load_expand() [1/2]

template<typename _Tp >

v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> cv::v_load_expand ( const _Tp * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load register contents from memory with double expand.

Same as cv::v_load, but result pack type will be 2x wider than memory type.

short buf[4] = {1, 2, 3, 4}; // type is int16

v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32

For 8-, 16-, 32-bit integer source types.

◆ v_load_expand() [2/2]

v_reg<float, V_TypeTraits<float>::nlanes128> cv::v_load_expand ( const float16_t * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_load_expand_q()

template<typename _Tp >

v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> cv::v_load_expand_q ( const _Tp * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load register contents from memory with quad expand.

Same as cv::v_load_expand, but result type is 4 times wider than source.

char buf[4] = {1, 2, 3, 4}; // type is int8

v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32

For 8-bit integer source types.

◆ v_load_halves()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_load_halves	(	const _Tp *	loptr,
		const _Tp *	hiptr
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load register contents from two memory blocks.

Parameters

loptr	memory block containing data for first half (0..n/2)
hiptr	memory block containing data for second half (n/2..n)

int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };

v_int32x4 r = v_load_halves(lo, hi);

◆ v_load_low()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_load_low ( const _Tp * ptr )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Load 64 bits of data to lower part (high part is undefined).

Parameters

ptr	memory block containing data for first half (0..n/2)

int lo[2] = { 1, 2 };

v_int32x4 r = v_load_low(lo);

◆ v_lut() [1/9]

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_lut	(	const _Tp *	tab,
		const int *	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [2/9]

template<int n>

v_reg<int, n> cv::v_lut	(	const int *	tab,
		const v_reg< int, n > &	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [3/9]

template<int n>

v_reg<unsigned, n> cv::v_lut	(	const unsigned *	tab,
		const v_reg< int, n > &	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [4/9]

template<int n>

v_reg<float, n> cv::v_lut	(	const float *	tab,
		const v_reg< int, n > &	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [5/9]

template<int n>

v_reg<double, n> cv::v_lut	(	const double *	tab,
		const v_reg< int, n *2 > &	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [6/9]

v_int32x4 cv::v_lut	(	const int *	tab,
		const v_int32x4 &	idxvec
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [7/9]

v_uint32x4 cv::v_lut	(	const unsigned *	tab,
		const v_int32x4 &	idxvec
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [8/9]

v_float32x4 cv::v_lut	(	const float *	tab,
		const v_int32x4 &	idxvec
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut() [9/9]

v_float64x2 cv::v_lut	(	const double *	tab,
		const v_int32x4 &	idxvec
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut_deinterleave() [1/2]

template<int n>

void cv::v_lut_deinterleave	(	const float *	tab,
		const v_reg< int, n > &	idx,
		v_reg< float, n > &	x,
		v_reg< float, n > &	y
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut_deinterleave() [2/2]

template<int n>

void cv::v_lut_deinterleave	(	const double *	tab,
		const v_reg< int, n *2 > &	idx,
		v_reg< double, n > &	x,
		v_reg< double, n > &	y
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut_pairs()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_lut_pairs	(	const _Tp *	tab,
		const int *	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_lut_quads()

template<typename _Tp >

v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> cv::v_lut_quads	(	const _Tp *	tab,
		const int *	idx
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_magnitude()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Magnitude.

Returns \( sqrt(a^2 + b^2) \). For floating point types only.

◆ v_matmul()

v_float32x4 cv::v_matmul	(	const v_float32x4 &	v,
		const v_float32x4 &	m0,
		const v_float32x4 &	m1,
		const v_float32x4 &	m2,
		const v_float32x4 &	m3
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Matrix multiplication.

Scheme:

{A0 A1 A2 A3}   |V0|
{B0 B1 B2 B3}   |V1|
{C0 C1 C2 C3}   |V2|
{D0 D1 D2 D3} x |V3|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + A1V1 + A2V2 + A3V3,
R1 = B0V0 + B1V1 + B2V2 + B3V3
...

◆ v_matmuladd()

v_float32x4 cv::v_matmuladd	(	const v_float32x4 &	v,
		const v_float32x4 &	m0,
		const v_float32x4 &	m1,
		const v_float32x4 &	m2,
		const v_float32x4 &	m3
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Matrix multiplication and add.

Scheme:

{A0 A1 A2   }   |V0|   |D0|
{B0 B1 B2   }   |V1|   |D1|
{C0 C1 C2   } x |V2| + |D2|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + A1V1 + A2V2 + D0,
R1 = B0V0 + B1V1 + B2V2 + D1
...

◆ v_max()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_max	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Choose max values for each pair.

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{max(A1,B1) max(A2,B2) ...}

For all types except 64-bit integer.

◆ v_min()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_min	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Choose min values for each pair.

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{min(A1,B1) min(A2,B2) ...}

For all types except 64-bit integer.

◆ v_mul_expand()

template<typename _Tp , int n>

void cv::v_mul_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	d
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Multiply and expand.

Multiply the values of two registers and store the results in two registers with a wider pack type. Scheme:

  {A B C D} // 32-bit
x {E F G H} // 32-bit
---------------
{AE BF}         // 64-bit
        {CG DH} // 64-bit

Example:

v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
v_uint64x2 c, d; // results
v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}

Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).

◆ v_mul_hi()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_mul_hi	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Multiply and extract high part.

Multiply the values of two registers and store the high part of the results. Implemented only for 16-bit source types (v_int16x8, v_uint16x8). Returns \( (a \cdot b) \gg 16 \).

◆ v_mul_wrap()

template<typename _Tp , int n>

v_reg< _Tp , n> cv::v_mul_wrap	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Multiply values without saturation.

For 8- and 16-bit integer values.

◆ v_muladd()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_muladd	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

A synonym for v_fma.

◆ v_not_nan() [1/2]

template<int n>

v_reg<float, n> cv::v_not_nan ( const v_reg< float, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_not_nan() [2/2]

template<int n>

v_reg<double, n> cv::v_not_nan ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [1/6]

v_uint8x16 cv::v_pack	(	const v_uint16x8 &	a,
		const v_uint16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [2/6]

v_int8x16 cv::v_pack	(	const v_int16x8 &	a,
		const v_int16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [3/6]

v_uint16x8 cv::v_pack	(	const v_uint32x4 &	a,
		const v_uint32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [4/6]

v_int16x8 cv::v_pack	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [5/6]

v_uint32x4 cv::v_pack	(	const v_uint64x2 &	a,
		const v_uint64x2 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack() [6/6]

v_int32x4 cv::v_pack	(	const v_int64x2 &	a,
		const v_int64x2 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_b() [1/3]

v_uint8x16 cv::v_pack_b	(	const v_uint16x8 &	a,
		const v_uint16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

For 16-bit boolean values.

Scheme:

a  {0xFFFF 0 0 0xFFFF 0 0xFFFF 0xFFFF 0}
b  {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF}
===============
{
   0xFF 0 0 0xFF 0 0xFF 0xFF 0
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [2/3]

v_uint8x16 cv::v_pack_b	(	const v_uint32x4 &	a,
		const v_uint32x4 &	b,
		const v_uint32x4 &	c,
		const v_uint32x4 &	d
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit boolean values

Scheme:

a  {0xFFFF.. 0 0 0xFFFF..}
b  {0 0xFFFF.. 0xFFFF.. 0}
c  {0xFFFF.. 0 0xFFFF.. 0}
d  {0 0xFFFF.. 0 0xFFFF..}
===============
{
   0xFF 0 0 0xFF 0 0xFF 0xFF 0
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_b() [3/3]

v_uint8x16 cv::v_pack_b	(	const v_uint64x2 &	a,
		const v_uint64x2 &	b,
		const v_uint64x2 &	c,
		const v_uint64x2 &	d,
		const v_uint64x2 &	e,
		const v_uint64x2 &	f,
		const v_uint64x2 &	g,
		const v_uint64x2 &	h
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 64-bit boolean values

Scheme:

a  {0xFFFF.. 0}
b  {0 0xFFFF..}
c  {0xFFFF.. 0}
d  {0 0xFFFF..}
e  {0xFFFF.. 0}
f  {0xFFFF.. 0}
g  {0 0xFFFF..}
h  {0 0xFFFF..}
===============
{
   0xFF 0 0 0xFF 0xFF 0 0 0xFF
   0xFF 0 0xFF 0 0 0xFF 0 0xFF
}

◆ v_pack_store() [1/7]

void cv::v_pack_store	(	uchar *	ptr,
		const v_uint16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [2/7]

void cv::v_pack_store	(	schar *	ptr,
		const v_int16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [3/7]

void cv::v_pack_store	(	ushort *	ptr,
		const v_uint32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [4/7]

void cv::v_pack_store	(	short *	ptr,
		const v_int32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [5/7]

void cv::v_pack_store	(	unsigned *	ptr,
		const v_uint64x2 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [6/7]

void cv::v_pack_store	(	int *	ptr,
		const v_int64x2 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_store() [7/7]

void cv::v_pack_store	(	float16_t *	ptr,
		const v_reg< float, V_TypeTraits< float >::nlanes128 > &	v
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_triplets()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_pack_triplets ( const v_reg< _Tp, n > & vec )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_u() [1/2]

v_uint8x16 cv::v_pack_u	(	const v_int16x8 &	a,
		const v_int16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_u() [2/2]

v_uint16x8 cv::v_pack_u	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_u_store() [1/2]

void cv::v_pack_u_store	(	uchar *	ptr,
		const v_int16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_pack_u_store() [2/2]

void cv::v_pack_u_store	(	ushort *	ptr,
		const v_int32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_popcount()

template<typename _Tp , int n>

v_reg<typename V_TypeTraits<_Tp>::abs_type, n> cv::v_popcount ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Count the 1 bits in the vector lanes and return result as corresponding unsigned type.

Scheme:

{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}

For all integer types.

◆ v_recombine()

template<typename _Tp , int n>

void cv::v_recombine	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	low,
		v_reg< _Tp, n > &	high
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Combine two vectors from lower and higher parts of two other vectors.

low = cv::v_combine_low(a, b);

high = cv::v_combine_high(a, b);

◆ v_reduce_max()

template<typename _Tp , int n>

_Tp cv::v_reduce_max ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Find one max value.

Scheme:

{A1 A2 A3 ...} => max(A1,A2,A3,...)

For all types except 64-bit integer and 64-bit floating point types.

◆ v_reduce_min()

template<typename _Tp , int n>

_Tp cv::v_reduce_min ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Find one min value.

Scheme:

{A1 A2 A3 ...} => min(A1,A2,A3,...)

For all types except 64-bit integer and 64-bit floating point types.

◆ v_reduce_sad()

template<typename _Tp , int n>

V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type cv::v_reduce_sad	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Sum absolute differences of values.

Scheme:

{A1 A2 A3 ...} {B1 B2 B3 ...} => sum{abs(A1-B1),abs(A2-B2),abs(A3-B3),...}

For all types except 64-bit types.

◆ v_reduce_sum()

template<typename _Tp , int n>

V_TypeTraits<_Tp>::sum_type cv::v_reduce_sum ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Sum packed values.

Scheme:

{A1 A2 A3 ...} => sum{A1,A2,A3,...}

◆ v_reduce_sum4()

v_float32x4 cv::v_reduce_sum4	(	const v_float32x4 &	a,
		const v_float32x4 &	b,
		const v_float32x4 &	c,
		const v_float32x4 &	d
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Sums all elements of each input vector, returns the vector of sums.

Scheme:

result[0] = a[0] + a[1] + a[2] + a[3]
result[1] = b[0] + b[1] + b[2] + b[3]
result[2] = c[0] + c[1] + c[2] + c[3]
result[3] = d[0] + d[1] + d[2] + d[3]

◆ v_reinterpret_as_f32()

template<typename _Tp0 , int n0>

v_float32x4 cv::v_reinterpret_as_f32 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_f64()

template<typename _Tp0 , int n0>

v_float64x2 cv::v_reinterpret_as_f64 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_s16()

template<typename _Tp0 , int n0>

v_int16x8 cv::v_reinterpret_as_s16 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_s32()

template<typename _Tp0 , int n0>

v_int32x4 cv::v_reinterpret_as_s32 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_s64()

template<typename _Tp0 , int n0>

v_int64x2 cv::v_reinterpret_as_s64 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_s8()

template<typename _Tp0 , int n0>

v_int8x16 cv::v_reinterpret_as_s8 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_u16()

template<typename _Tp0 , int n0>

v_uint16x8 cv::v_reinterpret_as_u16 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_u32()

template<typename _Tp0 , int n0>

v_uint32x4 cv::v_reinterpret_as_u32 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_u64()

template<typename _Tp0 , int n0>

v_uint64x2 cv::v_reinterpret_as_u64 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reinterpret_as_u8()

template<typename _Tp0 , int n0>

v_uint8x16 cv::v_reinterpret_as_u8 ( const v_reg< _Tp0, n0 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_reverse()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_reverse ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Vector reverse order.

Reverse the order of the elements in the vector. Scheme:

REG {A1 ... An} ==> REG {An ... A1}

For all types.

◆ v_rotate_left() [1/2]

template<int imm, typename _Tp , int n>

v_reg<_Tp, n> cv::v_rotate_left	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rotate_left() [2/2]

template<int imm, typename _Tp , int n>

v_reg<_Tp, n> cv::v_rotate_left ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Shift elements left within the vector.

For all types.

◆ v_rotate_right() [1/2]

template<int imm, typename _Tp , int n>

v_reg<_Tp, n> cv::v_rotate_right ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Shift elements right within the vector.

For all types.

◆ v_rotate_right() [2/2]

template<int imm, typename _Tp , int n>

v_reg<_Tp, n> cv::v_rotate_right	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_round() [1/5]

v_reg<int, 4> cv::v_round ( const v_reg< float, 4 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Round elements.

Only for floating point types.

◆ v_round() [2/5]

v_reg<int, 4> cv::v_round ( const v_reg< double, 2 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_round() [3/5]

template<int n>

v_reg<int, n> cv::v_round ( const v_reg< float, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Round.

Rounds each value. Input type is float vector ==> output type is int vector.

◆ v_round() [4/5]

template<int n>

v_reg<int, n*2> cv::v_round	(	const v_reg< double, n > &	a,
		const v_reg< double, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ v_round() [5/5]

template<int n>

v_reg<int, n*2> cv::v_round ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ v_rshr() [1/6]

template<int n>

v_uint16x8 cv::v_rshr ( const v_uint16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr() [2/6]

template<int n>

v_int16x8 cv::v_rshr ( const v_int16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr() [3/6]

template<int n>

v_uint32x4 cv::v_rshr ( const v_uint32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr() [4/6]

template<int n>

v_int32x4 cv::v_rshr ( const v_int32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr() [5/6]

template<int n>

v_uint64x2 cv::v_rshr ( const v_uint64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr() [6/6]

template<int n>

v_int64x2 cv::v_rshr ( const v_int64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [1/6]

template<int n>

v_uint8x16 cv::v_rshr_pack	(	const v_uint16x8 &	a,
		const v_uint16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [2/6]

template<int n>

v_int8x16 cv::v_rshr_pack	(	const v_int16x8 &	a,
		const v_int16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [3/6]

template<int n>

v_uint16x8 cv::v_rshr_pack	(	const v_uint32x4 &	a,
		const v_uint32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [4/6]

template<int n>

v_int16x8 cv::v_rshr_pack	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [5/6]

template<int n>

v_uint32x4 cv::v_rshr_pack	(	const v_uint64x2 &	a,
		const v_uint64x2 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack() [6/6]

template<int n>

v_int32x4 cv::v_rshr_pack	(	const v_int64x2 &	a,
		const v_int64x2 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [1/6]

template<int n>

void cv::v_rshr_pack_store	(	uchar *	ptr,
		const v_uint16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [2/6]

template<int n>

void cv::v_rshr_pack_store	(	schar *	ptr,
		const v_int16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [3/6]

template<int n>

void cv::v_rshr_pack_store	(	ushort *	ptr,
		const v_uint32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [4/6]

template<int n>

void cv::v_rshr_pack_store	(	short *	ptr,
		const v_int32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [5/6]

template<int n>

void cv::v_rshr_pack_store	(	unsigned *	ptr,
		const v_uint64x2 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_store() [6/6]

template<int n>

void cv::v_rshr_pack_store	(	int *	ptr,
		const v_int64x2 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_u() [1/2]

template<int n>

v_uint8x16 cv::v_rshr_pack_u	(	const v_int16x8 &	a,
		const v_int16x8 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_u() [2/2]

template<int n>

v_uint16x8 cv::v_rshr_pack_u	(	const v_int32x4 &	a,
		const v_int32x4 &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_u_store() [1/2]

template<int n>

void cv::v_rshr_pack_u_store	(	uchar *	ptr,
		const v_int16x8 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_rshr_pack_u_store() [2/2]

template<int n>

void cv::v_rshr_pack_u_store	(	ushort *	ptr,
		const v_int32x4 &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_scan_forward()

template<typename _Tp , int n>

int cv::v_scan_forward ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Get first negative lane index.

Returned value is the index of the first negative lane (undefined for input of all positive values). Example:

v_int32x4 r; // set to {0, 0, -1, -1}

int idx = v_scan_forward(r); // idx = 2

◆ v_select()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_select	(	const v_reg< _Tp, n > &	mask,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Per-element select (blend operation)

Return value will be built by combining values a and b using the following scheme: result[i] = mask[i] ? a[i] : b[i];

Note: mask element values are restricted to these values:

0: select element from b
0xff/0xffff/etc: select element from a (fully compatible with bitwise-based operator)

◆ v_setall_f32()

v_float32x4 cv::v_setall_f32 ( float val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_f64()

v_float64x2 cv::v_setall_f64 ( double val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_s16()

v_int16x8 cv::v_setall_s16 ( short val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_s32()

v_int32x4 cv::v_setall_s32 ( int val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_s64()

v_int64x2 cv::v_setall_s64 ( int64 val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_s8()

v_int8x16 cv::v_setall_s8 ( schar val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_u16()

v_uint16x8 cv::v_setall_u16 ( ushort val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_u32()

v_uint32x4 cv::v_setall_u32 ( unsigned val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_u64()

v_uint64x2 cv::v_setall_u64 ( uint64 val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setall_u8()

v_uint8x16 cv::v_setall_u8 ( uchar val )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_f32()

v_float32x4 cv::v_setzero_f32 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_f64()

v_float64x2 cv::v_setzero_f64 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_s16()

v_int16x8 cv::v_setzero_s16 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_s32()

v_int32x4 cv::v_setzero_s32 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_s64()

v_int64x2 cv::v_setzero_s64 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_s8()

v_int8x16 cv::v_setzero_s8 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_u16()

v_uint16x8 cv::v_setzero_u16 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_u32()

v_uint32x4 cv::v_setzero_u32 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_u64()

v_uint64x2 cv::v_setzero_u64 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_setzero_u8()

v_uint8x16 cv::v_setzero_u8 ( )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [1/6]

template<int n>

v_uint16x8 cv::v_shl ( const v_uint16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [2/6]

template<int n>

v_int16x8 cv::v_shl ( const v_int16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [3/6]

template<int n>

v_uint32x4 cv::v_shl ( const v_uint32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [4/6]

template<int n>

v_int32x4 cv::v_shl ( const v_int32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [5/6]

template<int n>

v_uint64x2 cv::v_shl ( const v_uint64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shl() [6/6]

template<int n>

v_int64x2 cv::v_shl ( const v_int64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [1/6]

template<int n>

v_uint16x8 cv::v_shr ( const v_uint16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [2/6]

template<int n>

v_int16x8 cv::v_shr ( const v_int16x8 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [3/6]

template<int n>

v_uint32x4 cv::v_shr ( const v_uint32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [4/6]

template<int n>

v_int32x4 cv::v_shr ( const v_int32x4 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [5/6]

template<int n>

v_uint64x2 cv::v_shr ( const v_uint64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_shr() [6/6]

template<int n>

v_int64x2 cv::v_shr ( const v_int64x2 & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_signmask()

template<typename _Tp , int n>

int cv::v_signmask ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Get negative values mask.

Deprecated: v_signmask depends heavily on the lane count and therefore isn't universal enough.

Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes. Example:

v_int32x4 r; // set to {-1, -1, 1, 1}

int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011

◆ v_sqr_magnitude()

template<typename _Tp , int n>

v_reg<_Tp, n> cv::v_sqr_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Square of the magnitude.

Returns \( a^2 + b^2 \). For floating point types only.

◆ v_sqrt()

template<typename _Tp , int n>

v_reg< _Tp , n> cv::v_sqrt ( const v_reg< _Tp, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Square root of elements.

Only for floating point types.

◆ v_store() [1/2]

template<typename _Tp , int n>

void cv::v_store	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Store data to memory.

Store register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B C D}

Pointer can be unaligned.

◆ v_store() [2/2]

template<typename _Tp , int n>

void cv::v_store	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		hal::StoreMode
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_store_aligned() [1/2]

template<typename _Tp , int n>

void cv::v_store_aligned	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Store data to memory (aligned)

Store register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B C D}

Pointer should be aligned on a 16-byte boundary.

◆ v_store_aligned() [2/2]

template<typename _Tp , int n>

void cv::v_store_aligned	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		hal::StoreMode
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_store_aligned_nocache()

template<typename _Tp , int n>

void cv::v_store_aligned_nocache	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_store_high()

template<typename _Tp , int n>

void cv::v_store_high	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Store data to memory (higher half)

Store higher half of register contents to memory. Scheme:

REG {A B C D} ==> MEM {C D}

◆ v_store_interleave() [1/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Interleave and store (2 channels)

Interleave and store data from 2 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}

For all types except 64-bit.

◆ v_store_interleave() [2/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Interleave and store (3 channels)

Interleave and store data from 3 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}

For all types except 64-bit.

◆ v_store_interleave() [3/3]

template<typename _Tp , int n>

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		const v_reg< _Tp, n > &	d,
		hal::StoreMode	= `hal::STORE_UNALIGNED`
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Interleave and store (4 channels)

Interleave and store data from 4 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}

For all types except 64-bit.

◆ v_store_low()

template<typename _Tp , int n>

void cv::v_store_low	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Store data to memory (lower half)

Store lower half of register contents to memory. Scheme:

REG {A B C D} ==> MEM {A B}

◆ v_sub_wrap()

template<typename _Tp , int n>

v_reg< _Tp , n> cv::v_sub_wrap	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Subtract values without saturation.

For 8- and 16-bit integer values.

◆ v_transpose4x4()

template<typename _Tp >

void cv::v_transpose4x4	(	v_reg< _Tp, 4 > &	a0,
		const v_reg< _Tp, 4 > &	a1,
		const v_reg< _Tp, 4 > &	a2,
		const v_reg< _Tp, 4 > &	a3,
		v_reg< _Tp, 4 > &	b0,
		v_reg< _Tp, 4 > &	b1,
		v_reg< _Tp, 4 > &	b2,
		v_reg< _Tp, 4 > &	b3
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Transpose 4x4 matrix.

Scheme:

a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}

◆ v_trunc() [1/4]

v_reg<int, 4> cv::v_trunc ( const v_reg< float, 4 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Truncate elements.

Only for floating point types.

◆ v_trunc() [2/4]

v_reg<int, 4> cv::v_trunc ( const v_reg< double, 2 > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

◆ v_trunc() [3/4]

template<int n>

v_reg<int, n> cv::v_trunc ( const v_reg< float, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Trunc.

Truncate each value. Input type is float vector ==> output type is int vector.

◆ v_trunc() [4/4]

template<int n>

v_reg<int, n*2> cv::v_trunc ( const v_reg< double, n > & a )

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ v_zip()

template<typename _Tp , int n>

void cv::v_zip	(	const v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1
	)

inline

#include <opencv2/core/hal/intrin_cpp.hpp>

Interleave two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 B1 A2 B2} and {A3 B3 A4 B4}

For all types except 64-bit.

Variable Documentation

◆ popCountTable

const unsigned char cv::popCountTable[]

static

#include <opencv2/core/hal/intrin_cpp.hpp>

Initial value:

=
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
}

Modules

Classes

Typedefs

Functions

Variables

Init with zero

Init with value

Reinterpret

Left shift

Right shift

Rounding shift

Pack

Pack with rounding shift

Pack and store

Pack and store with rounding shift

Pack boolean values

Detailed Description

Types

Load and store operations

Value reordering

Arithmetic, bitwise and comparison operations

Reduce and mask

Other math

Conversions

Matrix operations

Usability

Typedef Documentation

◆ v_float32x4

◆ v_float64x2

◆ v_int16x8

◆ v_int32x4

◆ v_int64x2

◆ v_int8x16

◆ v_uint16x8

◆ v_uint32x4

◆ v_uint64x2

◆ v_uint8x16

Function Documentation

◆ operator &()

◆ operator &=()

◆ operator!=()

◆ operator*()

◆ operator*=()

◆ operator+()

◆ operator+=()

◆ operator-()

◆ operator-=()

◆ operator/()

◆ operator/=()

◆ operator<()

◆ operator<<()

◆ operator<=()

◆ operator==()

◆ operator>()

◆ operator>=()

◆ operator>>()

◆ operator^()

◆ operator^=()

◆ operator|()

◆ operator|=()

◆ operator~()

◆ v_abs()

◆ v_absdiff() [1/3]

◆ v_absdiff() [2/3]

◆ v_absdiff() [3/3]

◆ v_absdiffs()

◆ v_add_wrap()

◆ v_broadcast_element()

◆ v_ceil() [1/4]

◆ v_ceil() [2/4]

◆ v_ceil() [3/4]

◆ v_ceil() [4/4]

◆ v_check_all()

◆ v_check_any()

◆ v_cleanup()

◆ v_combine_high()

◆ v_combine_low()

◆ v_cvt_f32() [1/3]

◆ v_cvt_f32() [2/3]

◆ v_cvt_f32() [3/3]