AI Engine-ML Intrinsics User Guide  (v2023.2)

Intrinsics allowing you to perform vector shuffles. More...

Overview

Intrinsics allowing you to perform vector shuffles.

This table gives the different shuffle modes. For an illustration see Illustration of Shuffle Modes.

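Table columns (left to right):
Element Size | Matrix Transpose Dimensions | Mode Value, Forward Operation (Deinterleaving): extract low, extract high | Mode Value, Backward Operation (Interleaving): extract low, extract high
A "-" in an "extract high" column means the mode yields a single 512-bit result and has no separate high variant.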
8-bit 64x2 → 2x64 shuffle_T8_64x2_lo shuffle_T8_64x2_hi shuffle_T8_2x64_lo shuffle_T8_2x64_hi
16-bit 32x2 → 2x32 shuffle_T16_32x2_lo shuffle_T16_32x2_hi shuffle_T16_2x32_lo shuffle_T16_2x32_hi
32-bit 16x2 → 2x16 shuffle_T32_16x2_lo shuffle_T32_16x2_hi shuffle_T32_2x16_lo shuffle_T32_2x16_hi
64-bit 8x2 → 2x8 shuffle_T64_8x2_lo shuffle_T64_8x2_hi shuffle_T64_2x8_lo shuffle_T64_2x8_hi
128-bit 4x2 → 2x4 shuffle_T128_4x2_lo shuffle_T128_4x2_hi shuffle_T128_2x4_lo shuffle_T128_2x4_hi
256-bit 2x2 → 2x2 shuffle_T256_2x2_lo shuffle_T256_2x2_hi shuffle_T256_2x2_lo shuffle_T256_2x2_hi
512-bit 1x2 → 2x1 shuffle_T512_1x2_lo shuffle_T512_1x2_hi shuffle_T512_1x2_lo shuffle_T512_1x2_hi
16-bit 16x4 → 4x16 shuffle_T16_16x4_lo shuffle_T16_16x4_hi shuffle_T16_4x16_lo shuffle_T16_4x16_hi
16-bit 8x4 → 4x8 shuffle_T16_8x4 - shuffle_T16_4x8 -
32-bit 8x4 → 4x8 shuffle_T32_8x4_lo shuffle_T32_8x4_hi shuffle_T32_4x8_lo shuffle_T32_4x8_hi
32-bit 4x4 → 4x4 shuffle_T32_4x4 - shuffle_T32_4x4 -
8-bit 8x8 → 8x8 shuffle_T8_8x8 - shuffle_T8_8x8 -
8-bit 16x4 → 4x16 shuffle_T8_16x4 - shuffle_T8_4x16 -
16-bit 16x[a,b] → 16x[b,a] shuffle_T16_1x2_flip - shuffle_T16_1x2_flip -
16-bit 4x4 → 4x4 shuffle_T16_4x4 - shuffle_T16_4x4 -
16-bit 4x2 → 2x4 shuffle_T16_4x2 - shuffle_T16_2x4 -
16-bit 8x2 → 2x8 shuffle_T16_8x2 - shuffle_T16_2x8 -
16-bit 16x2 → 2x16 shuffle_T16_16x2 - shuffle_T16_2x16 -
8-bit 8x4 → 4x8 shuffle_T8_8x4 - shuffle_T8_4x8 -

Modules

 Illustration of Shuffle Modes
 

Enumerations

enum  eShuffleMode {
  shuffle_T8_64x2_lo = 0, shuffle_T8_64x2_hi = 1, shuffle_T16_32x2_lo = 2, shuffle_T16_32x2_hi = 3,
  shuffle_T32_16x2_lo = 4, shuffle_T32_16x2_hi = 5, shuffle_T64_8x2_lo = 6, shuffle_T64_8x2_hi = 7,
  shuffle_T128_4x2_lo = 8, shuffle_T128_4x2_hi = 9, shuffle_T256_2x2_lo = 10, shuffle_T256_2x2_hi = 11,
  shuffle_T128_2x4_lo = 12, shuffle_T128_2x4_hi = 13, shuffle_T64_2x8_lo = 14, shuffle_T64_2x8_hi = 15,
  shuffle_T32_2x16_lo = 16, shuffle_T32_2x16_hi = 17, shuffle_T16_2x32_lo = 18, shuffle_T16_2x32_hi = 19,
  shuffle_T8_2x64_lo = 20, shuffle_T8_2x64_hi = 21, shuffle_T512_1x2_lo = 22, shuffle_T512_1x2_hi = 23,
  shuffle_T16_16x4_lo = 24, shuffle_T16_16x4_hi = 25, shuffle_T16_4x16_lo = 26, shuffle_T16_4x16_hi = 27,
  shuffle_T16_8x4 = 28, shuffle_T16_4x8 = 29, shuffle_T32_8x4_lo = 30, shuffle_T32_8x4_hi = 31,
  shuffle_T32_4x8_lo = 32, shuffle_T32_4x8_hi = 33, shuffle_T32_4x4 = 34, shuffle_T8_8x8 = 35,
  shuffle_T8_16x4 = 36, shuffle_T8_4x16 = 37, shuffle_T16_1x2_flip = 38, shuffle_T16_4x4 = 39,
  shuffle_T16_4x2 = 40, shuffle_T16_2x4 = 41, shuffle_T16_8x2 = 42, shuffle_T16_2x8 = 43,
  shuffle_T16_16x2 = 44, shuffle_T16_2x16 = 45, shuffle_T8_8x4 = 46, shuffle_T8_4x8 = 47
}
 Shuffle modes. More...
 

Shuffle two vectors

Shuffle two vectors

Parameters
a: First vector to shuffle
b: Second vector to shuffle
mode: Shuffling mode (of type eShuffleMode)
Returns
shuffled vector
v16int32 shuffle (v16int32, v16int32, unsigned int)
 
v32int16 shuffle (v32int16, v32int16, unsigned int)
 
v64int8 shuffle (v64int8, v64int8, unsigned int)
 
v128int4 shuffle (v128int4, v128int4, unsigned int)
 
v16uint32 shuffle (v16uint32, v16uint32, unsigned int)
 
v32uint16 shuffle (v32uint16, v32uint16, unsigned int)
 
v64uint8 shuffle (v64uint8, v64uint8, unsigned int)
 
v128uint4 shuffle (v128uint4, v128uint4, unsigned int)
 
v8cint32 shuffle (v8cint32, v8cint32, unsigned int)
 
v16cint16 shuffle (v16cint16, v16cint16, unsigned int)
 
v32bfloat16 shuffle (v32bfloat16, v32bfloat16, unsigned int)
 
v16float shuffle (v16float, v16float, unsigned int)
 
v8cfloat shuffle (v8cfloat, v8cfloat, unsigned int)
 

Shuffle one vector

Shuffle one vector

Parameters
a: Vector to shuffle
mode: Shuffling mode (of type eShuffleMode)
Returns
shuffled vector
v16int32 shuffle (v16int32 a, unsigned int mode)
 
v32int16 shuffle (v32int16 a, unsigned int mode)
 
v64int8 shuffle (v64int8 a, unsigned int mode)
 
v128int4 shuffle (v128int4 a, unsigned int mode)
 
v16uint32 shuffle (v16uint32 a, unsigned int mode)
 
v32uint16 shuffle (v32uint16 a, unsigned int mode)
 
v64uint8 shuffle (v64uint8 a, unsigned int mode)
 
v128uint4 shuffle (v128uint4 a, unsigned int mode)
 
v8cint32 shuffle (v8cint32 a, unsigned int mode)
 
v16cint16 shuffle (v16cint16 a, unsigned int mode)
 
v32bfloat16 shuffle (v32bfloat16 a, unsigned int mode)
 
v16float shuffle (v16float a, unsigned int mode)
 
v8cfloat shuffle (v8cfloat a, unsigned int mode)
 

Updating all elements with same value and shuffle

Broadcasts input value to all vector lanes

Parameters
b: Value to be broadcast
v64int8 shuffle_s8 (int b, unsigned int m)
 
v32int16 shuffle_s16 (int b, unsigned int m)
 
v16int32 shuffle_s32 (int b, unsigned int m)
 
v16int32 shuffle_v2s32 (v2int32 b, unsigned int m)
 
v64uint8 shuffle_u8 (unsigned int b, unsigned int m)
 
v32uint16 shuffle_u16 (unsigned int b, unsigned int m)
 
v16uint32 shuffle_u32 (unsigned int b, unsigned int m)
 
v16uint32 shuffle_v2u32 (v2uint32 b, unsigned int m)
 
v16cint16 shuffle_c16 (cint16 b, unsigned int m)
 
v32bfloat16 shuffle_bfloat16 (bfloat16 b, unsigned int m)
 
v16float shuffle_float (float b, unsigned int m)
 
v16int32 shuffle_s64 (long long b, unsigned int m)
 
v16uint32 shuffle_u64 (unsigned long long b, unsigned int m)
 
v8cint32 shuffle_c32 (cint32 b, unsigned int m)
 

Shuffle a sparse vector

Shuffle a sparse vector

Parameters
a: Vector to shuffle
mode: Shuffling mode (of type eShuffleMode)
Returns
shuffled vector
v256int4_sparse shuffle (v256int4_sparse qx, int itlv)
 
v128int8_sparse shuffle (v128int8_sparse qx, int itlv)
 
v64int16_sparse shuffle (v64int16_sparse qx, int itlv)
 
v256uint4_sparse shuffle (v256uint4_sparse qx, int itlv)
 
v128uint8_sparse shuffle (v128uint8_sparse qx, int itlv)
 
v64uint16_sparse shuffle (v64uint16_sparse qx, int itlv)
 
v64bfloat16_sparse shuffle (v64bfloat16_sparse qx, int itlv)
 

Enumeration Type Documentation

◆ eShuffleMode

Shuffle modes.

Definition of valid modes for vector shuffle

Enumerator
shuffle_T8_64x2_lo 

Transpose 64x2 matrix of 8 bit values. Extract low 512 bits of result.

shuffle_T8_64x2_hi 

Transpose 64x2 matrix of 8 bit values. Extract high 512 bits of result.

shuffle_T16_32x2_lo 

Transpose 32x2 matrix of 16 bit values. Extract low 512 bits of result.

shuffle_T16_32x2_hi 

Transpose 32x2 matrix of 16 bit values. Extract high 512 bits of result.

shuffle_T32_16x2_lo 

Transpose 16x2 matrix of 32 bit values. Extract low 512 bits of result.

shuffle_T32_16x2_hi 

Transpose 16x2 matrix of 32 bit values. Extract high 512 bits of result.

shuffle_T64_8x2_lo 

Transpose 8x2 matrix of 64 bit values. Extract low 512 bits of result.

shuffle_T64_8x2_hi 

Transpose 8x2 matrix of 64 bit values. Extract high 512 bits of result.

shuffle_T128_4x2_lo 

Transpose 4x2 matrix of 128 bit values. Extract low 512 bits of result.

shuffle_T128_4x2_hi 

Transpose 4x2 matrix of 128 bit values. Extract high 512 bits of result.

shuffle_T256_2x2_lo 

Transpose 2x2 matrix of 256 bit values. Extract low 512 bits of result.

shuffle_T256_2x2_hi 

Transpose 2x2 matrix of 256 bit values. Extract high 512 bits of result.

shuffle_T128_2x4_lo 

Transpose 2x4 matrix of 128 bit values. Extract low 512 bits of result.

shuffle_T128_2x4_hi 

Transpose 2x4 matrix of 128 bit values. Extract high 512 bits of result.

shuffle_T64_2x8_lo 

Transpose 2x8 matrix of 64 bit values. Extract low 512 bits of result.

shuffle_T64_2x8_hi 

Transpose 2x8 matrix of 64 bit values. Extract high 512 bits of result.

shuffle_T32_2x16_lo 

Transpose 2x16 matrix of 32 bit values. Extract low 512 bits of result.

shuffle_T32_2x16_hi 

Transpose 2x16 matrix of 32 bit values. Extract high 512 bits of result.

shuffle_T16_2x32_lo 

Transpose 2x32 matrix of 16 bit values. Extract low 512 bits of result.

shuffle_T16_2x32_hi 

Transpose 2x32 matrix of 16 bit values. Extract high 512 bits of result.

shuffle_T8_2x64_lo 

Transpose 2x64 matrix of 8 bit values. Extract low 512 bits of result.

shuffle_T8_2x64_hi 

Transpose 2x64 matrix of 8 bit values. Extract high 512 bits of result.

shuffle_T512_1x2_lo 

Transpose 1x2 matrix of 512 bit values. Extract low 512 bits of result.

shuffle_T512_1x2_hi 

Transpose 1x2 matrix of 512 bit values. Extract high 512 bits of result.

shuffle_T16_16x4_lo 

Transpose 16x4 matrix of 16 bit values. Extract low 512 bits of result.

shuffle_T16_16x4_hi 

Transpose 16x4 matrix of 16 bit values. Extract high 512 bits of result.

shuffle_T16_4x16_lo 

Transpose 4x16 matrix of 16 bit values. Extract low 512 bits of result.

shuffle_T16_4x16_hi 

Transpose 4x16 matrix of 16 bit values. Extract high 512 bits of result.

shuffle_T16_8x4 

Transpose 8x4 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_4x8 

Transpose 4x8 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T32_8x4_lo 

Transpose 8x4 matrix of 32 bit values. Extract low 512 bits of result.

shuffle_T32_8x4_hi 

Transpose 8x4 matrix of 32 bit values. Extract high 512 bits of result.

shuffle_T32_4x8_lo 

Transpose 4x8 matrix of 32 bit values. Extract low 512 bits of result.

shuffle_T32_4x8_hi 

Transpose 4x8 matrix of 32 bit values. Extract high 512 bits of result.

shuffle_T32_4x4 

Transpose 4x4 matrix of 32 bit values. Extract resulting 512 bit vector.

shuffle_T8_8x8 

Transpose 8x8 matrix of 8 bit values. Extract resulting 512 bit vector.

shuffle_T8_16x4 

Transpose 16x4 matrix of 8 bit values. Extract resulting 512 bit vector.

shuffle_T8_4x16 

Transpose 4x16 matrix of 8 bit values. Extract resulting 512 bit vector.

shuffle_T16_1x2_flip 

Flip inner dimension of 16x2 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_4x4 

Transpose 4x4 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_4x2 

Transpose 4x2 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_2x4 

Transpose 2x4 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_8x2 

Transpose 8x2 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_2x8 

Transpose 2x8 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_16x2 

Transpose 16x2 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T16_2x16 

Transpose 2x16 matrix of 16 bit values. Extract resulting 512 bit vector.

shuffle_T8_8x4 

Transpose 8x4 matrix of 8 bit values. Extract resulting 512 bit vector.

shuffle_T8_4x8 

Transpose 4x8 matrix of 8 bit values. Extract resulting 512 bit vector.

Function Documentation

◆ shuffle() [1/33]

v128int4 shuffle ( v128int4  a,
unsigned int  mode 
)

◆ shuffle() [2/33]

v128int4 shuffle ( v128int4  ,
v128int4  ,
unsigned int   
)

◆ shuffle() [3/33]

v128int8_sparse shuffle ( v128int8_sparse  qx,
int  itlv 
)

◆ shuffle() [4/33]

v128uint4 shuffle ( v128uint4  a,
unsigned int  mode 
)

◆ shuffle() [5/33]

v128uint4 shuffle ( v128uint4  ,
v128uint4  ,
unsigned int   
)

◆ shuffle() [6/33]

v128uint8_sparse shuffle ( v128uint8_sparse  qx,
int  itlv 
)

◆ shuffle() [7/33]

v16cint16 shuffle ( v16cint16  a,
unsigned int  mode 
)

◆ shuffle() [8/33]

v16cint16 shuffle ( v16cint16  ,
v16cint16  ,
unsigned int   
)

◆ shuffle() [9/33]

v16float shuffle ( v16float  a,
unsigned int  mode 
)

◆ shuffle() [10/33]

v16float shuffle ( v16float  ,
v16float  ,
unsigned int   
)

◆ shuffle() [11/33]

v16int32 shuffle ( v16int32  a,
unsigned int  mode 
)

◆ shuffle() [12/33]

v16int32 shuffle ( v16int32  ,
v16int32  ,
unsigned int   
)

◆ shuffle() [13/33]

v16uint32 shuffle ( v16uint32  a,
unsigned int  mode 
)

◆ shuffle() [14/33]

v16uint32 shuffle ( v16uint32  ,
v16uint32  ,
unsigned int   
)

◆ shuffle() [15/33]

v256int4_sparse shuffle ( v256int4_sparse  qx,
int  itlv 
)

◆ shuffle() [16/33]

v256uint4_sparse shuffle ( v256uint4_sparse  qx,
int  itlv 
)

◆ shuffle() [17/33]

v32bfloat16 shuffle ( v32bfloat16  a,
unsigned int  mode 
)

◆ shuffle() [18/33]

v32bfloat16 shuffle ( v32bfloat16  ,
v32bfloat16  ,
unsigned int   
)

◆ shuffle() [19/33]

v32int16 shuffle ( v32int16  a,
unsigned int  mode 
)

◆ shuffle() [20/33]

v32int16 shuffle ( v32int16  ,
v32int16  ,
unsigned int   
)

◆ shuffle() [21/33]

v32uint16 shuffle ( v32uint16  a,
unsigned int  mode 
)

◆ shuffle() [22/33]

v32uint16 shuffle ( v32uint16  ,
v32uint16  ,
unsigned int   
)

◆ shuffle() [23/33]

v64bfloat16_sparse shuffle ( v64bfloat16_sparse  qx,
int  itlv 
)

◆ shuffle() [24/33]

v64int16_sparse shuffle ( v64int16_sparse  qx,
int  itlv 
)

◆ shuffle() [25/33]

v64int8 shuffle ( v64int8  a,
unsigned int  mode 
)

◆ shuffle() [26/33]

v64int8 shuffle ( v64int8  ,
v64int8  ,
unsigned int   
)

◆ shuffle() [27/33]

v64uint16_sparse shuffle ( v64uint16_sparse  qx,
int  itlv 
)

◆ shuffle() [28/33]

v64uint8 shuffle ( v64uint8  a,
unsigned int  mode 
)

◆ shuffle() [29/33]

v64uint8 shuffle ( v64uint8  ,
v64uint8  ,
unsigned int   
)

◆ shuffle() [30/33]

v8cfloat shuffle ( v8cfloat  a,
unsigned int  mode 
)

◆ shuffle() [31/33]

v8cfloat shuffle ( v8cfloat  ,
v8cfloat  ,
unsigned int   
)

◆ shuffle() [32/33]

v8cint32 shuffle ( v8cint32  a,
unsigned int  mode 
)

◆ shuffle() [33/33]

v8cint32 shuffle ( v8cint32  ,
v8cint32  ,
unsigned int   
)

◆ shuffle_bfloat16()

v32bfloat16 shuffle_bfloat16 ( bfloat16  b,
unsigned int  m 
)

◆ shuffle_c16()

v16cint16 shuffle_c16 ( cint16  b,
unsigned int  m 
)

◆ shuffle_c32()

v8cint32 shuffle_c32 ( cint32  b,
unsigned int  m 
)

◆ shuffle_float()

v16float shuffle_float ( float  b,
unsigned int  m 
)

◆ shuffle_s16()

v32int16 shuffle_s16 ( int  b,
unsigned int  m 
)

◆ shuffle_s32()

v16int32 shuffle_s32 ( int  b,
unsigned int  m 
)

◆ shuffle_s64()

v16int32 shuffle_s64 ( long long  b,
unsigned int  m 
)

◆ shuffle_s8()

v64int8 shuffle_s8 ( int  b,
unsigned int  m 
)

◆ shuffle_u16()

v32uint16 shuffle_u16 ( unsigned int  b,
unsigned int  m 
)

◆ shuffle_u32()

v16uint32 shuffle_u32 ( unsigned int  b,
unsigned int  m 
)

◆ shuffle_u64()

v16uint32 shuffle_u64 ( unsigned long long  b,
unsigned int  m 
)

◆ shuffle_u8()

v64uint8 shuffle_u8 ( unsigned int  b,
unsigned int  m 
)

◆ shuffle_v2s32()

v16int32 shuffle_v2s32 ( v2int32  b,
unsigned int  m 
)

◆ shuffle_v2u32()

v16uint32 shuffle_v2u32 ( v2uint32  b,
unsigned int  m 
)