SSE Intrinsic命令まとめ
まとめて書いてるやつが無かったので自分で書いてみた。殆どMSDNのぱくり。
関数名 | 型 | 内容 | 詳細 | 備考 |
---|---|---|---|---|
_mm_add_ss | float | 和 | r0 := a0 + b0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_add_ps | float | 和 | r0 := a0 + b0 r1 := a1 + b1 r2 := a2 + b2 r3 := a3 + b3 |
|
_mm_sub_ss | float | 差 | r0 := a0 - b0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_sub_ps | float | 差 | r0 := a0 - b0 r1 := a1 - b1 r2 := a2 - b2 r3 := a3 - b3 |
|
_mm_mul_ss | float | 積 | r0 := a0 * b0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_mul_ps | float | 積 | r0 := a0 * b0 r1 := a1 * b1 r2 := a2 * b2 r3 := a3 * b3 |
|
_mm_div_ss | float | 商 | r0 := a0 / b0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_div_ps | float | 商 | r0 := a0 / b0 r1 := a1 / b1 r2 := a2 / b2 r3 := a3 / b3 |
|
_mm_sqrt_ss | float | 平方根 | r0 := sqrt(a0) r1 := a1 r2 := a2 r3 := a3 |
|
_mm_sqrt_ps | float | 平方根 | r0 := sqrt(a0) r1 := sqrt(a1) r2 := sqrt(a2) r3 := sqrt(a3) |
|
_mm_rcp_ss | float | 逆数 | r0 := recip(a0) r1 := a1 r2 := a2 r3 := a3 |
|
_mm_rcp_ps | float | 逆数 | r0 := recip(a0) r1 := recip(a1) r2 := recip(a2) r3 := recip(a3) |
|
_mm_rsqrt_ss | float | 平方根の逆数 | r0 := recip(sqrt(a0)) r1 := a1 r2 := a2 r3 := a3 |
|
_mm_rsqrt_ps | float | 平方根の逆数 | r0 := recip(sqrt(a0)) r1 := recip(sqrt(a1)) r2 := recip(sqrt(a2)) r3 := recip(sqrt(a3)) |
|
_mm_min_ss | float | 最小値 | r0 := min(a0, b0) r1 := a1 r2 := a2 r3 := a3 |
|
_mm_min_ps | float | 最小値 | r0 := min(a0, b0) r1 := min(a1, b1) r2 := min(a2, b2) r3 := min(a3, b3) |
|
_mm_max_ss | float | 最大値 | r0 := max(a0, b0) r1 := a1 r2 := a2 r3 := a3 |
|
_mm_max_ps | float | 最大値 | r0 := max(a0, b0) r1 := max(a1, b1) r2 := max(a2, b2) r3 := max(a3, b3) |
|
_mm_and_ps | float | 論理積 | r0 := a0 & b0 r1 := a1 & b1 r2 := a2 & b2 r3 := a3 & b3 |
|
_mm_andnot_ps | float | aの否定とbの論理積 | r0 := ~a0 & b0 r1 := ~a1 & b1 r2 := ~a2 & b2 r3 := ~a3 & b3 |
|
_mm_or_ps | float | 論理和 | r0 := a0 or b0 r1 := a1 or b1 r2 := a2 or b2 r3 := a3 or b3 |
|
_mm_xor_ps | float | 排他的論理和 | r0 := a0 ^ b0 r1 := a1 ^ b1 r2 := a2 ^ b2 r3 := a3 ^ b3 |
|
_mm_cmpeq_ss | float | 等しい | r0 := (a0 == b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpeq_ps | float | 等しい | r0 := (a0 == b0) ? 0xffffffff : 0x0 r1 := (a1 == b1) ? 0xffffffff : 0x0 r2 := (a2 == b2) ? 0xffffffff : 0x0 r3 := (a3 == b3) ? 0xffffffff : 0x0 |
|
_mm_cmplt_ss | float | 小さい | r0 := (a0 < b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmplt_ps | float | 小さい | r0 := (a0 < b0) ? 0xffffffff : 0x0 r1 := (a1 < b1) ? 0xffffffff : 0x0 r2 := (a2 < b2) ? 0xffffffff : 0x0 r3 := (a3 < b3) ? 0xffffffff : 0x0 |
|
_mm_cmple_ss | float | 以下 | r0 := (a0 <= b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmple_ps | float | 以下 | r0 := (a0 <= b0) ? 0xffffffff : 0x0 r1 := (a1 <= b1) ? 0xffffffff : 0x0 r2 := (a2 <= b2) ? 0xffffffff : 0x0 r3 := (a3 <= b3) ? 0xffffffff : 0x0 |
|
_mm_cmpgt_ss | float | 大きい | r0 := (a0 > b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpgt_ps | float | 大きい | r0 := (a0 > b0) ? 0xffffffff : 0x0 r1 := (a1 > b1) ? 0xffffffff : 0x0 r2 := (a2 > b2) ? 0xffffffff : 0x0 r3 := (a3 > b3) ? 0xffffffff : 0x0 |
|
_mm_cmpge_ss | float | 以上 | r0 := (a0 >= b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpge_ps | float | 以上 | r0 := (a0 >= b0) ? 0xffffffff : 0x0 r1 := (a1 >= b1) ? 0xffffffff : 0x0 r2 := (a2 >= b2) ? 0xffffffff : 0x0 r3 := (a3 >= b3) ? 0xffffffff : 0x0 |
|
_mm_cmpneq_ss | float | 等しくない | r0 := (a0 != b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpneq_ps | float | 等しくない | r0 := (a0 != b0) ? 0xffffffff : 0x0 r1 := (a1 != b1) ? 0xffffffff : 0x0 r2 := (a2 != b2) ? 0xffffffff : 0x0 r3 := (a3 != b3) ? 0xffffffff : 0x0 |
|
_mm_cmpnlt_ss | float | 小さいの否定 | r0 := ~(a0 < b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpnlt_ps | float | 小さいの否定 | r0 := ~(a0 < b0) ? 0xffffffff : 0x0 r1 := ~(a1 < b1) ? 0xffffffff : 0x0 r2 := ~(a2 < b2) ? 0xffffffff : 0x0 r3 := ~(a3 < b3) ? 0xffffffff : 0x0 |
|
_mm_cmpnle_ss | float | 以下の否定 | r0 := ~(a0 <= b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpnle_ps | float | 以下の否定 | r0 := ~(a0 <= b0) ? 0xffffffff : 0x0 r1 := ~(a1 <= b1) ? 0xffffffff : 0x0 r2 := ~(a2 <= b2) ? 0xffffffff : 0x0 r3 := ~(a3 <= b3) ? 0xffffffff : 0x0 |
|
_mm_cmpngt_ss | float | 大きいの否定 | r0 := ~(a0 > b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpngt_ps | float | 大きいの否定 | r0 := ~(a0 > b0) ? 0xffffffff : 0x0 r1 := ~(a1 > b1) ? 0xffffffff : 0x0 r2 := ~(a2 > b2) ? 0xffffffff : 0x0 r3 := ~(a3 > b3) ? 0xffffffff : 0x0 |
|
_mm_cmpnge_ss | float | 以上の否定 | r0 := ~(a0 >= b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpnge_ps | float | 以上の否定 | r0 := ~(a0 >= b0) ? 0xffffffff : 0x0 r1 := ~(a1 >= b1) ? 0xffffffff : 0x0 r2 := ~(a2 >= b2) ? 0xffffffff : 0x0 r3 := ~(a3 >= b3) ? 0xffffffff : 0x0 |
|
_mm_cmpord_ss | float | Ordered | r0 := (a0 ord? b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpord_ps | float | Ordered | r0 := (a0 ord? b0) ? 0xffffffff : 0x0 r1 := (a1 ord? b1) ? 0xffffffff : 0x0 r2 := (a2 ord? b2) ? 0xffffffff : 0x0 r3 := (a3 ord? b3) ? 0xffffffff : 0x0 |
|
_mm_cmpunord_ss | float | Unordered | r0 := (a0 unord? b0) ? 0xffffffff : 0x0 r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cmpunord_ps | float | Unordered | r0 := (a0 unord? b0) ? 0xffffffff : 0x0 r1 := (a1 unord? b1) ? 0xffffffff : 0x0 r2 := (a2 unord? b2) ? 0xffffffff : 0x0 r3 := (a3 unord? b3) ? 0xffffffff : 0x0 |
|
_mm_comieq_ss | float | 等しい | r := (a0 == b0) ? 0x1 : 0x0 | |
_mm_comilt_ss | float | 小さい | r := (a0 < b0) ? 0x1 : 0x0 | |
_mm_comile_ss | float | 以下 | r := (a0 <= b0) ? 0x1 : 0x0 | |
_mm_comigt_ss | float | 大きい | r := (a0 > b0) ? 0x1 : 0x0 | |
_mm_comige_ss | float | 以上 | r := (a0 >= b0) ? 0x1 : 0x0 | |
_mm_comineq_ss | float | 等しくない | r := (a0 != b0) ? 0x1 : 0x0 | |
_mm_ucomieq_ss | float | 等しい | r := (a0 == b0) ? 0x1 : 0x0 | |
_mm_ucomilt_ss | float | 小さい | r := (a0 < b0) ? 0x1 : 0x0 | |
_mm_ucomile_ss | float | 以下 | r := (a0 <= b0) ? 0x1 : 0x0 | |
_mm_ucomigt_ss | float | 大きい | r := (a0 > b0) ? 0x1 : 0x0 | |
_mm_ucomige_ss | float | 以上 | r := (a0 >= b0) ? 0x1 : 0x0 | |
_mm_ucomineq_ss | float | 等しくない | r := (a0 != b0) ? 0x1 : 0x0 | |
_mm_cvtss_si32 | float | float -> long | r := (int)a0 | 現在設定されている小数丸めモードが使われる |
_mm_cvtps_pi32 | float | float -> long | r0 := (int)a0 r1 := (int)a1 |
〃 |
_mm_cvttss_si32 | float | float -> long | r := (int)a0 | 切り捨て |
_mm_cvttps_pi32 | float | float -> long | r0 := (int)a0 r1 := (int)a1 |
〃 |
_mm_cvtsi32_ss | float | long -> float | r0 := (float)b r1 := a1 r2 := a2 r3 := a3 |
|
_mm_cvtpi32_ps | float | long -> float | r0 := (float)b0 r1 := (float)b1 r2 := a2 r3 := a3 |
|
_mm_cvtpi16_ps | float | short -> float | r0 := (float)a0 r1 := (float)a1 r2 := (float)a2 r3 := (float)a3 |
|
_mm_cvtpu16_ps | float | unsigned short -> float | r0 := (float)a0 r1 := (float)a1 r2 := (float)a2 r3 := (float)a3 |
|
_mm_cvtpi8_ps | float | char -> float | r0 := (float)a0 r1 := (float)a1 r2 := (float)a2 r3 := (float)a3 |
|
_mm_cvtpu8_ps | float | unsigned char -> float | r0 := (float)a0 r1 := (float)a1 r2 := (float)a2 r3 := (float)a3 |
|
_mm_cvtpi32x2_ps | float | long -> float | r0 := (float)a0 r1 := (float)a1 r2 := (float)b0 r3 := (float)b1 |
|
_mm_cvtps_pi16 | float | float -> signed short | r0 := (short)a0 r1 := (short)a1 r2 := (short)a2 r3 := (short)a3 |
|
_mm_cvtps_pi8 | float | float -> signed char | r0 := (char)a0 r1 := (char)a1 r2 := (char)a2 r3 := (char)a3 |
と思ったものの・・・、量が半端じゃなく多いのでここらで終了。
SSE以降の命令の名前の付け方は、