|
63 | 63 | # ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
64 | 64 | my ($key2, $key1, $tweak, $length, $input, $output); |
65 | 65 |
|
66 | | - if ($win64) { |
67 | | - $input = "%rcx"; |
68 | | - $output = "%rdx"; |
69 | | - $length = "%r8"; |
70 | | - $key1 = "%r9"; |
71 | | - $key2 = "%r10"; |
72 | | - $tweak = "%r11"; |
73 | | - } else { |
74 | 66 | $input = "%rdi"; |
75 | 67 | $output = "%rsi"; |
76 | 68 | $length = "%rdx"; |
77 | 69 | $key1 = "%rcx"; |
78 | 70 | $key2 = "%r8"; |
79 | 71 | $tweak = "%r9"; |
80 | | - } |
81 | 72 |
|
82 | 73 | # arguments for temp parameters |
83 | 74 | my ($tmp1, $gf_poly_8b, $gf_poly_8b_temp); |
84 | | - if ($win64) { |
85 | | - $tmp1 = "%r10"; |
86 | | - $gf_poly_8b = "%rdi"; |
87 | | - $gf_poly_8b_temp = "%rsi"; |
88 | | - } else { |
89 | 75 | $tmp1 = "%r8"; |
90 | 76 | $gf_poly_8b = "%r10"; |
91 | 77 | $gf_poly_8b_temp = "%r11"; |
92 | | - } |
93 | 78 |
|
94 | 79 | # ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
95 | 80 | # ;;; Helper functions |
|
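With the hand-written Win64 branch removed, the function body always sees its
six parameters in the System V AMD64 argument registers. A minimal sketch of
the resulting mapping, reusing the variable names from the diff (illustrative
only, not the exact surrounding code):

    # SysV AMD64 convention: the first six integer arguments arrive in
    # %rdi, %rsi, %rdx, %rcx, %r8, %r9, in that order.
    my ($input, $output, $length, $key1, $key2, $tweak) =
        ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");

On Windows the arguments instead arrive in %rcx, %rdx, %r8, %r9 plus two stack
slots, which is what the deleted branch handled by hand; that translation is
now left to the perlasm layer (see the `.type ...,\@function,6` change below).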
1469 | 1454 | $code.=<<___; |
1470 | 1455 | .globl aes_hw_xts_encrypt_avx512 |
1471 | 1456 | .hidden aes_hw_xts_encrypt_avx512 |
1472 | | - .type aes_hw_xts_encrypt_avx512,\@abi-omnipotent |
| 1457 | + .type aes_hw_xts_encrypt_avx512,\@function,6 |
1473 | 1458 | .align 32 |
1474 | 1459 | aes_hw_xts_encrypt_avx512: |
1475 | 1460 | .cfi_startproc |
|
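Declaring the symbol as `\@function,6` rather than `\@abi-omnipotent` tells the
x86_64-xlate.pl translator that this routine takes six arguments in the Unix
calling convention, so for Win64 output it can prepend its own argument-shuffling
prologue, replacing the hand-written register branch removed above. Roughly (a
sketch of the expected shuffle, not a copy of the translator's actual output;
the %rdi/%rsi saves and SEH bookkeeping are omitted, and the stack offsets
assume the return address and 32-byte shadow space still sit at the top of the
stack):

    #   mov %rcx,%rdi          # arg1: input
    #   mov %rdx,%rsi          # arg2: output
    #   mov %r8,%rdx           # arg3: length
    #   mov %r9,%rcx           # arg4: key1
    #   mov 40(%rsp),%r8       # arg5: key2  (passed on the stack on Win64)
    #   mov 48(%rsp),%r9       # arg6: tweak (passed on the stack on Win64)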
1860 | 1845 | vmovdqa $XMM_STORAGE + 16 * 3($TW), %xmm9 |
1861 | 1846 |
|
1862 | 1847 | # Zero the 64 bytes we just restored to the xmm registers. |
1863 | | - vmovdqa64 %zmm0,$XMM_STORAGE($TW) |
| 1848 | + vmovdqu64 %zmm0,$XMM_STORAGE($TW) |
1864 | 1849 |
|
1865 | 1850 | vmovdqa $XMM_STORAGE + 16 * 4($TW), %xmm10 |
1866 | 1851 | vmovdqa $XMM_STORAGE + 16 * 5($TW), %xmm11 |
1867 | 1852 | vmovdqa $XMM_STORAGE + 16 * 6($TW), %xmm12 |
1868 | 1853 | vmovdqa $XMM_STORAGE + 16 * 7($TW), %xmm13 |
1869 | 1854 |
|
1870 | 1855 | # And again. |
1871 | | - vmovdqa64 %zmm0,$XMM_STORAGE + 16 * 4($TW) |
| 1856 | + vmovdqu64 %zmm0,$XMM_STORAGE + 16 * 4($TW) |
1872 | 1857 |
|
1873 | 1858 | vmovdqa $XMM_STORAGE + 16 * 8($TW), %xmm14 |
1874 | 1859 | vmovdqa $XMM_STORAGE + 16 * 9($TW), %xmm15 |
1875 | 1860 |
|
1876 | 1861 | # Last round is only 32 bytes (256-bits), so we use `%ymm` as the |
1877 | 1862 | # source operand. |
1878 | | - vmovdqa %ymm0,$XMM_STORAGE + 16 * 8($TW) |
| 1863 | + vmovdqu %ymm0,$XMM_STORAGE + 16 * 8($TW) |
1879 | 1864 | ___ |
1880 | 1865 | } |
1881 | 1866 |
|
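The switch from the aligned stores (`vmovdqa64`/`vmovdqa`) to their unaligned
forms (`vmovdqu64`/`vmovdqu`) matters because the aligned encodings require the
memory operand to be aligned to the full operand width (64 bytes for %zmm, 32
bytes for %ymm) and fault otherwise, whereas the xmm save area at
$XMM_STORAGE($TW) need not be 64-byte aligned (the surrounding restores only
rely on 16-byte `vmovdqa` accesses). A minimal sketch of the pattern, assuming
%zmm0 still needs zeroing (in the code above it already holds zero at this
point):

    $code.=<<___;
        vpxorq    %zmm0,%zmm0,%zmm0              # all-zero source register
        vmovdqu64 %zmm0,$XMM_STORAGE($TW)        # unaligned store: works for any address
    ___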
|
2113 | 2098 | $code.=<<___; |
2114 | 2099 | .globl aes_hw_xts_decrypt_avx512 |
2115 | 2100 | .hidden aes_hw_xts_decrypt_avx512 |
2116 | | - .type aes_hw_xts_decrypt_avx512,\@abi-omnipotent |
| 2101 | + .type aes_hw_xts_decrypt_avx512,\@function,6 |
2117 | 2102 | .align 32 |
2118 | 2103 | aes_hw_xts_decrypt_avx512: |
2119 | 2104 | .cfi_startproc |
|
2635 | 2620 | vmovdqa $XMM_STORAGE + 16 * 3($TW), %xmm9 |
2636 | 2621 |
|
2637 | 2622 | # Zero the 64 bytes we just restored to the xmm registers. |
2638 | | - vmovdqa64 %zmm0,$XMM_STORAGE($TW) |
| 2623 | + vmovdqu64 %zmm0,$XMM_STORAGE($TW) |
2639 | 2624 |
|
2640 | 2625 | vmovdqa $XMM_STORAGE + 16 * 4($TW), %xmm10 |
2641 | 2626 | vmovdqa $XMM_STORAGE + 16 * 5($TW), %xmm11 |
2642 | 2627 | vmovdqa $XMM_STORAGE + 16 * 6($TW), %xmm12 |
2643 | 2628 | vmovdqa $XMM_STORAGE + 16 * 7($TW), %xmm13 |
2644 | 2629 |
|
2645 | 2630 | # And again. |
2646 | | - vmovdqa64 %zmm0,$XMM_STORAGE + 16 * 4($TW) |
| 2631 | + vmovdqu64 %zmm0,$XMM_STORAGE + 16 * 4($TW) |
2647 | 2632 |
|
2648 | 2633 | vmovdqa $XMM_STORAGE + 16 * 8($TW), %xmm14 |
2649 | 2634 | vmovdqa $XMM_STORAGE + 16 * 9($TW), %xmm15 |
2650 | 2635 |
|
2651 | 2636 | # Last round is only 32 bytes (256-bits), so we use `%ymm` as the |
2652 | 2637 | # source operand. |
2653 | | - vmovdqa %ymm0,$XMM_STORAGE + 16 * 8($TW) |
| 2638 | + vmovdqu %ymm0,$XMM_STORAGE + 16 * 8($TW) |
2654 | 2639 | ___ |
2655 | 2640 | } |
2656 | 2641 |
|
|