mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-04-29 23:41:12 -06:00
ffmpeg
Update rpcs3.yml
Update rpcs3.yml
Update rpcs3.yml
folded scalar with neg newline
x64-windows-release
x64-windows-rel
if: "!cancelled()"
vcpkg_build_type
Update rpcs3.yml
no ccache
${{github.run_id}}
zlib
vcpkg qt
vulkan
ffmpeg
llvm
build with clang-cl
Create build-windows-clang-cl.ps1
llvm --keep-going
llvm[clang,core,tools,lld,target-x86]:x64-windows-release
llvm:x64-windows-release@17.0.2
llvm with debug on
Create vcpkg.json
Update rpcs3.yml
ffmpeg features
vcpkg nuget
minimal vcpkg.json
Update rpcs3.yml
fetch nuget
more packages vcpkg.json
libpng vcpkg classic
cmake 3.29.0
Rename vcpkg.json to x_vcpkg.json
Update rpcs3.yml
Update rpcs3.yml
llvm
Update rpcs3.yml
git llvm
LLVM with cache
Update rpcs3.yml
Update rpcs3.yml
build llvm
LLVM_TARGETS_TO_BUILD="X86"
llvm binary
set path
build rpcs3
DIA SDK
Update rpcs3.yml
Update rpcs3.yml
Update rpcs3.yml
fix conditionals
fix conditionals
set shell
VCPKG env vars
DIA SDK
Update asm.hpp
Update types.hpp
Update aesni.cpp
Update CMakeLists.txt
Update ConfigureCompiler.cmake
Update StrFmt.cpp
Update CMakeLists.txt
Update CMakeLists.txt
Build with changes
Update CMakeLists.txt
D:\a\rpcs3\rpcs3\llvm
D:\a\rpcs3\rpcs3\llvm
llvm-*
llvm-${{ matrix.llvmver }}
clangpath llvm-*
$llvmver
$clangPath
$clangPath include bin
rm duplicate "add_compile_options"
USE_SYSTEM_ZSTD
USE_SYSTEM_ZSTD
USE_SYSTEM_ZSTD
USE_SYSTEM_ZSTD
zstd
Update CMakeLists.txt
PkgConfig zstd
zstd::zstd ALIAS PkgConfig::libzstd
clang-cl only
cache hit
Update CMakeLists.txt
cache-hit
cache vcpkg/vcpkg.exe
NOT USE_SYSTEM_ZSTD
vcpkg_root
revert zstd
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
/defaultlib:zstd_static.lib
Remove else
/defaultlib:zstd.lib
Zstd shared
Nodefaultlib
Create Findzstd.cmake
zstd CMakeLists.txt
not use zstd system CMakeLists.txt
dont add 3rdparty::libzstd
add_library(PkgConfig::libzstd ALIAS 3rdparty::zstd)
add_library(PkgConfig::libzstd ALIAS 3rdparty::zstd)
add_library(3rdparty::libzstd ALIAS PkgConfig::zstd)
add_library(3rdparty::zstd ALIAS PkgConfig::zstd)
Update Findzstd.cmake
zstd::zstd
Update CMakeLists.txt
zstd::zstd
zstd::zstd CMakeLists.txt
PkgConfig::libzstd CMakeLists.txt
zstd::libzstd
Update CMakeLists.txt
Update CMakeLists.txt
vcpkg zstd CMakeLists.txt
MODULES CMakeLists.txt
zstd::libzstd
add_library(3rdparty::7zip ALIAS 3rdparty_7zip)
LLVM Static-link on
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Release>:Release>")
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
Update CMakeLists.txt
message(STATUS "MSVC Runtime Library: ${CMAKE_MSVC_RUNTIME_LIBRARY}")
revert CMakeLists.txt
DCMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded"
rpcs3_emu SHARED
STATIC CMakeLists.txt
cmake_policy(SET CMP0091 NEW)
LLVM_AVAILABLE=ON
add_compile_definitions(LLVM_AVAILABLE=true)
add_compile_options(/MT)
LLVM_AVAILABLE=1
add_compile_definitions(_DISABLE_STRING_ANNOTATION=1 _DISABLE_VECTOR_ANNOTATION=1)
Update build-windows-clang-cl.ps1
clang msvc17-msvcrt
rm compressed archive
cache name
cache name again
builtin clang-rt
build all
set $llvmPath
extract into llvm
copy with -verbose
mv destination path
build llvm cache
$clangPath
full build
LLVM static
LLVM -> LLVMCore
STATIC_LINK_LLVM=OFF
no lookup llvm bin dir
revert
revert
revert
LLVMCore -> LLVM
LLVM -> LLVM-C
llvm_map_components_to_libnames
Update CMakeLists.txt
LLVM CMakeLists.txt
LLVM_DIR=$llvmPath
MultiThreadedDLL
CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON
clang-cl version
cmake -v
--log-level=VERBOSE
built-in LLVM
llvm lib folder
PF short name
built-in LLVM/Clang
20.1.8
mt.exe path
Use llvm-mt
"$clangPath/llvm-mt.exe"
fix terminator
Add winqtdeploy to PATH
Test windeployqt6.exe with version
Missing )
No winqtdeploy
Build no quotes
prep artifacts
no winqtdeploy
$VcpkgWindeployqt --version
$($VcpkgWindeployqt) --version
--version
Invoke-Expression
Update build-windows-clang-cl.ps1
rpcs3_win_clang.7z
rpcs3 artifacts dir
cp artifacts
build/bin dir
clone recent asmjit
Update build-windows-clang-cl.ps1
CMAKE_CXX_SCAN_FOR_MODULES=ON
default-openal
USE_NATIVE_INSTRUCTIONS=ON
Update ConfigureCompiler.cmake
USE_NATIVE_INSTRUCTIONS=OFF / rm 512 instructions
revert
set(LLVM_LIBS LLVM)
COMPILER_X86 only
add_compile_options(-msse -msse2 -mcx16 -mavx512f -mavx -mavx2 -maes -mrtm -mpclmul -mmwaitx -mwaitpkg)
avx512 flags
add_compile_options(-march=native)
check_cxx_compiler_flag("-march=native"
add_compile_options(-maes -mrtm -mpclmul -mmwaitx -mwaitpkg)
COMMAND Qt6::windeployqt --version
COMMAND Qt6::windeployqt $<TARGET_FILE:rpcs3>
add vcpkg bin to PATH
check vcpkg is added to PATH
update PATH cache
[System.EnvironmentVariableTarget]::Machine
display all paths
cmd.exe /c "echo %PATH%"
make windeployqt verbose
verbose 2
Invoke-Expression "cmd.exe /c "set PATH=%PATH%;$VcpkgBin""
Update build-windows-clang-cl.ps1
no invoke
cmd.exe /c "set PATH=$VcpkgBin;%PATH%"
--ignore-library-errors
-DQTPATH_EXE="$VcpkgQtpath"
no --ignore-library-errors
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
Update CMakeLists.txt
change \ to /
${WINDEPLOYQT_EXECUTABLE}
Qt6::windeployqt
gci vcpkg tools bin
--ignore-library-errors
x64-windows/tools
VCPKG_TRIPLET: x64-windows
Save vcpkg cache
revert
revert
remove MSVC runtime message
revert
rm dupes
revert
Delete buildfiles/cmake/Findzstd.cmake
WINDEPLOYQT_EXECUTABLE
Delete x_vcpkg.json
add AVX512 compile options
Wno-deprecated-anon-enum-enum-conversion
clean-up
silence warnings
terminate properly
set PRIVATE CFLAGS
--no-vulkan
rm vulkan lib at package step
override cflags
remove OpenGL_GL_PREFERENCE=LEGACY
rm --no-vulkan switch
Update CMakeLists.txt
restore LLVM dir
ASMJIT_CMAKE_FLAGS
revert
order
check_cxx_compiler_flag
Wno-unused-value
revert
revert
Update CMakeLists.txt
Update llvm_build_clang_cl.vcxproj
Update llvm_build_clang_cl.vcxproj
Update rpcs3.yml
go to deploy if successful
Create deploy-windows-clang-cl.sh
build then deploy
Update build-windows-clang-cl.ps1
deploy step
test
Update ProgramStateCache.cpp
remove AVX512
Update JITLLVM.cpp
FFMPEG 8.0
FFMPEG 8.0
LLVM 21
LLVM 21
set CMAKE_MSVC_RUNTIME_LIBRARY
update OpenAL
msys2 clang
git openal-soft reset
update yaml
reset to master
reset to master
"Build succeeded"
ALSOFT_ENABLE_MODULES OFF
Build All Jobs
Run Builds when not on Main branch
Win Qt 6.9.3
Update build-mac-arm64.sh
Update build-mac.sh
Create TCDarwinX86_64.cmake
703 lines
27 KiB
C++
703 lines
27 KiB
C++
#if defined(__SSE2__) || defined(_M_X64)
|
|
|
|
/*
|
|
* AES-NI support functions
|
|
*
|
|
* Copyright (C) 2013, Brainspark B.V.
|
|
*
|
|
* This file is part of PolarSSL (http://www.polarssl.org)
|
|
* Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
/*
|
|
* [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
|
|
* [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
|
|
*/
|
|
|
|
#include "aesni.h"
|
|
|
|
#if defined(_MSC_VER) && defined(_M_X64)
|
|
#define POLARSSL_HAVE_MSVC_X64_INTRINSICS
|
|
#include <intrin.h>
|
|
#endif
|
|
|
|
/*
|
|
* AES-NI support detection routine
|
|
*/
|
|
// Query CPUID leaf 1 once and test feature bits against the ECX result.
// 'what' is a caller-supplied mask of CPUID.1:ECX feature flags; returns
// non-zero iff at least one requested bit is set (callers normally pass a
// single bit, e.g. the AES-NI or PCLMULQDQ flag).
// The CPUID result is cached in function-static state after the first call.
// NOTE(review): the cache is not thread-safe on first use — benign here
// since concurrent callers would just redo the same CPUID.
int aesni_supports( unsigned int what )
{
    static int done = 0;        // set once the CPUID probe has run
    static unsigned int c = 0;  // cached ECX from CPUID leaf 1

    if( ! done )
    {
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
        int regs[4]; // eax, ebx, ecx, edx
        __cpuid( regs, 1 );
        c = regs[2];
#else
        /* GNU inline asm: EAX=1, CPUID, capture ECX; EAX/EBX/EDX are
         * clobbered by the instruction and listed as such. */
        asm( "movl $1, %%eax \n"
             "cpuid \n"
             : "=c" (c)
             :
             : "eax", "ebx", "edx" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
        done = 1;
    }

    return( ( c & what ) != 0 );
}
|
|
|
|
/*
|
|
* AES-NI AES-ECB block en(de)cryption
|
|
*/
|
|
// Encrypt or decrypt one 16-byte block in ECB mode using the AES-NI
// instructions and the expanded round keys in ctx->rk (ctx->nr rounds).
// mode == AES_ENCRYPT selects encryption; any other value decrypts.
// Always returns 0.
int aesni_crypt_ecb( aes_context *ctx,
                     int mode,
                     const unsigned char input[16],
                     unsigned char output[16] )
{
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
    __m128i* rk, a;
    int i;

    /* Round 0: whitening XOR with the first round key. */
    rk = (__m128i*)ctx->rk;
    a = _mm_xor_si128( _mm_loadu_si128( (__m128i*)input ), _mm_loadu_si128( rk++ ) );

    if (mode == AES_ENCRYPT)
    {
        /* nr-1 full rounds, then the final round (no MixColumns). */
        for (i = ctx->nr - 1; i; --i)
            a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
        a = _mm_aesenclast_si128( a, _mm_loadu_si128( rk ) );
    }
    else
    {
        /* Decryption expects rk to hold InvMixColumns-transformed keys
         * (see aesni_inverse_key). */
        for (i = ctx->nr - 1; i; --i)
            a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
        a = _mm_aesdeclast_si128( a, _mm_loadu_si128( rk ) );
    }

    _mm_storeu_si128( (__m128i*)output, a );
#else
    /* Same flow in GNU inline asm. NOTE(review): the 'jz 2f' treats
     * mode == 0 as decryption — assumes AES_DECRYPT is defined as 0;
     * confirm against aes.h. */
    asm( "movdqu (%3), %%xmm0 \n" // load input
         "movdqu (%1), %%xmm1 \n" // load round key 0
         "pxor %%xmm1, %%xmm0 \n" // round 0
         "addq $16, %1 \n" // point to next round key
         "subl $1, %0 \n" // normal rounds = nr - 1
         "test %2, %2 \n" // mode?
         "jz 2f \n" // 0 = decrypt

         "1: \n" // encryption loop
         "movdqu (%1), %%xmm1 \n" // load round key
         "aesenc %%xmm1, %%xmm0 \n" // do round
         "addq $16, %1 \n" // point to next round key
         "subl $1, %0 \n" // loop
         "jnz 1b \n"
         "movdqu (%1), %%xmm1 \n" // load round key
         "aesenclast %%xmm1, %%xmm0 \n" // last round
         "jmp 3f \n"

         "2: \n" // decryption loop
         "movdqu (%1), %%xmm1 \n"
         "aesdec %%xmm1, %%xmm0 \n" // do round
         "addq $16, %1 \n"
         "subl $1, %0 \n"
         "jnz 2b \n"
         "movdqu (%1), %%xmm1 \n" // load round key
         "aesdeclast %%xmm1, %%xmm0 \n" // last round

         "3: \n"
         "movdqu %%xmm0, (%4) \n" // export output
         :
         : "r" (ctx->nr), "r" (ctx->rk), "r" (mode), "r" (input), "r" (output)
         : "memory", "cc", "xmm0", "xmm1" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */

    return( 0 );
}
|
|
|
|
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
|
|
/* 128x128 -> 256-bit carry-less multiplication (schoolbook method,
 * [CLMUL-WP] algorithm 1). Low 128 bits of the product go to *r0,
 * high 128 bits to *r1. */
static inline void clmul256( __m128i a, __m128i b, __m128i* r0, __m128i* r1 )
{
    __m128i lo, hi, mid1, mid2, mid;

    lo   = _mm_clmulepi64_si128( a, b, 0x00 );  /* a0 * b0 = c1:c0 */
    hi   = _mm_clmulepi64_si128( a, b, 0x11 );  /* a1 * b1 = d1:d0 */
    mid1 = _mm_clmulepi64_si128( a, b, 0x10 );  /* a0 * b1 = e1:e0 */
    mid2 = _mm_clmulepi64_si128( a, b, 0x01 );  /* a1 * b0 = f1:f0 */

    /* Fold the combined middle partial product into both halves:
     *   *r0 = lo ^ (mid << 64)   (= c1:c0 ^ f0^e0:0)
     *   *r1 = hi ^ (mid >> 64)   (= d1:d0 ^ 0:f1^e1) */
    mid = _mm_xor_si128( mid1, mid2 );
    *r0 = _mm_xor_si128( lo, _mm_slli_si128( mid, 8 ) );
    *r1 = _mm_xor_si128( hi, _mm_srli_si128( mid, 8 ) );
}
|
|
|
|
/* Logical left shift by one bit of a 256-bit value held as (a1:a0);
 * the shifted value is written to (*s1:*s0). */
static inline void sll256( __m128i a0, __m128i a1, __m128i* s0, __m128i* s1 )
{
    __m128i lo_shl, hi_shl, lo_carry, hi_carry;

    /* Shift every 64-bit lane left by one... */
    lo_shl = _mm_slli_epi64( a0, 1 );
    hi_shl = _mm_slli_epi64( a1, 1 );

    /* ...and recover the top bit of each lane as the incoming carry. */
    lo_carry = _mm_srli_epi64( a0, 63 );
    hi_carry = _mm_srli_epi64( a1, 63 );

    /* Merge each lane's carry into the next lane up. */
    *s0 = _mm_or_si128( lo_shl, _mm_slli_si128( lo_carry, 8 ) );
    *s1 = _mm_or_si128( _mm_or_si128( hi_shl, _mm_srli_si128( lo_carry, 8 ) ), _mm_slli_si128( hi_carry, 8 ) );
}
|
|
|
|
/* Reduce a 256-bit value (x32:x10) modulo the GCM polynomial
 * x^128 + x^7 + x^2 + x + 1, following [CLMUL-WP] algorithm 5 (p. 20).
 * Returns the reduced 128-bit result. */
static inline __m128i reducemod128( __m128i x10, __m128i x32 )
{
    __m128i shl63, shl62, shl57, dx0, shr1, shr2, shr7, acc;

    /* (1) left-shift x0 by 63, 62 and 57 */
    shl63 = _mm_slli_epi64( x10, 63 );
    shl62 = _mm_slli_epi64( x10, 62 );
    shl57 = _mm_slli_epi64( x10, 57 );

    /* (2) compute D by XOR-ing the shifts into x1:  d:x0 = x1:x0 ^ [shl63^shl62^shl57 : 0] */
    dx0 = _mm_xor_si128( x10, _mm_slli_si128( _mm_xor_si128( _mm_xor_si128( shl63, shl62 ), shl57 ), 8 ) );

    /* (3) right-shift [d:x0] by 1, 2 and 7 (with the cross-lane carry) */
    shr1 = _mm_or_si128( _mm_srli_epi64( dx0, 1 ), _mm_srli_si128( _mm_slli_epi64( dx0, 63 ), 8 ) );
    shr2 = _mm_or_si128( _mm_srli_epi64( dx0, 2 ), _mm_srli_si128( _mm_slli_epi64( dx0, 62 ), 8 ) );
    shr7 = _mm_or_si128( _mm_srli_epi64( dx0, 7 ), _mm_srli_si128( _mm_slli_epi64( dx0, 57 ), 8 ) );

    /* (4) acc = d^e1^f1^g1 : x0^e0^f0^g0 */
    acc = _mm_xor_si128( dx0, _mm_xor_si128( shr1, _mm_xor_si128( shr2, shr7 ) ) );

    /* result is x3^h1 : x2^h0 */
    return _mm_xor_si128( x32, acc );
}
|
|
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
|
|
|
|
/*
|
|
* GCM multiplication: c = a times b in GF(2^128)
|
|
* Based on [CLMUL-WP] algorithms 1 (with equation 27) and 5.
|
|
*/
|
|
// Multiply a by b in GF(2^128) for GCM: c = a * b.
// Inputs/outputs are 16-byte big-endian byte strings, as GCM specifies.
// Intrinsic path: byte-swap into native integer order, carry-less
// multiply (clmul256), shift left by one ([CLMUL-WP] eq. 27, via sll256),
// then reduce mod x^128 + x^7 + x^2 + x + 1 (reducemod128).
void aesni_gcm_mult( unsigned char c[16],
                     const unsigned char a[16],
                     const unsigned char b[16] )
{
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
#ifdef __clang__
    /* clang-cl: __m128i supports [] subscripting (clang vector extension);
     * MSVC's .m128i_u64 member does not exist here. */
    __m128i xa, xb, m0, m1, x10, x32, r;

    /* Load big-endian inputs into integer order: high 8 bytes -> lane 1. */
    xa[1] = _byteswap_uint64( *((unsigned __int64*)a + 0) );
    xa[0] = _byteswap_uint64( *((unsigned __int64*)a + 1) );
    xb[1] = _byteswap_uint64( *((unsigned __int64*)b + 0) );
    xb[0] = _byteswap_uint64( *((unsigned __int64*)b + 1) );

    clmul256( xa, xb, &m0, &m1 );   /* 256-bit carry-less product */
    sll256( m0, m1, &x10, &x32 );   /* shift left 1 ([CLMUL-WP] eq. 27) */
    r = reducemod128( x10, x32 );   /* reduce to 128 bits */

    /* Store back in big-endian byte order. */
    *((unsigned __int64*)c + 0) = _byteswap_uint64( r[1] );
    *((unsigned __int64*)c + 1) = _byteswap_uint64( r[0] );
#else
    /* MSVC proper: use the m128i_u64 union member for lane access. */
    __m128i xa, xb, m0, m1, x10, x32, r;

    xa.m128i_u64[1] = _byteswap_uint64( *((unsigned __int64*)a + 0) );
    xa.m128i_u64[0] = _byteswap_uint64( *((unsigned __int64*)a + 1) );
    xb.m128i_u64[1] = _byteswap_uint64( *((unsigned __int64*)b + 0) );
    xb.m128i_u64[0] = _byteswap_uint64( *((unsigned __int64*)b + 1) );

    clmul256( xa, xb, &m0, &m1 );
    sll256( m0, m1, &x10, &x32 );
    r = reducemod128( x10, x32 );

    *((unsigned __int64*)c + 0) = _byteswap_uint64( r.m128i_u64[1] );
    *((unsigned __int64*)c + 1) = _byteswap_uint64( r.m128i_u64[0] );
#endif
#else
    unsigned char aa[16], bb[16], cc[16];
    size_t i;

    /* The inputs are in big-endian order, so byte-reverse them */
    for( i = 0; i < 16; i++ )
    {
        aa[i] = a[15 - i];
        bb[i] = b[15 - i];
    }

    asm( "movdqu (%0), %%xmm0 \n" // a1:a0
         "movdqu (%1), %%xmm1 \n" // b1:b0

         /*
          * Caryless multiplication xmm2:xmm1 = xmm0 * xmm1
          * using [CLMUL-WP] algorithm 1 (p. 13).
          */
         "movdqa %%xmm1, %%xmm2 \n" // copy of b1:b0
         "movdqa %%xmm1, %%xmm3 \n" // same
         "movdqa %%xmm1, %%xmm4 \n" // same
         "pclmulqdq $0x00, %%xmm0, %%xmm1 \n" // a0*b0 = c1:c0
         "pclmulqdq $0x11, %%xmm0, %%xmm2 \n" // a1*b1 = d1:d0
         "pclmulqdq $0x10, %%xmm0, %%xmm3 \n" // a0*b1 = e1:e0
         "pclmulqdq $0x01, %%xmm0, %%xmm4 \n" // a1*b0 = f1:f0
         "pxor %%xmm3, %%xmm4 \n" // e1+f1:e0+f0
         "movdqa %%xmm4, %%xmm3 \n" // same
         "psrldq $8, %%xmm4 \n" // 0:e1+f1
         "pslldq $8, %%xmm3 \n" // e0+f0:0
         "pxor %%xmm4, %%xmm2 \n" // d1:d0+e1+f1
         "pxor %%xmm3, %%xmm1 \n" // c1+e0+f1:c0

         /*
          * Now shift the result one bit to the left,
          * taking advantage of [CLMUL-WP] eq 27 (p. 20)
          */
         "movdqa %%xmm1, %%xmm3 \n" // r1:r0
         "movdqa %%xmm2, %%xmm4 \n" // r3:r2
         "psllq $1, %%xmm1 \n" // r1<<1:r0<<1
         "psllq $1, %%xmm2 \n" // r3<<1:r2<<1
         "psrlq $63, %%xmm3 \n" // r1>>63:r0>>63
         "psrlq $63, %%xmm4 \n" // r3>>63:r2>>63
         "movdqa %%xmm3, %%xmm5 \n" // r1>>63:r0>>63
         "pslldq $8, %%xmm3 \n" // r0>>63:0
         "pslldq $8, %%xmm4 \n" // r2>>63:0
         "psrldq $8, %%xmm5 \n" // 0:r1>>63
         "por %%xmm3, %%xmm1 \n" // r1<<1|r0>>63:r0<<1
         "por %%xmm4, %%xmm2 \n" // r3<<1|r2>>62:r2<<1
         "por %%xmm5, %%xmm2 \n" // r3<<1|r2>>62:r2<<1|r1>>63

         /*
          * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
          * using [CLMUL-WP] algorithm 5 (p. 20).
          * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted).
          */
         /* Step 2 (1) */
         "movdqa %%xmm1, %%xmm3 \n" // x1:x0
         "movdqa %%xmm1, %%xmm4 \n" // same
         "movdqa %%xmm1, %%xmm5 \n" // same
         "psllq $63, %%xmm3 \n" // x1<<63:x0<<63 = stuff:a
         "psllq $62, %%xmm4 \n" // x1<<62:x0<<62 = stuff:b
         "psllq $57, %%xmm5 \n" // x1<<57:x0<<57 = stuff:c

         /* Step 2 (2) */
         "pxor %%xmm4, %%xmm3 \n" // stuff:a+b
         "pxor %%xmm5, %%xmm3 \n" // stuff:a+b+c
         "pslldq $8, %%xmm3 \n" // a+b+c:0
         "pxor %%xmm3, %%xmm1 \n" // x1+a+b+c:x0 = d:x0

         /* Steps 3 and 4 */
         "movdqa %%xmm1,%%xmm0 \n" // d:x0
         "movdqa %%xmm1,%%xmm4 \n" // same
         "movdqa %%xmm1,%%xmm5 \n" // same
         "psrlq $1, %%xmm0 \n" // e1:x0>>1 = e1:e0'
         "psrlq $2, %%xmm4 \n" // f1:x0>>2 = f1:f0'
         "psrlq $7, %%xmm5 \n" // g1:x0>>7 = g1:g0'
         "pxor %%xmm4, %%xmm0 \n" // e1+f1:e0'+f0'
         "pxor %%xmm5, %%xmm0 \n" // e1+f1+g1:e0'+f0'+g0'
         // e0'+f0'+g0' is almost e0+f0+g0, except for some missing
         // bits carried from d. Now get those bits back in.
         "movdqa %%xmm1,%%xmm3 \n" // d:x0
         "movdqa %%xmm1,%%xmm4 \n" // same
         "movdqa %%xmm1,%%xmm5 \n" // same
         "psllq $63, %%xmm3 \n" // d<<63:stuff
         "psllq $62, %%xmm4 \n" // d<<62:stuff
         "psllq $57, %%xmm5 \n" // d<<57:stuff
         "pxor %%xmm4, %%xmm3 \n" // d<<63+d<<62:stuff
         "pxor %%xmm5, %%xmm3 \n" // missing bits of d:stuff
         "psrldq $8, %%xmm3 \n" // 0:missing bits of d
         "pxor %%xmm3, %%xmm0 \n" // e1+f1+g1:e0+f0+g0
         "pxor %%xmm1, %%xmm0 \n" // h1:h0
         "pxor %%xmm2, %%xmm0 \n" // x3+h1:x2+h0

         "movdqu %%xmm0, (%2) \n" // done
         :
         : "r" (aa), "r" (bb), "r" (cc)
         : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );

    /* Now byte-reverse the outputs */
    for( i = 0; i < 16; i++ )
        c[i] = cc[15 - i];
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */

    return;
}
|
|
|
|
/*
|
|
* Compute decryption round keys from encryption round keys
|
|
*/
|
|
// Derive the decryption round-key schedule ('invkey') from the encryption
// schedule ('fwdkey') for an 'nr'-round key, walking the forward keys in
// reverse. First and last keys are copied verbatim; the middle keys get
// InvMixColumns applied (AESIMC), as required by the equivalent-inverse-
// cipher used by AESDEC.
void aesni_inverse_key( unsigned char *invkey,
                        const unsigned char *fwdkey, int nr )
{
    unsigned char *ik = invkey;
    const unsigned char *fk = fwdkey + 16 * nr;  // last forward round key

    /* First decryption round key = last encryption round key, unchanged. */
    memcpy( ik, fk, 16 );

    /* Middle keys: the loop body is the single statement selected by the
     * preprocessor below — no braces needed, but be careful when editing. */
    for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
        _mm_storeu_si128( (__m128i*)ik, _mm_aesimc_si128( _mm_loadu_si128( (__m128i*)fk) ) );
#else
        asm( "movdqu (%0), %%xmm0 \n"
             "aesimc %%xmm0, %%xmm0 \n"
             "movdqu %%xmm0, (%1) \n"
             :
             : "r" (fk), "r" (ik)
             : "memory", "xmm0" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */

    /* Last decryption round key = first encryption round key, unchanged. */
    memcpy( ik, fk, 16 );
}
|
|
|
|
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
|
|
/* Complete one AES-128 key-expansion round: 'key' is the previous round
 * key, 'kg' the raw result of _mm_aeskeygenassist_si128 on it. Returns
 * the next round key. */
inline static __m128i aes_key_128_assist( __m128i key, __m128i kg )
{
    int step;

    /* Fold each 32-bit word into the next: three rounds of key ^= key<<32
     * (note: 'key' is re-fed each step, unlike the 192/256 variants). */
    for( step = 0; step < 3; step++ )
        key = _mm_xor_si128( key, _mm_slli_si128( key, 4 ) );

    /* Broadcast the keygen-assist word from lane 3 and mix it in. */
    kg = _mm_shuffle_epi32( kg, _MM_SHUFFLE( 3, 3, 3, 3 ) );
    return _mm_xor_si128( key, kg );
}
|
|
|
|
// [AES-WP] Part of Fig. 25 page 32
|
|
// [AES-WP] Part of Fig. 25 page 32
/* One AES-192 key-expansion step. *temp1/*temp3 carry the rolling key
 * state; 'kg' is the raw _mm_aeskeygenassist_si128 output. */
inline static void aes_key_192_assist( __m128i* temp1, __m128i * temp3, __m128i kg )
{
    __m128i shifted;

    /* Broadcast the relevant assist word (lane 1) to all lanes. */
    kg = _mm_shuffle_epi32( kg, 0x55 );

    /* temp1 ^= temp1<<32 ^ temp1<<64 ^ temp1<<96: 'shifted' accumulates
     * one extra 32-bit shift per step (it is NOT re-fed from temp1). */
    shifted = _mm_slli_si128( *temp1, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    *temp1 = _mm_xor_si128( *temp1, kg );

    /* Update temp3 from the top word of the new temp1. */
    kg = _mm_shuffle_epi32( *temp1, 0xff );
    shifted = _mm_slli_si128( *temp3, 0x4 );
    *temp3 = _mm_xor_si128( *temp3, shifted );
    *temp3 = _mm_xor_si128( *temp3, kg );
}
|
|
|
|
// [AES-WP] Part of Fig. 26 page 34
|
|
// [AES-WP] Part of Fig. 26 page 34
/* AES-256 expansion, first half-step: updates *temp1 (the even round key)
 * from 'kg', the raw _mm_aeskeygenassist_si128 output for the odd key. */
inline static void aes_key_256_assist_1( __m128i* temp1, __m128i kg )
{
    __m128i shifted;

    /* Broadcast the assist word from lane 3. */
    kg = _mm_shuffle_epi32( kg, 0xff );

    /* temp1 ^= temp1<<32 ^ temp1<<64 ^ temp1<<96 ('shifted' accumulates
     * the shift; it is not re-fed from temp1), then mix in kg. */
    shifted = _mm_slli_si128( *temp1, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp1 = _mm_xor_si128( *temp1, shifted );
    *temp1 = _mm_xor_si128( *temp1, kg );
}
|
|
|
|
/* AES-256 expansion, second half-step: updates *temp3 (the odd round key)
 * using SubWord of the low words of *temp1 (aeskeygenassist with RCON 0). */
inline static void aes_key_256_assist_2( __m128i* temp1, __m128i* temp3 )
{
    __m128i y, shifted;

    /* Y = subword(temp1), taken from lane 2 and broadcast everywhere. */
    y = _mm_shuffle_epi32( _mm_aeskeygenassist_si128( *temp1, 0x0 ), 0xaa );

    /* temp3 ^= temp3<<32 ^ temp3<<64 ^ temp3<<96, then mix in Y. */
    shifted = _mm_slli_si128( *temp3, 0x4 );
    *temp3 = _mm_xor_si128( *temp3, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp3 = _mm_xor_si128( *temp3, shifted );
    shifted = _mm_slli_si128( shifted, 0x4 );
    *temp3 = _mm_xor_si128( *temp3, shifted );
    *temp3 = _mm_xor_si128( *temp3, y );
}
|
|
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
|
|
|
|
/*
|
|
* Key expansion, 128-bit case
|
|
*/
|
|
// Expand a 128-bit AES key into the 11 round keys, written to 'rk'
// (11 * 16 bytes). Uses AESKEYGENASSIST with the RCON schedule
// 0x01..0x36 per [AES-WP].
static void aesni_setkey_enc_128( unsigned char *rk,
                                  const unsigned char *key )
{
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
    __m128i* xrk, k;

    xrk = (__m128i*)rk;

/* Store the current round key, then derive the next one from the
 * keygen-assist result for the given RCON. */
#define EXPAND_ROUND(k, rcon) \
    _mm_storeu_si128( xrk++, k ); \
    k = aes_key_128_assist( k, _mm_aeskeygenassist_si128( k, rcon ) )

    k = _mm_loadu_si128( (__m128i*)key );  /* round key 0 = the key itself */
    EXPAND_ROUND( k, 0x01 );
    EXPAND_ROUND( k, 0x02 );
    EXPAND_ROUND( k, 0x04 );
    EXPAND_ROUND( k, 0x08 );
    EXPAND_ROUND( k, 0x10 );
    EXPAND_ROUND( k, 0x20 );
    EXPAND_ROUND( k, 0x40 );
    EXPAND_ROUND( k, 0x80 );
    EXPAND_ROUND( k, 0x1b );  /* RCON wraps past 0x80 in GF(2^8) */
    EXPAND_ROUND( k, 0x36 );
    _mm_storeu_si128( xrk, k );  /* 11th and final round key */

#undef EXPAND_ROUND

#else
    /* Same schedule in inline asm. The code at label 1 is a subroutine
     * reached via 'call' from the main sequence at label 2. */
    asm( "movdqu (%1), %%xmm0 \n" // copy the original key
         "movdqu %%xmm0, (%0) \n" // as round key 0
         "jmp 2f \n" // skip auxiliary routine

         /*
          * Finish generating the next round key.
          *
          * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
          * with X = rot( sub( r3 ) ) ^ RCON.
          *
          * On exit, xmm0 is r7:r6:r5:r4
          * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
          * and those are written to the round key buffer.
          */
         "1: \n"
         "pshufd $0xff, %%xmm1, %%xmm1 \n" // X:X:X:X
         "pxor %%xmm0, %%xmm1 \n" // X+r3:X+r2:X+r1:r4
         "pslldq $4, %%xmm0 \n" // r2:r1:r0:0
         "pxor %%xmm0, %%xmm1 \n" // X+r3+r2:X+r2+r1:r5:r4
         "pslldq $4, %%xmm0 \n" // etc
         "pxor %%xmm0, %%xmm1 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm1, %%xmm0 \n" // update xmm0 for next time!
         "add $16, %0 \n" // point to next round key
         "movdqu %%xmm0, (%0) \n" // write it
         "ret \n"

         /* Main "loop" */
         "2: \n"
         "aeskeygenassist $0x01, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x02, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x04, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x08, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x10, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x20, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x40, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x80, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x1B, %%xmm0, %%xmm1 \ncall 1b \n"
         "aeskeygenassist $0x36, %%xmm0, %%xmm1 \ncall 1b \n"
         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
}
|
|
|
|
/*
|
|
* Key expansion, 192-bit case
|
|
*/
|
|
// Expand a 192-bit AES key into the round-key schedule at 'rk',
// per [AES-WP] Fig. 25. Each aes_key_192_assist call yields six
// 32-bit quarter-keys; shuffle_pd stitches the 64-bit halves into
// properly aligned 128-bit round keys.
static void aesni_setkey_enc_192( unsigned char *rk,
                                  const unsigned char *key )
{
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
    __m128i temp1, temp3;
    __m128i *key_schedule = (__m128i*)rk;

    /* Round key 0 and the low half of round key 1 come straight from
     * the 24-byte key (temp3's high half is don't-care at this point). */
    temp1 = _mm_loadu_si128( (__m128i*)key );
    temp3 = _mm_loadu_si128( (__m128i*)(key + 16) );
    key_schedule[0] = temp1;
    key_schedule[1] = temp3;
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128(temp3, 0x1) );
    /* Repack: keep key_schedule[1]'s low qword, splice in temp1's halves. */
    key_schedule[1] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( key_schedule[1] ), _mm_castsi128_pd( temp1 ), 0 ) );
    key_schedule[2] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( temp1 ), _mm_castsi128_pd( temp3 ), 1 ) );
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x2 ) );
    key_schedule[3] = temp1;
    key_schedule[4] = temp3;
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x4 ) );
    key_schedule[4] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( key_schedule[4] ), _mm_castsi128_pd( temp1 ), 0 ) );
    key_schedule[5] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( temp1 ), _mm_castsi128_pd( temp3 ), 1 ) );
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x8 ) );
    key_schedule[6] = temp1;
    key_schedule[7] = temp3;
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x10 ) );
    key_schedule[7] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( key_schedule[7] ), _mm_castsi128_pd( temp1 ), 0 ) );
    key_schedule[8] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( temp1 ), _mm_castsi128_pd( temp3 ), 1 ) );
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x20 ) );
    key_schedule[9] = temp1;
    key_schedule[10] = temp3;
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x40 ) );
    key_schedule[10] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( key_schedule[10] ), _mm_castsi128_pd( temp1 ), 0 ) );
    key_schedule[11] = _mm_castpd_si128( _mm_shuffle_pd( _mm_castsi128_pd( temp1 ), _mm_castsi128_pd( temp3 ), 1 ) );
    aes_key_192_assist( &temp1, &temp3, _mm_aeskeygenassist_si128( temp3, 0x80 ) );
    key_schedule[12] = temp1;
#else
    /* Inline-asm variant: the subroutine at label 1 produces six
     * quarter-keys per 'call'; output advances 24 bytes per iteration. */
    asm( "movdqu (%1), %%xmm0 \n" // copy original round key
         "movdqu %%xmm0, (%0) \n"
         "add $16, %0 \n"
         "movq 16(%1), %%xmm1 \n"
         "movq %%xmm1, (%0) \n"
         "add $8, %0 \n"
         "jmp 2f \n" // skip auxiliary routine

         /*
          * Finish generating the next 6 quarter-keys.
          *
          * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
          * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
          *
          * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
          * and those are written to the round key buffer.
          */
         "1: \n"
         "pshufd $0x55, %%xmm2, %%xmm2 \n" // X:X:X:X
         "pxor %%xmm0, %%xmm2 \n" // X+r3:X+r2:X+r1:r4
         "pslldq $4, %%xmm0 \n" // etc
         "pxor %%xmm0, %%xmm2 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm0, %%xmm2 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm2, %%xmm0 \n" // update xmm0 = r9:r8:r7:r6
         "movdqu %%xmm0, (%0) \n"
         "add $16, %0 \n"
         "pshufd $0xff, %%xmm0, %%xmm2 \n" // r9:r9:r9:r9
         "pxor %%xmm1, %%xmm2 \n" // stuff:stuff:r9+r5:r10
         "pslldq $4, %%xmm1 \n" // r2:r1:r0:0
         "pxor %%xmm2, %%xmm1 \n" // update xmm1 = stuff:stuff:r11:r10
         "movq %%xmm1, (%0) \n"
         "add $8, %0 \n"
         "ret \n"

         "2: \n"
         "aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x80, %%xmm1, %%xmm2 \ncall 1b \n"

         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
}
|
|
|
|
/*
|
|
* Key expansion, 256-bit case
|
|
*/
|
|
// Expand a 256-bit AES key into the 15 round keys at 'rk', per
// [AES-WP] Fig. 26: even round keys come from aes_key_256_assist_1
// (with the RCON schedule), odd ones from aes_key_256_assist_2.
static void aesni_setkey_enc_256( unsigned char *rk,
                                  const unsigned char *key )
{
#if defined(POLARSSL_HAVE_MSVC_X64_INTRINSICS)
    __m128i temp1, temp3;
    __m128i *key_schedule = (__m128i*)rk;

    /* Round keys 0 and 1 are the 32-byte key itself. */
    temp1 = _mm_loadu_si128( (__m128i*)key );
    temp3 = _mm_loadu_si128( (__m128i*)(key + 16) );
    key_schedule[0] = temp1;
    key_schedule[1] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x01 ) );
    key_schedule[2] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[3] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x02 ) );
    key_schedule[4] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[5] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x04 ) );
    key_schedule[6] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[7] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x08 ) );
    key_schedule[8] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[9] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x10 ) );
    key_schedule[10] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[11] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x20 ) );
    key_schedule[12] = temp1;
    aes_key_256_assist_2( &temp1, &temp3 );
    key_schedule[13] = temp3;
    aes_key_256_assist_1( &temp1, _mm_aeskeygenassist_si128( temp3, 0x40 ) );
    key_schedule[14] = temp1;
#else
    /* Inline-asm variant: subroutine at label 1 emits two round keys per
     * 'call' (one via RCON assist, one via the RCON-0 subword trick). */
    asm( "movdqu (%1), %%xmm0 \n"
         "movdqu %%xmm0, (%0) \n"
         "add $16, %0 \n"
         "movdqu 16(%1), %%xmm1 \n"
         "movdqu %%xmm1, (%0) \n"
         "jmp 2f \n" // skip auxiliary routine

         /*
          * Finish generating the next two round keys.
          *
          * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
          * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
          *
          * On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
          * and those have been written to the output buffer.
          */
         "1: \n"
         "pshufd $0xff, %%xmm2, %%xmm2 \n"
         "pxor %%xmm0, %%xmm2 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm0, %%xmm2 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm0, %%xmm2 \n"
         "pslldq $4, %%xmm0 \n"
         "pxor %%xmm2, %%xmm0 \n"
         "add $16, %0 \n"
         "movdqu %%xmm0, (%0) \n"

         /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
          * and proceed to generate next round key from there */
         "aeskeygenassist $0, %%xmm0, %%xmm2\n"
         "pshufd $0xaa, %%xmm2, %%xmm2 \n"
         "pxor %%xmm1, %%xmm2 \n"
         "pslldq $4, %%xmm1 \n"
         "pxor %%xmm1, %%xmm2 \n"
         "pslldq $4, %%xmm1 \n"
         "pxor %%xmm1, %%xmm2 \n"
         "pslldq $4, %%xmm1 \n"
         "pxor %%xmm2, %%xmm1 \n"
         "add $16, %0 \n"
         "movdqu %%xmm1, (%0) \n"
         "ret \n"

         /*
          * Main "loop" - Generating one more key than necessary,
          * see definition of aes_context.buf
          */
         "2: \n"
         "aeskeygenassist $0x01, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x02, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x04, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x08, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x10, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x20, %%xmm1, %%xmm2 \ncall 1b \n"
         "aeskeygenassist $0x40, %%xmm1, %%xmm2 \ncall 1b \n"
         :
         : "r" (rk), "r" (key)
         : "memory", "cc", "0" );
#endif /* POLARSSL_HAVE_MSVC_X64_INTRINSICS */
}
|
|
|
|
/*
|
|
* Key expansion, wrapper
|
|
*/
|
|
int aesni_setkey_enc( unsigned char *rk,
|
|
const unsigned char *key,
|
|
size_t bits )
|
|
{
|
|
switch( bits )
|
|
{
|
|
case 128: aesni_setkey_enc_128( rk, key ); break;
|
|
case 192: aesni_setkey_enc_192( rk, key ); break;
|
|
case 256: aesni_setkey_enc_256( rk, key ); break;
|
|
default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH );
|
|
}
|
|
|
|
return( 0 );
|
|
}
|
|
|
|
#endif
|