source-engine/external/crypto++-5.6.3/rdrand.S
FluorescentCIAAfricanAmerican 3bf9df6b27 1
2020-04-22 12:56:21 -04:00

597 lines
11 KiB
ArmAsm

;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
;; Copyright assigned to the Crypto++ project.
;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool chains.
;; Additionally, the inline assembly code produced by GCC and Clang is not that
;; impressive. However, using this code requires NASM and an edit to the GNUmakefile.
;; nasm -f elf32 rdrand.S -DX86 -g -o rdrand-x86.o
;; nasm -f elfx32 rdrand.S -DX32 -g -o rdrand-x32.o
;; nasm -f elf64 rdrand.S -DX64 -g -o rdrand-x64.o
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Naming convention used in rdrand.{h|cpp|asm|S}
;; MSC = Microsoft Compiler (and compatibles)
;; GCC = GNU Compiler (and compatibles)
;; ALL = MSC and GCC (and compatibles)
;; RRA = RDRAND, Assembly
;; RSA = RDSEED, Assembly
;; RRI = RDRAND, Intrinsic
;; RSI = RDSEED, Intrinsic
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; C/C++ Function prototypes
;; X86, X32 and X64:
;; extern "C" int NASM_RRA_GenerateBlock(byte* ptr, size_t size, unsigned int safety);
;; extern "C" int NASM_RSA_GenerateBlock(byte* ptr, size_t size, unsigned int safety);
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Return values
;; Status codes the GenerateBlock routines in this file place in EAX/RAX
;; before returning: 1 when the request was satisfied, 0 otherwise.
;; The RDRAND_* pair is used by the NASM_RRA_* routines and the RDSEED_*
;; pair by the NASM_RSA_* routines.
%define RDRAND_SUCCESS 1
%define RDRAND_FAILURE 0
%define RDSEED_SUCCESS 1
%define RDSEED_FAILURE 0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X86 or X32 ;; Set via the command line
;; NOTE(review): this guard relies on %ifdef accepting several identifiers
;; and being true when any of them is defined (`or` is then just another,
;; undefined, name) -- confirm with the NASM version in use.
;;---------------------------------------------------------------------
;; int NASM_RRA_GenerateBlock(byte* buffer, size_t bsize, unsigned int safety)
;;
;; Fills `buffer` with `bsize` bytes produced by RDRAND.
;;
;; Arg1, byte* buffer         destination; if NULL the call succeeds only
;;                            when bsize is 0
;; Arg2, size_t bsize         number of bytes to write
;; Arg3, unsigned int safety  retry budget: each time RDRAND reports that
;;                            no value was available (CF=0) the routine
;;                            fails if safety is 0, otherwise decrements
;;                            it and tries again
;; EAX (out): success (1), failure (0)
;;
;; X86: arguments are read from the stack (i386 cdecl layout); EDI and ESI
;;      are callee-saved there, so they are preserved around the body.
;; X32: arguments are already in EDI, ESI and EDX.
;;---------------------------------------------------------------------
global NASM_RRA_GenerateBlock
section .text
%ifdef X86
align 8
cpu 486
%else
align 16
%endif
NASM_RRA_GenerateBlock:
%ifdef X86
;; Offsets are relative to ESP *after* the two pushes in .Load_Arguments:
;;   [esp+00h] saved ESI, [esp+04h] saved EDI, [esp+08h] return address
%define arg1 [esp+0ch]
%define arg2 [esp+10h]
%define arg3 [esp+14h]
%define MWSIZE 04h ;; machine word size
%else
%define MWSIZE 08h ;; machine word size
%endif
%define buffer edi
%define bsize esi
%define safety edx
%ifdef X86
.Load_Arguments:
        push    edi                     ;; callee-saved, used as `buffer`
        push    esi                     ;; callee-saved, used as `bsize`
        mov     buffer, arg1
        mov     bsize, arg2
        mov     safety, arg3
%endif
.Validate_Pointer:
        cmp     buffer, 0
        je      .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top:
        ;; Check remaining size
        cmp     bsize, 0
        je      .GenerateBlock_Success
%ifdef X86
.Call_RDRAND_EAX:
%else
.Call_RDRAND_RAX:
        DB      48h                     ;; X32 can use the full register, issue the REX.w prefix
%endif
        ;; Emit the opcode bytes directly so assemblers that do not know
        ;; the mnemonic can still build this file. This is `rdrand eax`
        ;; (`rdrand rax` when preceded by the REX.w prefix above).
        DB      0Fh, 0C7h, 0F0h
        ;; If CF=1, the number returned by RDRAND is valid.
        ;; If CF=0, a random number was not available.
        jc      .RDRAND_succeeded
.RDRAND_failed:
        ;; Exit if we've reached the limit
        cmp     safety, 0
        je      .GenerateBlock_Failure
        dec     safety
        jmp     .GenerateBlock_Top
.RDRAND_succeeded:
        cmp     bsize, MWSIZE
        jb      .Partial_Machine_Word
.Full_Machine_Word:
%ifdef X32
        mov     [buffer+4], eax         ;; We can only move 4 at a time
        DB      048h                    ;; Combined, these result in
        shr     eax, 32                 ;; `shr rax, 32`
%endif
        mov     [buffer], eax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        ;; Continue
        jmp     .GenerateBlock_Top
;; 1,2,3 bytes remain for X86
;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:
%ifdef X32
        ;; Test bit 2 to see if size is at least 4
        test    bsize, 4
        jz      .Bit_2_Not_Set
        mov     [buffer], eax
        add     buffer, 4
        DB      048h                    ;; Combined, these result in
        shr     eax, 32                 ;; `shr rax, 32`
.Bit_2_Not_Set:
%endif
        ;; Test bit 1 to see if size is at least 2
        test    bsize, 2
        jz      .Bit_1_Not_Set
        mov     [buffer], ax
        shr     eax, 16
        add     buffer, 2
.Bit_1_Not_Set:
        ;; Test bit 0 to see if size is at least 1
        test    bsize, 1
        jz      .GenerateBlock_Success
        mov     [buffer], al
        ;; We've hit all the bits
        jmp     .GenerateBlock_Success
.GenerateBlock_PreRet:
        ;; Test for success (was the request completely fulfilled?)
        cmp     bsize, 0
        je      .GenerateBlock_Success
.GenerateBlock_Failure:
        xor     eax, eax
        mov     al, RDRAND_FAILURE
        jmp     .GenerateBlock_Return
.GenerateBlock_Success:
        xor     eax, eax
        mov     al, RDRAND_SUCCESS
;; Single exit so the X86 build always restores what .Load_Arguments saved
.GenerateBlock_Return:
%ifdef X86
        pop     esi
        pop     edi
%endif
        ret
%endif ;; X86 or X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64 ;; Set via the command line
;;---------------------------------------------------------------------
;; int NASM_RRA_GenerateBlock(byte* buffer, size_t bsize, unsigned int safety)
;;
;; Writes `bsize` bytes of RDRAND output to `buffer`.
;;
;; In:   RDI = buffer, RSI = bsize, EDX = safety
;;       (the arguments are used straight from these registers, so there
;;       is no argument-loading step)
;; Out:  RAX = RDRAND_SUCCESS (1) or RDRAND_FAILURE (0)
;; Uses: RAX, RDI, RSI, EDX, flags
;;
;; A NULL buffer succeeds only when bsize is 0. Each time RDRAND has no
;; value ready (CF=0) the routine fails if safety is 0, otherwise it
;; decrements safety and retries.
;;---------------------------------------------------------------------
global NASM_RRA_GenerateBlock
section .text
align 16
NASM_RRA_GenerateBlock:
%define MWSIZE 08h ;; machine word size
%define buffer rdi
%define bsize rsi
%define safety edx
.check_pointer:
        test    buffer, buffer
        jz      .null_pointer

;; Loop head: bsize = bytes still to be written
.next_word:
        test    bsize, bsize
        jz      .success
        DB      048h, 0Fh, 0C7h, 0F0h   ;; `rdrand rax`, emitted as raw bytes
        jc      .have_word              ;; CF=1: RAX holds a valid value

        ;; CF=0: nothing available, spend one retry or give up
        test    safety, safety
        jz      .failure
        dec     safety
        jmp     .next_word

.have_word:
        cmp     bsize, MWSIZE
        jb      .tail
        mov     [buffer], rax           ;; full 8-byte store
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        jmp     .next_word

;; 1..7 bytes remain: peel off 4, 2 and 1 bytes according to the bits of bsize
.tail:
        test    bsize, 4
        jz      .tail_2
        mov     [buffer], eax
        shr     rax, 32                 ;; expose the unused upper half
        add     buffer, 4
.tail_2:
        test    bsize, 2
        jz      .tail_1
        mov     [buffer], ax
        shr     eax, 16
        add     buffer, 2
.tail_1:
        test    bsize, 1
        jz      .success
        mov     [buffer], al
        jmp     .success

;; buffer == NULL: only an empty request counts as fulfilled
.null_pointer:
        test    bsize, bsize
        jz      .success
.failure:
        mov     eax, RDRAND_FAILURE     ;; 32-bit write also clears the upper half of RAX
        ret
.success:
        mov     eax, RDRAND_SUCCESS
        ret
%endif ;; X64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X86 or X32 ;; Set via the command line
;; NOTE(review): this guard relies on %ifdef accepting several identifiers
;; and being true when any of them is defined (`or` is then just another,
;; undefined, name) -- confirm with the NASM version in use.
;;---------------------------------------------------------------------
;; int NASM_RSA_GenerateBlock(byte* buffer, size_t bsize, unsigned int safety)
;;
;; Fills `buffer` with `bsize` bytes produced by RDSEED.
;;
;; Arg1, byte* buffer         destination; if NULL the call succeeds only
;;                            when bsize is 0
;; Arg2, size_t bsize         number of bytes to write
;; Arg3, unsigned int safety  retry budget: each time RDSEED reports that
;;                            no value was available (CF=0) the routine
;;                            fails if safety is 0, otherwise decrements
;;                            it and tries again
;; EAX (out): success (1), failure (0)
;;
;; X86: arguments are read from the stack (i386 cdecl layout); EDI and ESI
;;      are callee-saved there, so they are preserved around the body.
;; X32: arguments are already in EDI, ESI and EDX.
;;---------------------------------------------------------------------
global NASM_RSA_GenerateBlock
section .text
%ifdef X86
align 8
cpu 486
%else
align 16
%endif
NASM_RSA_GenerateBlock:
%ifdef X86
;; Offsets are relative to ESP *after* the two pushes in .Load_Arguments:
;;   [esp+00h] saved ESI, [esp+04h] saved EDI, [esp+08h] return address
%define arg1 [esp+0ch]
%define arg2 [esp+10h]
%define arg3 [esp+14h]
%define MWSIZE 04h ;; machine word size
%else
%define MWSIZE 08h ;; machine word size
%endif
%define buffer edi
%define bsize esi
%define safety edx
%ifdef X86
.Load_Arguments:
        push    edi                     ;; callee-saved, used as `buffer`
        push    esi                     ;; callee-saved, used as `bsize`
        mov     buffer, arg1
        mov     bsize, arg2
        mov     safety, arg3
%endif
.Validate_Pointer:
        cmp     buffer, 0
        je      .GenerateBlock_PreRet
;; Top of While loop
.GenerateBlock_Top:
        ;; Check remaining size
        cmp     bsize, 0
        je      .GenerateBlock_Success
%ifdef X86
.Call_RDSEED_EAX:
%else
.Call_RDSEED_RAX:
        DB      48h                     ;; X32 can use the full register, issue the REX.w prefix
%endif
        ;; Emit the opcode bytes directly so assemblers that do not know
        ;; the mnemonic can still build this file. This is `rdseed eax`
        ;; (`rdseed rax` when preceded by the REX.w prefix above).
        DB      0Fh, 0C7h, 0F8h
        ;; If CF=1, the number returned by RDSEED is valid.
        ;; If CF=0, a random number was not available.
        jc      .RDSEED_succeeded
.RDSEED_failed:
        ;; Exit if we've reached the limit
        cmp     safety, 0
        je      .GenerateBlock_Failure
        dec     safety
        jmp     .GenerateBlock_Top
.RDSEED_succeeded:
        cmp     bsize, MWSIZE
        jb      .Partial_Machine_Word
.Full_Machine_Word:
%ifdef X32
        ;; MWSIZE is 8 here, so both halves of RAX must be stored before
        ;; the pointer advances; otherwise 4 of every 8 bytes stay unwritten.
        mov     [buffer+4], eax         ;; We can only move 4 at a time
        DB      048h                    ;; Combined, these result in
        shr     eax, 32                 ;; `shr rax, 32`
%endif
        mov     [buffer], eax
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        ;; Continue
        jmp     .GenerateBlock_Top
;; 1,2,3 bytes remain for X86
;; 1,2,3,4,5,6,7 remain for X32
.Partial_Machine_Word:
%ifdef X32
        ;; Test bit 2 to see if size is at least 4
        test    bsize, 4
        jz      .Bit_2_Not_Set
        mov     [buffer], eax
        add     buffer, 4
        DB      048h                    ;; Combined, these result in
        shr     eax, 32                 ;; `shr rax, 32`
.Bit_2_Not_Set:
%endif
        ;; Test bit 1 to see if size is at least 2
        test    bsize, 2
        jz      .Bit_1_Not_Set
        mov     [buffer], ax
        shr     eax, 16
        add     buffer, 2
.Bit_1_Not_Set:
        ;; Test bit 0 to see if size is at least 1
        test    bsize, 1
        jz      .GenerateBlock_Success
        mov     [buffer], al
        ;; We've hit all the bits
        jmp     .GenerateBlock_Success
.GenerateBlock_PreRet:
        ;; Test for success (was the request completely fulfilled?)
        cmp     bsize, 0
        je      .GenerateBlock_Success
.GenerateBlock_Failure:
        xor     eax, eax
        mov     al, RDSEED_FAILURE
        jmp     .GenerateBlock_Return
.GenerateBlock_Success:
        xor     eax, eax
        mov     al, RDSEED_SUCCESS
;; Single exit so the X86 build always restores what .Load_Arguments saved
.GenerateBlock_Return:
%ifdef X86
        pop     esi
        pop     edi
%endif
        ret
%endif ;; X86 or X32
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifdef X64 ;; Set via the command line
;;---------------------------------------------------------------------
;; int NASM_RSA_GenerateBlock(byte* buffer, size_t bsize, unsigned int safety)
;;
;; Writes `bsize` bytes of RDSEED output to `buffer`.
;;
;; In:   RDI = buffer, RSI = bsize, EDX = safety
;;       (the arguments are used straight from these registers, so there
;;       is no argument-loading step)
;; Out:  RAX = RDSEED_SUCCESS (1) or RDSEED_FAILURE (0)
;; Uses: RAX, RDI, RSI, EDX, flags
;;
;; A NULL buffer succeeds only when bsize is 0. Each time RDSEED has no
;; value ready (CF=0) the routine fails if safety is 0, otherwise it
;; decrements safety and retries.
;;---------------------------------------------------------------------
global NASM_RSA_GenerateBlock
section .text
align 16
NASM_RSA_GenerateBlock:
%define MWSIZE 08h ;; machine word size
%define buffer rdi
%define bsize rsi
%define safety edx
.check_pointer:
        test    buffer, buffer
        jz      .null_pointer

;; Loop head: bsize = bytes still to be written
.next_word:
        test    bsize, bsize
        jz      .success
        DB      048h, 0Fh, 0C7h, 0F8h   ;; `rdseed rax`, emitted as raw bytes
        jc      .have_word              ;; CF=1: RAX holds a valid value

        ;; CF=0: nothing available, spend one retry or give up
        test    safety, safety
        jz      .failure
        dec     safety
        jmp     .next_word

.have_word:
        cmp     bsize, MWSIZE
        jb      .tail
        mov     [buffer], rax           ;; full 8-byte store
        add     buffer, MWSIZE
        sub     bsize, MWSIZE
        jmp     .next_word

;; 1..7 bytes remain: peel off 4, 2 and 1 bytes according to the bits of bsize
.tail:
        test    bsize, 4
        jz      .tail_2
        mov     [buffer], eax
        shr     rax, 32                 ;; expose the unused upper half
        add     buffer, 4
.tail_2:
        test    bsize, 2
        jz      .tail_1
        mov     [buffer], ax
        shr     eax, 16
        add     buffer, 2
.tail_1:
        test    bsize, 1
        jz      .success
        mov     [buffer], al
        jmp     .success

;; buffer == NULL: only an empty request counts as fulfilled
.null_pointer:
        test    bsize, bsize
        jz      .success
.failure:
        mov     eax, RDSEED_FAILURE     ;; 32-bit write also clears the upper half of RAX
        ret
.success:
        mov     eax, RDSEED_SUCCESS
        ret
%endif ;; X64
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;