#if defined(__x86_64__)
.text

.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

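# rsaz_512_sqr(out, inp, mod, n0, times): %r8d Montgomery squarings of the
# 512-bit value at (%rsi) modulo the 512-bit modulus at (%rdx), result to
# (%rdi).  %rcx is the Montgomery reduction constant n0, stashed at
# 128(%rsp) for __rsaz_512_reduce; %r8d is the repeat count (argument
# mapping per the System V AMD64 calling convention).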
.globl	rsaz_512_sqr
.hidden rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lsqr_body:
	movq	%rdx,%rbp
	movq	(%rsi),%rdx
	movq	8(%rsi),%rax
	movq	%rcx,128(%rsp)
	jmp	.Loop_sqr

.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)

	movq	%rdx,%rbx
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	addq	%r8,%r8
	movq	%r9,%rcx
	adcq	%r9,%r9

	mulq	%rax
	movq	%rax,(%rsp)
	addq	%rdx,%r8
	adcq	$0,%r9

	movq	%r8,8(%rsp)
	shrq	$63,%rcx


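# The rows below accumulate the cross products a[i]*a[j] (j > i), double
# them (the shrq $63 / lea (reg,reg,2) pairs carry the bit saved from the
# previous row), fold in a[i]^2, and store two 64-bit limbs of the 1024-bit
# square per row into the scratch area at (%rsp).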
	movq	8(%rsi),%r8
	movq	16(%rsi),%rax
	mulq	%r8
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r8
	addq	%rax,%r15
	movq	%r8,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%r8
	movq	%r10,%rdx
	adcq	$0,%r8

	addq	%rdx,%rdx
	leaq	(%rcx,%r10,2),%r10
	movq	%r11,%rbx
	adcq	%r11,%r11

	mulq	%rax
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)
	shrq	$63,%rbx


	movq	16(%rsi),%r9
	movq	24(%rsi),%rax
	mulq	%r9
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	movq	%r12,%r10
	leaq	(%rbx,%r12,2),%r12
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r9
	shrq	$63,%r10
	addq	%rax,%r8
	movq	%r9,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	movq	%r13,%rcx
	leaq	(%r10,%r13,2),%r13

	mulq	%rax
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)
	shrq	$63,%rcx


	movq	24(%rsi),%r10
	movq	32(%rsi),%rax
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	movq	%r14,%r12
	leaq	(%rcx,%r14,2),%r14
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	shrq	$63,%r12
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	movq	%r15,%rbx
	leaq	(%r12,%r15,2),%r15

	mulq	%rax
	addq	%rax,%r13
	adcq	%rdx,%r14
	adcq	$0,%r15

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)
	shrq	$63,%rbx


	movq	32(%rsi),%r11
	movq	40(%rsi),%rax
	mulq	%r11
	addq	%rax,%r8
	movq	48(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	movq	%r8,%r12
	leaq	(%rbx,%r8,2),%r8
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	shrq	$63,%r12
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	movq	%r9,%rcx
	leaq	(%r12,%r9,2),%r9

	mulq	%rax
	addq	%rax,%r15
	adcq	%rdx,%r8
	adcq	$0,%r9

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)
	shrq	$63,%rcx


	movq	40(%rsi),%r12
	movq	48(%rsi),%rax
	mulq	%r12
	addq	%rax,%r10
	movq	56(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	movq	%r10,%r15
	leaq	(%rcx,%r10,2),%r10
	adcq	$0,%rdx
	shrq	$63,%r15
	addq	%rbx,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	movq	%r11,%rbx
	leaq	(%r15,%r11,2),%r11

	mulq	%rax
	addq	%rax,%r9
	adcq	%rdx,%r10
	adcq	$0,%r11

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	movq	48(%rsi),%r13
	movq	56(%rsi),%rax
	mulq	%r13
	addq	%rax,%r12
	movq	%r13,%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	xorq	%r14,%r14
	shlq	$1,%rbx
	adcq	%r12,%r12
	adcq	%r13,%r13
	adcq	%r14,%r14

	mulq	%rax
	addq	%rax,%r11
	adcq	%rdx,%r12
	adcq	$0,%r13

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	movq	56(%rsi),%rax
	mulq	%rax
	addq	%rax,%r13
	adcq	$0,%rdx

	addq	%rdx,%r14

	movq	%r13,112(%rsp)
	movq	%r14,120(%rsp)

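# Reload the low 512 bits of the square for Montgomery reduction; the high
# 512 bits stay at 64(%rsp)..120(%rsp) until after the reduction.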
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce

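# Add the stored high half to the reduced low half; sbbq %rcx,%rcx turns
# the final carry into the all-ones/zero mask that __rsaz_512_subtract
# uses to decide whether to subtract the modulus.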
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_sqr,.-rsaz_512_sqr
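# rsaz_512_mul(out, a, b, mod, n0): one 512x512-bit Montgomery
# multiplication.  %rdi = out, %rsi = a, %rdx = b, %rcx = mod, %r8 = n0.
# out and mod are parked in %xmm0/%xmm1 across __rsaz_512_mul (the
# .byte-encoded movq instructions below); n0 is stored at 128(%rsp) for
# the reducer.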
.globl	rsaz_512_mul
.hidden rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lmul_body:
.byte	102,72,15,110,199	# movq %rdi,%xmm0
.byte	102,72,15,110,201	# movq %rcx,%xmm1
	movq	%r8,128(%rsp)
	movq	(%rdx),%rbx
	movq	%rdx,%rbp
	call	__rsaz_512_mul

.byte	102,72,15,126,199	# movq %xmm0,%rdi
.byte	102,72,15,126,205	# movq %xmm1,%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_mul,.-rsaz_512_mul
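# rsaz_512_mul_gather4(out, a, table, mod, n0, power): like rsaz_512_mul,
# but the multiplicand b is gathered limb by limb from a scattered table
# (%rdx = table, %r9d = index): each 64-bit limb is reassembled from two
# 32-bit words stored 64 bytes apart, with a 128-byte stride between limbs.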
.globl	rsaz_512_mul_gather4
.hidden rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d
	subq	$128+24,%rsp
.Lmul_gather4_body:
	movl	64(%rdx,%r9,4),%eax
.byte	102,72,15,110,199	# movq %rdi,%xmm0
	movl	(%rdx,%r9,4),%ebx
.byte	102,72,15,110,201	# movq %rcx,%xmm1
	movq	%r8,128(%rsp)

	shlq	$32,%rax
	orq	%rax,%rbx
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	leaq	128(%rdx,%r9,4),%rbp
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	leaq	128(%rbp),%rbp
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
.byte	102,72,15,126,227	# movq %xmm4,%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi
	movl	$7,%ecx
	jmp	.Loop_mul_gather

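# Seven more rows: each iteration multiplies a[0..7] by the next gathered
# limb of b, folds in the running accumulator, and writes one finished
# limb of the 1024-bit product to the scratch area.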
.align	32
.Loop_mul_gather:
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	movd	(%rbp),%xmm4
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	movd	64(%rbp),%xmm5
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	pslldq	$4,%xmm5
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	por	%xmm5,%xmm4
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
.byte	102,72,15,126,227	# movq %xmm4,%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	128(%rbp),%rbp
	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul_gather

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

.byte	102,72,15,126,199	# movq %xmm0,%rdi
.byte	102,72,15,126,205	# movq %xmm1,%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
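# rsaz_512_mul_scatter4(out, a, mod, n0, table, power): Montgomery-multiplies
# a by the value already held at out, then scatters the eight result limbs
# into the table slot selected by power (%r8 = table base, %r9d = index).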
.globl	rsaz_512_mul_scatter4
.hidden rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d
	subq	$128+24,%rsp
.Lmul_scatter4_body:
	leaq	(%r8,%r9,4),%r8
.byte	102,72,15,110,199	# movq %rdi,%xmm0
.byte	102,72,15,110,202	# movq %rdx,%xmm1
.byte	102,73,15,110,208	# movq %r8,%xmm2
	movq	%rcx,128(%rsp)

	movq	%rdi,%rbp
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

.byte	102,72,15,126,199	# movq %xmm0,%rdi
.byte	102,72,15,126,205	# movq %xmm1,%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
.byte	102,72,15,126,214	# movq %xmm2,%rsi
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

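# Scatter the eight 64-bit result limbs into the table: low and high 32-bit
# halves go 64 bytes apart, with a 128-byte stride between limbs.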
	movl	%r8d,0(%rsi)
	shrq	$32,%r8
	movl	%r9d,128(%rsi)
	shrq	$32,%r9
	movl	%r10d,256(%rsi)
	shrq	$32,%r10
	movl	%r11d,384(%rsi)
	shrq	$32,%r11
	movl	%r12d,512(%rsi)
	shrq	$32,%r12
	movl	%r13d,640(%rsi)
	shrq	$32,%r13
	movl	%r14d,768(%rsi)
	shrq	$32,%r14
	movl	%r15d,896(%rsi)
	shrq	$32,%r15
	movl	%r8d,64(%rsi)
	movl	%r9d,192(%rsi)
	movl	%r10d,320(%rsi)
	movl	%r11d,448(%rsi)
	movl	%r12d,576(%rsi)
	movl	%r13d,704(%rsi)
	movl	%r14d,832(%rsi)
	movl	%r15d,960(%rsi)

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
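# rsaz_512_mul_by_one(out, in, mod, n0): Montgomery-reduces the bare 512-bit
# value at (%rsi), i.e. multiplies it by 1 and reduces, which takes a value
# out of Montgomery form.  %rdx = mod, %rcx = n0.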
.globl	rsaz_512_mul_by_one
.hidden rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	subq	$128+24,%rsp
.Lmul_by_one_body:
	movq	%rdx,%rbp
	movq	%rcx,128(%rsp)

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	call	__rsaz_512_reduce
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
	movq	-48(%rax),%r15
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
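# __rsaz_512_reduce: eight rounds of word-by-word Montgomery reduction of
# the 512-bit value in %r8..%r15 against the modulus at (%rbp).  n0 is read
# from 128+8(%rsp), i.e. the caller's 128(%rsp) once the return address
# pushed by call is accounted for.  The reduced value is left in %r8..%r15.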
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi


	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduction_loop

	.byte	0xf3,0xc3	# repz ret
.size	__rsaz_512_reduce,.-__rsaz_512_reduce
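# __rsaz_512_subtract: stores %r8..%r15 to (%rdi), then adds the negated
# modulus masked by %rcx (all-ones to subtract the modulus, zero to leave
# the value unchanged) and stores the final result back to (%rdi).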
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3	# repz ret
.size	__rsaz_512_subtract,.-__rsaz_512_subtract
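# __rsaz_512_mul: schoolbook 512x512-bit multiply of a[] at (%rsi) by b[]
# at (%rbp), with b[0] pre-loaded in %rbx by the caller.  The 1024-bit
# product lands in the caller's scratch at (%rsp) (addressed as 8(%rsp)
# here because of the return address pushed by call); the top eight limbs
# are also left in %r8..%r15.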
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
	leaq	8(%rsp),%rdi

	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp
	leaq	8(%rdi),%rdi

	movl	$7,%ecx
	jmp	.Loop_mul

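# Rows 1..7: multiply a[0..7] by b[i], fold in the running accumulator,
# store one finished product limb per iteration and advance the b pointer.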
.align	32
.Loop_mul:
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3	# repz ret
.size	__rsaz_512_mul,.-__rsaz_512_mul
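# rsaz_512_scatter4(table, val, power): store the eight 64-bit limbs at
# (%rsi) into table slot "power" (%rdx), splitting each limb into two
# 32-bit words 64 bytes apart with a 128-byte stride between limbs.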
.globl	rsaz_512_scatter4
.hidden rsaz_512_scatter4
.type	rsaz_512_scatter4,@function
.align	16
rsaz_512_scatter4:
	leaq	(%rdi,%rdx,4),%rdi
	movl	$8,%r9d
	jmp	.Loop_scatter
.align	16
.Loop_scatter:
	movq	(%rsi),%rax
	leaq	8(%rsi),%rsi
	movl	%eax,(%rdi)
	shrq	$32,%rax
	movl	%eax,64(%rdi)
	leaq	128(%rdi),%rdi
	decl	%r9d
	jnz	.Loop_scatter
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_scatter4,.-rsaz_512_scatter4

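# rsaz_512_gather4(val, table, power): the inverse of rsaz_512_scatter4 --
# reassemble eight 64-bit limbs from table slot "power" and store them
# contiguously at (%rdi).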
.globl	rsaz_512_gather4
.hidden rsaz_512_gather4
.type	rsaz_512_gather4,@function
.align	16
rsaz_512_gather4:
	leaq	(%rsi,%rdx,4),%rsi
	movl	$8,%r9d
	jmp	.Loop_gather
.align	16
.Loop_gather:
	movl	(%rsi),%eax
	movl	64(%rsi),%r8d
	leaq	128(%rsi),%rsi
	shlq	$32,%r8
	orq	%r8,%rax
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	decl	%r9d
	jnz	.Loop_gather
	.byte	0xf3,0xc3	# repz ret
.size	rsaz_512_gather4,.-rsaz_512_gather4
#endif