|  | #if defined(__x86_64__) | 
|  | .text | 
|  |  | 
|  |  | 
|  |  | 
/*
 * void rsaz_512_sqr(uint64_t out[8], const uint64_t inp[8],
 *                   const uint64_t mod[8], uint64_t n0, int times)
 *
 * SysV AMD64: %rdi=out, %rsi=inp, %rdx=mod, %rcx=n0 (= -mod^-1 mod 2^64),
 * %r8d=times.  Montgomery-squares inp repeatedly, 'times' iterations,
 * writing the result to out.  After the first pass %rsi is redirected to
 * out, so later passes square out in place.
 *
 * Frame layout (relative to %rsp after the sub below):
 *   0..127      16-limb scratch for the full 1024-bit square
 *   128         n0 (read by __rsaz_512_reduce at its 128+8(%rsp))
 *   128+8       remaining-iterations counter
 *
 * Clobbers: all argument/scratch GPRs; preserves callee-saved regs via
 * the push/pop pairs below.  AT&T syntax; generated code (perlasm).
 */
.globl	rsaz_512_sqr
.hidden rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
/* save all callee-saved GPRs used below */
pushq	%rbx
pushq	%rbp
pushq	%r12
pushq	%r13
pushq	%r14
pushq	%r15

/* 128 bytes product scratch + 24 bytes (n0, counter, padding) */
subq	$128+24,%rsp
.Lsqr_body:
movq	%rdx,%rbp
/* preload a[0] (multiplier) and a[1] (first multiplicand) */
movq	(%rsi),%rdx
movq	8(%rsi),%rax
movq	%rcx,128(%rsp)
jmp	.Loop_sqr

.align	32
.Loop_sqr:
/* remember how many squarings remain */
movl	%r8d,128+8(%rsp)

/* row 0: r8..r15 = a[0] * a[1..7] (schoolbook off-diagonal row) */
movq	%rdx,%rbx
mulq	%rdx
movq	%rax,%r8
movq	16(%rsi),%rax
movq	%rdx,%r9

mulq	%rbx
addq	%rax,%r9
movq	24(%rsi),%rax
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
addq	%rax,%r10
movq	32(%rsi),%rax
movq	%rdx,%r11
adcq	$0,%r11

mulq	%rbx
addq	%rax,%r11
movq	40(%rsi),%rax
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r12
movq	48(%rsi),%rax
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
addq	%rax,%r13
movq	56(%rsi),%rax
movq	%rdx,%r14
adcq	$0,%r14

mulq	%rbx
addq	%rax,%r14
movq	%rbx,%rax
movq	%rdx,%r15
adcq	$0,%r15

/* double the cross terms; %rcx keeps r9's pre-doubled value so its
   top bit (the carry shifted out) can be re-injected one column later
   via shrq $63 + lea below */
addq	%r8,%r8
movq	%r9,%rcx
adcq	%r9,%r9

/* add the diagonal a[0]^2: lo -> tmp[0], hi folds into tmp[1] */
mulq	%rax
movq	%rax,(%rsp)
addq	%rdx,%r8
adcq	$0,%r9

movq	%r8,8(%rsp)
shrq	$63,%rcx


/* row 1: fold a[1]*a[2..7] into the accumulator, then double and add
   a[1]^2 for columns 2..3 */
movq	8(%rsi),%r8
movq	16(%rsi),%rax
mulq	%r8
addq	%rax,%r10
movq	24(%rsi),%rax
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r8
addq	%rax,%r11
movq	32(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r11
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r8
addq	%rax,%r12
movq	40(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r12
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r8
addq	%rax,%r13
movq	48(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r13
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r8
addq	%rax,%r14
movq	56(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r14
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r8
addq	%rax,%r15
movq	%r8,%rax
adcq	$0,%rdx
addq	%rbx,%r15
movq	%rdx,%r8
movq	%r10,%rdx
adcq	$0,%r8

/* double r10 (lea folds in the bit carried over from the previous
   column in %rcx) and r11; stash r11's carry bit in %rbx */
addq	%rdx,%rdx
leaq	(%rcx,%r10,2),%r10
movq	%r11,%rbx
adcq	%r11,%r11

/* diagonal a[1]^2 */
mulq	%rax
addq	%rax,%r9
adcq	%rdx,%r10
adcq	$0,%r11

movq	%r9,16(%rsp)
movq	%r10,24(%rsp)
shrq	$63,%rbx


/* row 2: a[2]*a[3..7], doubling, diagonal a[2]^2 -> tmp[4..5] */
movq	16(%rsi),%r9
movq	24(%rsi),%rax
mulq	%r9
addq	%rax,%r12
movq	32(%rsi),%rax
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r9
addq	%rax,%r13
movq	40(%rsi),%rax
adcq	$0,%rdx
addq	%rcx,%r13
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r9
addq	%rax,%r14
movq	48(%rsi),%rax
adcq	$0,%rdx
addq	%rcx,%r14
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r9
movq	%r12,%r10
leaq	(%rbx,%r12,2),%r12
addq	%rax,%r15
movq	56(%rsi),%rax
adcq	$0,%rdx
addq	%rcx,%r15
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r9
shrq	$63,%r10
addq	%rax,%r8
movq	%r9,%rax
adcq	$0,%rdx
addq	%rcx,%r8
movq	%rdx,%r9
adcq	$0,%r9

movq	%r13,%rcx
leaq	(%r10,%r13,2),%r13

mulq	%rax
addq	%rax,%r11
adcq	%rdx,%r12
adcq	$0,%r13

movq	%r11,32(%rsp)
movq	%r12,40(%rsp)
shrq	$63,%rcx


/* row 3: a[3]*a[4..7], doubling, diagonal a[3]^2 -> tmp[6..7] */
movq	24(%rsi),%r10
movq	32(%rsi),%rax
mulq	%r10
addq	%rax,%r14
movq	40(%rsi),%rax
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r10
addq	%rax,%r15
movq	48(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r15
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r10
movq	%r14,%r12
leaq	(%rcx,%r14,2),%r14
addq	%rax,%r8
movq	56(%rsi),%rax
adcq	$0,%rdx
addq	%rbx,%r8
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r10
shrq	$63,%r12
addq	%rax,%r9
movq	%r10,%rax
adcq	$0,%rdx
addq	%rbx,%r9
movq	%rdx,%r10
adcq	$0,%r10

movq	%r15,%rbx
leaq	(%r12,%r15,2),%r15

mulq	%rax
addq	%rax,%r13
adcq	%rdx,%r14
adcq	$0,%r15

movq	%r13,48(%rsp)
movq	%r14,56(%rsp)
shrq	$63,%rbx


/* row 4: a[4]*a[5..7], doubling, diagonal a[4]^2 -> tmp[8..9] */
movq	32(%rsi),%r11
movq	40(%rsi),%rax
mulq	%r11
addq	%rax,%r8
movq	48(%rsi),%rax
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r11
addq	%rax,%r9
movq	56(%rsi),%rax
adcq	$0,%rdx
movq	%r8,%r12
leaq	(%rbx,%r8,2),%r8
addq	%rcx,%r9
movq	%rdx,%rcx
adcq	$0,%rcx

mulq	%r11
shrq	$63,%r12
addq	%rax,%r10
movq	%r11,%rax
adcq	$0,%rdx
addq	%rcx,%r10
movq	%rdx,%r11
adcq	$0,%r11

movq	%r9,%rcx
leaq	(%r12,%r9,2),%r9

mulq	%rax
addq	%rax,%r15
adcq	%rdx,%r8
adcq	$0,%r9

movq	%r15,64(%rsp)
movq	%r8,72(%rsp)
shrq	$63,%rcx


/* row 5: a[5]*a[6..7], doubling, diagonal a[5]^2 -> tmp[10..11] */
movq	40(%rsi),%r12
movq	48(%rsi),%rax
mulq	%r12
addq	%rax,%r10
movq	56(%rsi),%rax
movq	%rdx,%rbx
adcq	$0,%rbx

mulq	%r12
addq	%rax,%r11
movq	%r12,%rax
movq	%r10,%r15
leaq	(%rcx,%r10,2),%r10
adcq	$0,%rdx
shrq	$63,%r15
addq	%rbx,%r11
movq	%rdx,%r12
adcq	$0,%r12

movq	%r11,%rbx
leaq	(%r15,%r11,2),%r11

mulq	%rax
addq	%rax,%r9
adcq	%rdx,%r10
adcq	$0,%r11

movq	%r9,80(%rsp)
movq	%r10,88(%rsp)


/* row 6: a[6]*a[7], doubling, diagonal a[6]^2 -> tmp[12..13] */
movq	48(%rsi),%r13
movq	56(%rsi),%rax
mulq	%r13
addq	%rax,%r12
movq	%r13,%rax
movq	%rdx,%r13
adcq	$0,%r13

/* double the last cross terms; %r14 collects the final carry-out */
xorq	%r14,%r14
shlq	$1,%rbx
adcq	%r12,%r12
adcq	%r13,%r13
adcq	%r14,%r14

mulq	%rax
addq	%rax,%r11
adcq	%rdx,%r12
adcq	$0,%r13

movq	%r11,96(%rsp)
movq	%r12,104(%rsp)


/* final diagonal a[7]^2 -> tmp[14..15] */
movq	56(%rsi),%rax
mulq	%rax
addq	%rax,%r13
adcq	$0,%rdx

addq	%rdx,%r14

movq	%r13,112(%rsp)
movq	%r14,120(%rsp)

/* low half of the square back into r8..r15 for reduction */
movq	(%rsp),%r8
movq	8(%rsp),%r9
movq	16(%rsp),%r10
movq	24(%rsp),%r11
movq	32(%rsp),%r12
movq	40(%rsp),%r13
movq	48(%rsp),%r14
movq	56(%rsp),%r15

call	__rsaz_512_reduce

/* add the high half of the product; CF = overflow beyond 2^512 */
addq	64(%rsp),%r8
adcq	72(%rsp),%r9
adcq	80(%rsp),%r10
adcq	88(%rsp),%r11
adcq	96(%rsp),%r12
adcq	104(%rsp),%r13
adcq	112(%rsp),%r14
adcq	120(%rsp),%r15
/* %rcx = all-ones mask iff the addition carried (subtract modulus) */
sbbq	%rcx,%rcx

call	__rsaz_512_subtract

/* next pass squares out in place: preload out[0]/out[1], rsi := out */
movq	%r8,%rdx
movq	%r9,%rax
movl	128+8(%rsp),%r8d
movq	%rdi,%rsi

decl	%r8d
jnz	.Loop_sqr

/* restore callee-saved registers and deallocate the frame */
leaq	128+24+48(%rsp),%rax
movq	-48(%rax),%r15
movq	-40(%rax),%r14
movq	-32(%rax),%r13
movq	-24(%rax),%r12
movq	-16(%rax),%rbp
movq	-8(%rax),%rbx
leaq	(%rax),%rsp
.Lsqr_epilogue:
/* rep ret (two-byte return, avoids AMD branch-predictor penalty) */
.byte	0xf3,0xc3
.size	rsaz_512_sqr,.-rsaz_512_sqr
/*
 * void rsaz_512_mul(uint64_t out[8], const uint64_t a[8],
 *                   const uint64_t b[8], const uint64_t mod[8], uint64_t n0)
 *
 * SysV AMD64: %rdi=out, %rsi=a, %rdx=b, %rcx=mod, %r8=n0.
 * Montgomery multiplication: out = a*b*R^-1 mod mod (R = 2^512).
 * out and mod are parked in %xmm0/%xmm1 across the helper calls because
 * __rsaz_512_mul clobbers %rdi and needs %rbp for b.
 */
.globl	rsaz_512_mul
.hidden rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
pushq	%rbx
pushq	%rbp
pushq	%r12
pushq	%r13
pushq	%r14
pushq	%r15

/* 128-byte product scratch + n0 slot, as in rsaz_512_sqr */
subq	$128+24,%rsp
.Lmul_body:
.byte	102,72,15,110,199	# movq %rdi,%xmm0  (save out)
.byte	102,72,15,110,201	# movq %rcx,%xmm1  (save mod)
movq	%r8,128(%rsp)
/* b[0] in %rbx, b pointer in %rbp for __rsaz_512_mul */
movq	(%rdx),%rbx
movq	%rdx,%rbp
call	__rsaz_512_mul

.byte	102,72,15,126,199	# movq %xmm0,%rdi  (restore out)
.byte	102,72,15,126,205	# movq %xmm1,%rbp  (restore mod)

/* low half of the 16-limb product into r8..r15 */
movq	(%rsp),%r8
movq	8(%rsp),%r9
movq	16(%rsp),%r10
movq	24(%rsp),%r11
movq	32(%rsp),%r12
movq	40(%rsp),%r13
movq	48(%rsp),%r14
movq	56(%rsp),%r15

call	__rsaz_512_reduce
/* add high half; %rcx becomes the conditional-subtract mask */
addq	64(%rsp),%r8
adcq	72(%rsp),%r9
adcq	80(%rsp),%r10
adcq	88(%rsp),%r11
adcq	96(%rsp),%r12
adcq	104(%rsp),%r13
adcq	112(%rsp),%r14
adcq	120(%rsp),%r15
sbbq	%rcx,%rcx

call	__rsaz_512_subtract

/* restore callee-saved registers */
leaq	128+24+48(%rsp),%rax
movq	-48(%rax),%r15
movq	-40(%rax),%r14
movq	-32(%rax),%r13
movq	-24(%rax),%r12
movq	-16(%rax),%rbp
movq	-8(%rax),%rbx
leaq	(%rax),%rsp
.Lmul_epilogue:
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_mul,.-rsaz_512_mul
/*
 * void rsaz_512_mul_gather4(uint64_t out[8], const uint64_t a[8],
 *                           const uint64_t *table, const uint64_t mod[8],
 *                           uint64_t n0, int power)
 *
 * SysV AMD64: %rdi=out, %rsi=a, %rdx=table, %rcx=mod, %r8=n0, %r9d=power.
 * Montgomery-multiplies a by table entry 'power'.  Table limbs are stored
 * as split 32-bit halves: limb i of entry p lives at table+p*4+i*128 (low
 * word) and +64 (high word); %rbp walks the table 128 bytes per limb and
 * %xmm4/%xmm5 reassemble the next 64-bit b-limb one iteration ahead.
 *
 * NOTE(review): the gather addresses depend directly on 'power', so the
 * cache lines touched leak the index (CacheBleed-style side channel,
 * cf. CVE-2016-0702).  Later upstream versions read all table entries
 * with masks — confirm whether this copy needs that hardening.
 */
.globl	rsaz_512_mul_gather4
.hidden rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
pushq	%rbx
pushq	%rbp
pushq	%r12
pushq	%r13
pushq	%r14
pushq	%r15

/* zero-extend power; it indexes the table below */
movl	%r9d,%r9d
subq	$128+24,%rsp
.Lmul_gather4_body:
/* assemble b[0] from its two 32-bit halves */
movl	64(%rdx,%r9,4),%eax
.byte	102,72,15,110,199	# movq %rdi,%xmm0  (save out)
movl	(%rdx,%r9,4),%ebx
.byte	102,72,15,110,201	# movq %rcx,%xmm1  (save mod)
movq	%r8,128(%rsp)

shlq	$32,%rax
orq	%rax,%rbx
movq	(%rsi),%rax
movq	8(%rsi),%rcx
/* %rbp -> split halves of b[1] */
leaq	128(%rdx,%r9,4),%rbp
/* first row: tmp[0] and r8..r15 = a[0..7] * b[0] */
mulq	%rbx
movq	%rax,(%rsp)
movq	%rcx,%rax
movq	%rdx,%r8

mulq	%rbx
/* prefetch next b-limb halves into xmm4/xmm5 while multiplying */
movd	(%rbp),%xmm4
addq	%rax,%r8
movq	16(%rsi),%rax
movq	%rdx,%r9
adcq	$0,%r9

mulq	%rbx
movd	64(%rbp),%xmm5
addq	%rax,%r9
movq	24(%rsi),%rax
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
/* shift high half into bits 32..63, then merge */
pslldq	$4,%xmm5
addq	%rax,%r10
movq	32(%rsi),%rax
movq	%rdx,%r11
adcq	$0,%r11

mulq	%rbx
por	%xmm5,%xmm4
addq	%rax,%r11
movq	40(%rsi),%rax
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r12
movq	48(%rsi),%rax
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
/* advance to the halves of the limb after next */
leaq	128(%rbp),%rbp
addq	%rax,%r13
movq	56(%rsi),%rax
movq	%rdx,%r14
adcq	$0,%r14

mulq	%rbx
.byte	102,72,15,126,227	# movq %xmm4,%rbx  (next b-limb)
addq	%rax,%r14
movq	(%rsi),%rax
movq	%rdx,%r15
adcq	$0,%r15

/* remaining 7 rows: accumulate a[0..7]*b[j], spilling one limb per row */
leaq	8(%rsp),%rdi
movl	$7,%ecx
jmp	.Loop_mul_gather

.align	32
.Loop_mul_gather:
mulq	%rbx
addq	%rax,%r8
movq	8(%rsi),%rax
movq	%r8,(%rdi)
movq	%rdx,%r8
adcq	$0,%r8

mulq	%rbx
movd	(%rbp),%xmm4
addq	%rax,%r9
movq	16(%rsi),%rax
adcq	$0,%rdx
addq	%r9,%r8
movq	%rdx,%r9
adcq	$0,%r9

mulq	%rbx
movd	64(%rbp),%xmm5
addq	%rax,%r10
movq	24(%rsi),%rax
adcq	$0,%rdx
addq	%r10,%r9
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
pslldq	$4,%xmm5
addq	%rax,%r11
movq	32(%rsi),%rax
adcq	$0,%rdx
addq	%r11,%r10
movq	%rdx,%r11
adcq	$0,%r11

mulq	%rbx
por	%xmm5,%xmm4
addq	%rax,%r12
movq	40(%rsi),%rax
adcq	$0,%rdx
addq	%r12,%r11
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r13
movq	48(%rsi),%rax
adcq	$0,%rdx
addq	%r13,%r12
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
addq	%rax,%r14
movq	56(%rsi),%rax
adcq	$0,%rdx
addq	%r14,%r13
movq	%rdx,%r14
adcq	$0,%r14

mulq	%rbx
.byte	102,72,15,126,227	# movq %xmm4,%rbx  (next b-limb)
addq	%rax,%r15
movq	(%rsi),%rax
adcq	$0,%rdx
addq	%r15,%r14
movq	%rdx,%r15
adcq	$0,%r15

leaq	128(%rbp),%rbp
leaq	8(%rdi),%rdi

decl	%ecx
jnz	.Loop_mul_gather

/* store the high 8 limbs of the 16-limb product */
movq	%r8,(%rdi)
movq	%r9,8(%rdi)
movq	%r10,16(%rdi)
movq	%r11,24(%rdi)
movq	%r12,32(%rdi)
movq	%r13,40(%rdi)
movq	%r14,48(%rdi)
movq	%r15,56(%rdi)

.byte	102,72,15,126,199	# movq %xmm0,%rdi  (restore out)
.byte	102,72,15,126,205	# movq %xmm1,%rbp  (restore mod)

/* reduce and conditionally subtract, as in rsaz_512_mul */
movq	(%rsp),%r8
movq	8(%rsp),%r9
movq	16(%rsp),%r10
movq	24(%rsp),%r11
movq	32(%rsp),%r12
movq	40(%rsp),%r13
movq	48(%rsp),%r14
movq	56(%rsp),%r15

call	__rsaz_512_reduce
addq	64(%rsp),%r8
adcq	72(%rsp),%r9
adcq	80(%rsp),%r10
adcq	88(%rsp),%r11
adcq	96(%rsp),%r12
adcq	104(%rsp),%r13
adcq	112(%rsp),%r14
adcq	120(%rsp),%r15
sbbq	%rcx,%rcx

call	__rsaz_512_subtract

/* restore callee-saved registers */
leaq	128+24+48(%rsp),%rax
movq	-48(%rax),%r15
movq	-40(%rax),%r14
movq	-32(%rax),%r13
movq	-24(%rax),%r12
movq	-16(%rax),%rbp
movq	-8(%rax),%rbx
leaq	(%rax),%rsp
.Lmul_gather4_epilogue:
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
/*
 * void rsaz_512_mul_scatter4(uint64_t out[8], const uint64_t a[8],
 *                            const uint64_t mod[8], uint64_t n0,
 *                            uint64_t *table, int power)
 *
 * SysV AMD64: %rdi=out, %rsi=a, %rdx=mod, %rcx=n0, %r8=table, %r9d=power.
 * Montgomery-multiplies a by the current contents of out, writes the
 * result to out, and additionally scatters it into table entry 'power'
 * in the split 32-bit layout consumed by rsaz_512_mul_gather4 /
 * rsaz_512_gather4 (low word at slot+i*128, high word at +64).
 */
.globl	rsaz_512_mul_scatter4
.hidden rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
pushq	%rbx
pushq	%rbp
pushq	%r12
pushq	%r13
pushq	%r14
pushq	%r15

/* zero-extend power and compute the scatter slot base */
movl	%r9d,%r9d
subq	$128+24,%rsp
.Lmul_scatter4_body:
leaq	(%r8,%r9,4),%r8
.byte	102,72,15,110,199	# movq %rdi,%xmm0  (save out)
.byte	102,72,15,110,202	# movq %rdx,%xmm1  (save mod)
.byte	102,73,15,110,208	# movq %r8,%xmm2   (save scatter slot)
movq	%rcx,128(%rsp)

/* multiplicand b is out itself: b pointer and b[0] from %rdi */
movq	%rdi,%rbp
movq	(%rdi),%rbx
call	__rsaz_512_mul

.byte	102,72,15,126,199	# movq %xmm0,%rdi  (restore out)
.byte	102,72,15,126,205	# movq %xmm1,%rbp  (restore mod)

/* low half of the product into r8..r15 */
movq	(%rsp),%r8
movq	8(%rsp),%r9
movq	16(%rsp),%r10
movq	24(%rsp),%r11
movq	32(%rsp),%r12
movq	40(%rsp),%r13
movq	48(%rsp),%r14
movq	56(%rsp),%r15

call	__rsaz_512_reduce
addq	64(%rsp),%r8
adcq	72(%rsp),%r9
adcq	80(%rsp),%r10
adcq	88(%rsp),%r11
adcq	96(%rsp),%r12
adcq	104(%rsp),%r13
adcq	112(%rsp),%r14
adcq	120(%rsp),%r15
.byte	102,72,15,126,214	# movq %xmm2,%rsi  (scatter slot)
sbbq	%rcx,%rcx

call	__rsaz_512_subtract

/* scatter r8..r15 into the table: low words at i*128, high at i*128+64 */
movl	%r8d,0(%rsi)
shrq	$32,%r8
movl	%r9d,128(%rsi)
shrq	$32,%r9
movl	%r10d,256(%rsi)
shrq	$32,%r10
movl	%r11d,384(%rsi)
shrq	$32,%r11
movl	%r12d,512(%rsi)
shrq	$32,%r12
movl	%r13d,640(%rsi)
shrq	$32,%r13
movl	%r14d,768(%rsi)
shrq	$32,%r14
movl	%r15d,896(%rsi)
shrq	$32,%r15
movl	%r8d,64(%rsi)
movl	%r9d,192(%rsi)
movl	%r10d,320(%rsi)
movl	%r11d,448(%rsi)
movl	%r12d,576(%rsi)
movl	%r13d,704(%rsi)
movl	%r14d,832(%rsi)
movl	%r15d,960(%rsi)

/* restore callee-saved registers */
leaq	128+24+48(%rsp),%rax
movq	-48(%rax),%r15
movq	-40(%rax),%r14
movq	-32(%rax),%r13
movq	-24(%rax),%r12
movq	-16(%rax),%rbp
movq	-8(%rax),%rbx
leaq	(%rax),%rsp
.Lmul_scatter4_epilogue:
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
/*
 * void rsaz_512_mul_by_one(uint64_t out[8], const uint64_t a[8],
 *                          const uint64_t mod[8], uint64_t n0)
 *
 * SysV AMD64: %rdi=out, %rsi=a, %rdx=mod, %rcx=n0.
 * Montgomery-reduces a by itself (i.e. multiplies by 1), converting out
 * of Montgomery form.  The high half of the product scratch is zeroed so
 * the usual "add high half" step is unnecessary; only the reduction runs.
 */
.globl	rsaz_512_mul_by_one
.hidden rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
pushq	%rbx
pushq	%rbp
pushq	%r12
pushq	%r13
pushq	%r14
pushq	%r15

subq	$128+24,%rsp
.Lmul_by_one_body:
/* %rbp = mod for __rsaz_512_reduce; n0 parked at 128(%rsp) */
movq	%rdx,%rbp
movq	%rcx,128(%rsp)

/* load a into r8..r15 and zero the scratch area */
movq	(%rsi),%r8
pxor	%xmm0,%xmm0
movq	8(%rsi),%r9
movq	16(%rsi),%r10
movq	24(%rsi),%r11
movq	32(%rsi),%r12
movq	40(%rsi),%r13
movq	48(%rsi),%r14
movq	56(%rsi),%r15

movdqa	%xmm0,(%rsp)
movdqa	%xmm0,16(%rsp)
movdqa	%xmm0,32(%rsp)
movdqa	%xmm0,48(%rsp)
movdqa	%xmm0,64(%rsp)
movdqa	%xmm0,80(%rsp)
movdqa	%xmm0,96(%rsp)
call	__rsaz_512_reduce
/* reduced value is already < mod here; store it directly */
movq	%r8,(%rdi)
movq	%r9,8(%rdi)
movq	%r10,16(%rdi)
movq	%r11,24(%rdi)
movq	%r12,32(%rdi)
movq	%r13,40(%rdi)
movq	%r14,48(%rdi)
movq	%r15,56(%rdi)

/* restore callee-saved registers */
leaq	128+24+48(%rsp),%rax
movq	-48(%rax),%r15
movq	-40(%rax),%r14
movq	-32(%rax),%r13
movq	-24(%rax),%r12
movq	-16(%rax),%rbp
movq	-8(%rax),%rbx
leaq	(%rax),%rsp
.Lmul_by_one_epilogue:
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
/*
 * __rsaz_512_reduce — Montgomery reduction core (local helper).
 *
 * In:  r8..r15 = low 8 limbs of the value to reduce,
 *      %rbp    = modulus,
 *      caller's 128(%rsp) = n0 (= -mod^-1 mod 2^64).
 * Out: r8..r15 = reduced low half; the caller then adds the product's
 *      high half and conditionally subtracts the modulus.
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags.
 *
 * Offsets here are written as 128+8(%rsp) because the call pushed a
 * return address: callee 128+8(%rsp) == caller 128(%rsp).
 *
 * Each of the 8 iterations folds one limb: %rbx = limb*n0 mod 2^64,
 * then r8..r15 += %rbx * mod, shifted down one limb.
 */
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
/* first multiplier: m = r8 * n0 mod 2^64 */
movq	%r8,%rbx
imulq	128+8(%rsp),%rbx
movq	0(%rbp),%rax
movl	$8,%ecx
jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
mulq	%rbx
/* by choice of n0, lo(m*mod[0]) == -r8, so the sum r8 + lo is exactly
   2^64 when r8 != 0; negq reproduces just its carry-out */
movq	8(%rbp),%rax
negq	%r8
movq	%rdx,%r8
adcq	$0,%r8

mulq	%rbx
addq	%rax,%r9
movq	16(%rbp),%rax
adcq	$0,%rdx
addq	%r9,%r8
movq	%rdx,%r9
adcq	$0,%r9

mulq	%rbx
addq	%rax,%r10
movq	24(%rbp),%rax
adcq	$0,%rdx
addq	%r10,%r9
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
addq	%rax,%r11
movq	32(%rbp),%rax
adcq	$0,%rdx
addq	%r11,%r10
/* start computing next iteration's multiplier early: %rsi = n0 */
movq	128+8(%rsp),%rsi


adcq	$0,%rdx
movq	%rdx,%r11

mulq	%rbx
addq	%rax,%r12
movq	40(%rbp),%rax
adcq	$0,%rdx
/* %rsi = (new r8) * n0 — the multiplier for the next round */
imulq	%r8,%rsi
addq	%r12,%r11
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r13
movq	48(%rbp),%rax
adcq	$0,%rdx
addq	%r13,%r12
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
addq	%rax,%r14
movq	56(%rbp),%rax
adcq	$0,%rdx
addq	%r14,%r13
movq	%rdx,%r14
adcq	$0,%r14

mulq	%rbx
/* hand over the precomputed multiplier for the next iteration */
movq	%rsi,%rbx
addq	%rax,%r15
movq	0(%rbp),%rax
adcq	$0,%rdx
addq	%r15,%r14
movq	%rdx,%r15
adcq	$0,%r15

decl	%ecx
jne	.Lreduction_loop

/* rep ret */
.byte	0xf3,0xc3
.size	__rsaz_512_reduce,.-__rsaz_512_reduce
/*
 * __rsaz_512_subtract — conditional modulus subtraction (local helper).
 *
 * In:  r8..r15 = candidate result, %rdi = out, %rbp = modulus,
 *      %rcx = mask: all-ones to subtract mod, zero to leave unchanged
 *      (callers form it from the carry of the high-half addition).
 * Out: out[0..7] = result, also left in r8..r15.
 * Clobbers: r8..r15, flags.
 *
 * Branch-free: computes result += (-mod) & mask.  Two's complement of
 * the multi-limb modulus is built as neg(mod[0]), not(mod[1..7]); the
 * "+1" is carried by negq.  Assumes mod[0] != 0 (an RSA modulus is odd,
 * so this holds) — a zero low limb would drop the carry into limb 1.
 */
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
/* store the unadjusted value first; it is re-read below */
movq	%r8,(%rdi)
movq	%r9,8(%rdi)
movq	%r10,16(%rdi)
movq	%r11,24(%rdi)
movq	%r12,32(%rdi)
movq	%r13,40(%rdi)
movq	%r14,48(%rdi)
movq	%r15,56(%rdi)

/* build (-mod) & mask limb by limb */
movq	0(%rbp),%r8
movq	8(%rbp),%r9
negq	%r8
notq	%r9
andq	%rcx,%r8
movq	16(%rbp),%r10
andq	%rcx,%r9
notq	%r10
movq	24(%rbp),%r11
andq	%rcx,%r10
notq	%r11
movq	32(%rbp),%r12
andq	%rcx,%r11
notq	%r12
movq	40(%rbp),%r13
andq	%rcx,%r12
notq	%r13
movq	48(%rbp),%r14
andq	%rcx,%r13
notq	%r14
movq	56(%rbp),%r15
andq	%rcx,%r14
notq	%r15
andq	%rcx,%r15

/* result += (-mod) & mask, i.e. subtract mod iff mask is all-ones */
addq	(%rdi),%r8
adcq	8(%rdi),%r9
adcq	16(%rdi),%r10
adcq	24(%rdi),%r11
adcq	32(%rdi),%r12
adcq	40(%rdi),%r13
adcq	48(%rdi),%r14
adcq	56(%rdi),%r15

movq	%r8,(%rdi)
movq	%r9,8(%rdi)
movq	%r10,16(%rdi)
movq	%r11,24(%rdi)
movq	%r12,32(%rdi)
movq	%r13,40(%rdi)
movq	%r14,48(%rdi)
movq	%r15,56(%rdi)

/* rep ret */
.byte	0xf3,0xc3
.size	__rsaz_512_subtract,.-__rsaz_512_subtract
/*
 * __rsaz_512_mul — 8x8-limb schoolbook multiplication (local helper).
 *
 * In:  %rsi = a, %rbp = b, %rbx = b[0].
 * Out: 16-limb product written to the caller's stack scratch — callee
 *      8(%rsp)..128(%rsp) is the caller's (%rsp)..120(%rsp) because of
 *      the pushed return address; high limbs also left in r8..r15.
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %rbp (advanced), r8..r15.
 *
 * Row 0 seeds the accumulator with a[0..7]*b[0]; each of the 7 loop
 * iterations adds a[0..7]*b[j] and spills the finished low limb.
 */
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
leaq	8(%rsp),%rdi

/* row 0: tmp[0] and r8..r15 = a[0..7] * b[0] */
movq	(%rsi),%rax
mulq	%rbx
movq	%rax,(%rdi)
movq	8(%rsi),%rax
movq	%rdx,%r8

mulq	%rbx
addq	%rax,%r8
movq	16(%rsi),%rax
movq	%rdx,%r9
adcq	$0,%r9

mulq	%rbx
addq	%rax,%r9
movq	24(%rsi),%rax
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
addq	%rax,%r10
movq	32(%rsi),%rax
movq	%rdx,%r11
adcq	$0,%r11

mulq	%rbx
addq	%rax,%r11
movq	40(%rsi),%rax
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r12
movq	48(%rsi),%rax
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
addq	%rax,%r13
movq	56(%rsi),%rax
movq	%rdx,%r14
adcq	$0,%r14

mulq	%rbx
addq	%rax,%r14
movq	(%rsi),%rax
movq	%rdx,%r15
adcq	$0,%r15

/* advance to b[1] and the next output limb */
leaq	8(%rbp),%rbp
leaq	8(%rdi),%rdi

movl	$7,%ecx
jmp	.Loop_mul

.align	32
.Loop_mul:
/* rows 1..7: accumulate a[0..7]*b[j]; the finished low limb goes to
   (%rdi) and the chain shifts down one register */
movq	(%rbp),%rbx
mulq	%rbx
addq	%rax,%r8
movq	8(%rsi),%rax
movq	%r8,(%rdi)
movq	%rdx,%r8
adcq	$0,%r8

mulq	%rbx
addq	%rax,%r9
movq	16(%rsi),%rax
adcq	$0,%rdx
addq	%r9,%r8
movq	%rdx,%r9
adcq	$0,%r9

mulq	%rbx
addq	%rax,%r10
movq	24(%rsi),%rax
adcq	$0,%rdx
addq	%r10,%r9
movq	%rdx,%r10
adcq	$0,%r10

mulq	%rbx
addq	%rax,%r11
movq	32(%rsi),%rax
adcq	$0,%rdx
addq	%r11,%r10
movq	%rdx,%r11
adcq	$0,%r11

mulq	%rbx
addq	%rax,%r12
movq	40(%rsi),%rax
adcq	$0,%rdx
addq	%r12,%r11
movq	%rdx,%r12
adcq	$0,%r12

mulq	%rbx
addq	%rax,%r13
movq	48(%rsi),%rax
adcq	$0,%rdx
addq	%r13,%r12
movq	%rdx,%r13
adcq	$0,%r13

mulq	%rbx
addq	%rax,%r14
movq	56(%rsi),%rax
adcq	$0,%rdx
addq	%r14,%r13
movq	%rdx,%r14
leaq	8(%rbp),%rbp
adcq	$0,%r14

mulq	%rbx
addq	%rax,%r15
movq	(%rsi),%rax
adcq	$0,%rdx
addq	%r15,%r14
movq	%rdx,%r15
adcq	$0,%r15

leaq	8(%rdi),%rdi

decl	%ecx
jnz	.Loop_mul

/* store the high 8 limbs of the product */
movq	%r8,(%rdi)
movq	%r9,8(%rdi)
movq	%r10,16(%rdi)
movq	%r11,24(%rdi)
movq	%r12,32(%rdi)
movq	%r13,40(%rdi)
movq	%r14,48(%rdi)
movq	%r15,56(%rdi)

/* rep ret */
.byte	0xf3,0xc3
.size	__rsaz_512_mul,.-__rsaz_512_mul
/*
 * void rsaz_512_scatter4(uint64_t *table, const uint64_t val[8], int power)
 *
 * SysV AMD64: %rdi=table, %rsi=val, %rdx=power.
 * Stores val into table entry 'power' in the split layout used by the
 * gather routines: limb i's low 32 bits at table+power*4+i*128, its
 * high 32 bits at the same address +64.
 */
.globl	rsaz_512_scatter4
.hidden rsaz_512_scatter4
.type	rsaz_512_scatter4,@function
.align	16
rsaz_512_scatter4:
leaq	(%rdi,%rdx,4),%rdi
movl	$8,%r9d
jmp	.Loop_scatter
.align	16
.Loop_scatter:
movq	(%rsi),%rax
leaq	8(%rsi),%rsi
movl	%eax,(%rdi)
shrq	$32,%rax
movl	%eax,64(%rdi)
leaq	128(%rdi),%rdi
decl	%r9d
jnz	.Loop_scatter
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_scatter4,.-rsaz_512_scatter4
|  |  | 
/*
 * void rsaz_512_gather4(uint64_t out[8], const uint64_t *table, int power)
 *
 * SysV AMD64: %rdi=out, %rsi=table, %rdx=power.
 * Reassembles table entry 'power' from the split 32-bit layout written
 * by rsaz_512_scatter4 (low word at slot+i*128, high word at +64).
 *
 * NOTE(review): loads address only the selected entry, so the cache
 * footprint depends on 'power' (CacheBleed-class leak, CVE-2016-0702);
 * later upstream code scans all entries with masks — confirm whether
 * this copy needs the same hardening.
 */
.globl	rsaz_512_gather4
.hidden rsaz_512_gather4
.type	rsaz_512_gather4,@function
.align	16
rsaz_512_gather4:
leaq	(%rsi,%rdx,4),%rsi
movl	$8,%r9d
jmp	.Loop_gather
.align	16
.Loop_gather:
movl	(%rsi),%eax
movl	64(%rsi),%r8d
leaq	128(%rsi),%rsi
shlq	$32,%r8
orq	%r8,%rax
movq	%rax,(%rdi)
leaq	8(%rdi),%rdi
decl	%r9d
jnz	.Loop_gather
/* rep ret */
.byte	0xf3,0xc3
.size	rsaz_512_gather4,.-rsaz_512_gather4
|  | #endif |