| #include "arm_arch.h" | 
 |  | 
 | #if __ARM_MAX_ARCH__>=7 | 
 | .text | 
// Key-expansion constants, reached through x3 by the key-setup code below.
//   row 0: 0x01 in every 32-bit lane - the first round constant; the
//          expansion loops double it with a per-byte shl after each use
//   row 1: tbl index pattern 0d,0e,0f,0c per lane - picks the last word of
//          the source vector, rotated by one byte, into all four lanes
//   row 2: 0x1b in every 32-bit lane - the round constant that the 128-bit
//          path reloads after its eight looped rounds
 | .arch	armv8-a+crypto | 
 | .align	5 | 
 | rcon: | 
 | .long	0x01,0x01,0x01,0x01 | 
 | .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat | 
 | .long	0x1b,0x1b,0x1b,0x1b | 
 |  | 
 | .globl	aes_v8_set_encrypt_key | 
// aes_v8_set_encrypt_key - expand a user key into an AES encryption schedule.
//   In:   x0 = user key bytes
//         w1 = key length in bits; 128, 192 and 256 are accepted
//         x2 = destination key schedule
//   Out:  x0 =  0 on success
//              -1 if x0 or x2 is NULL
//              -2 if the bit length is rejected
//   On success the round keys are written from x2 upward and the round count
//   (10, 12 or 14) is stored as a 32-bit word at offset 240 of the schedule.
//   aes_v8_set_decrypt_key enters at .Lenc_key and relies on two leftovers of
//   the success path: x2 points at offset 240 and w12 holds the round count.
//   Also modifies x1, x3, v0-v6 and the flags.
 | .type	aes_v8_set_encrypt_key,%function | 
 | .align	5 | 
 | aes_v8_set_encrypt_key: | 
 | .Lenc_key: | 
// Frame record. Nothing in this routine writes x30, so the shared exit only
// reloads x29.
 | 	stp	x29,x30,[sp,#-16]! | 
 | 	add	x29,sp,#0 | 
// x3 carries the prospective return value through the argument checks.
 | 	mov	x3,#-1 | 
 | 	cmp	x0,#0 | 
 | 	b.eq	.Lenc_key_abort | 
 | 	cmp	x2,#0 | 
 | 	b.eq	.Lenc_key_abort | 
// Accept only 128 <= bits <= 256 with bits a multiple of 64.
 | 	mov	x3,#-2 | 
 | 	cmp	w1,#128 | 
 | 	b.lt	.Lenc_key_abort | 
 | 	cmp	w1,#256 | 
 | 	b.gt	.Lenc_key_abort | 
 | 	tst	w1,#0x3f | 
 | 	b.ne	.Lenc_key_abort | 
 |  | 
// x3 becomes the rcon pointer. The flags from the compare against 192 are
// consumed by the three-way branch below; the vector and move instructions
// in between do not touch them.
 | 	adr	x3,rcon | 
 | 	cmp	w1,#192 | 
 |  | 
// v0 = 0: zero fill for the ext shifts and a null round key for aese.
// v3 = first 16 key bytes, v1 = round constant row, v2 = rotate-n-splat mask.
 | 	eor	v0.16b,v0.16b,v0.16b | 
 | 	ld1	{v3.16b},[x0],#16 | 
 | 	mov	w1,#8		// reuse w1 | 
 | 	ld1	{v1.4s,v2.4s},[x3],#32 | 
 |  | 
 | 	b.lt	.Loop128 | 
 | 	b.eq	.L192 | 
 | 	b	.L256 | 
 |  | 
// AES-128: eight looped rounds, then two unrolled ones. Per round: store the
// current key (v3); build v6 = S-box(rotated last word) ^ round constant in
// every lane (aese with the zero key in v0 - ShiftRows changes nothing since
// all four columns of v6 are equal); then turn v3 into the next key by
// xoring in its own words shifted up by one, two and three lanes (ext with
// v0 supplies the zero fill) and finally v6.
 | .align	4 | 
 | .Loop128: | 
 | 	tbl	v6.16b,{v3.16b},v2.16b | 
 | 	ext	v5.16b,v0.16b,v3.16b,#12 | 
 | 	st1	{v3.4s},[x2],#16 | 
 | 	aese	v6.16b,v0.16b | 
 | 	subs	w1,w1,#1 | 
 |  | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	 eor	v6.16b,v6.16b,v1.16b | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	shl	v1.16b,v1.16b,#1 | 
 | 	eor	v3.16b,v3.16b,v6.16b | 
 | 	b.ne	.Loop128 | 
 |  | 
// x3 was advanced past the first two rcon rows, so this loads the 0x1b row
// as the round constant for the two remaining rounds.
 | 	ld1	{v1.4s},[x3] | 
 |  | 
 | 	tbl	v6.16b,{v3.16b},v2.16b | 
 | 	ext	v5.16b,v0.16b,v3.16b,#12 | 
 | 	st1	{v3.4s},[x2],#16 | 
 | 	aese	v6.16b,v0.16b | 
 |  | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	 eor	v6.16b,v6.16b,v1.16b | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	shl	v1.16b,v1.16b,#1 | 
 | 	eor	v3.16b,v3.16b,v6.16b | 
 |  | 
 | 	tbl	v6.16b,{v3.16b},v2.16b | 
 | 	ext	v5.16b,v0.16b,v3.16b,#12 | 
 | 	st1	{v3.4s},[x2],#16 | 
 | 	aese	v6.16b,v0.16b | 
 |  | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	 eor	v6.16b,v6.16b,v1.16b | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	eor	v3.16b,v3.16b,v6.16b | 
// The eleventh round key is stored without post-increment (x2 is at offset
// 160 here); adding 0x50 leaves x2 at offset 240.
 | 	st1	{v3.4s},[x2] | 
 | 	add	x2,x2,#0x50 | 
 |  | 
 | 	mov	w12,#10 | 
 | 	b	.Ldone | 
 |  | 
// AES-192: the low 8 bytes of v4 receive key words 4 and 5. Subtracting 8
// from every mask byte turns the tbl pattern into 05,06,07,04, i.e. the
// rotated word 5 taken from v4.
 | .align	4 | 
 | .L192: | 
 | 	ld1	{v4.8b},[x0],#8 | 
 | 	movi	v6.16b,#8			// borrow v6.16b | 
 | 	st1	{v3.4s},[x2],#16 | 
 | 	sub	v2.16b,v2.16b,v6.16b	// adjust the mask | 
 |  | 
// Each of the eight iterations appends 24 bytes: the low half of v4, then
// the freshly derived v3. With the initial 16 bytes that is 208 bytes, so
// x2 needs a further 0x20 after the loop to reach offset 240.
 | .Loop192: | 
 | 	tbl	v6.16b,{v4.16b},v2.16b | 
 | 	ext	v5.16b,v0.16b,v3.16b,#12 | 
 | 	st1	{v4.8b},[x2],#8 | 
 | 	aese	v6.16b,v0.16b | 
 | 	subs	w1,w1,#1 | 
 |  | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 |  | 
 | 	dup	v5.4s,v3.s[3] | 
 | 	eor	v5.16b,v5.16b,v4.16b | 
 | 	 eor	v6.16b,v6.16b,v1.16b | 
 | 	ext	v4.16b,v0.16b,v4.16b,#12 | 
 | 	shl	v1.16b,v1.16b,#1 | 
 | 	eor	v4.16b,v4.16b,v5.16b | 
 | 	eor	v3.16b,v3.16b,v6.16b | 
 | 	eor	v4.16b,v4.16b,v6.16b | 
 | 	st1	{v3.4s},[x2],#16 | 
 | 	b.ne	.Loop192 | 
 |  | 
 | 	mov	w12,#12 | 
 | 	add	x2,x2,#0x20 | 
 | 	b	.Ldone | 
 |  | 
// AES-256: v3 and v4 hold the first and second 16 key bytes. The loop runs
// seven times and is left from its middle once the fifteenth round key has
// been stored, at which point x2 is already at offset 240.
 | .align	4 | 
 | .L256: | 
 | 	ld1	{v4.16b},[x0] | 
 | 	mov	w1,#7 | 
 | 	mov	w12,#14 | 
 | 	st1	{v3.4s},[x2],#16 | 
 |  | 
 | .Loop256: | 
 | 	tbl	v6.16b,{v4.16b},v2.16b | 
 | 	ext	v5.16b,v0.16b,v3.16b,#12 | 
 | 	st1	{v4.4s},[x2],#16 | 
 | 	aese	v6.16b,v0.16b | 
 | 	subs	w1,w1,#1 | 
 |  | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	 eor	v6.16b,v6.16b,v1.16b | 
 | 	eor	v3.16b,v3.16b,v5.16b | 
 | 	shl	v1.16b,v1.16b,#1 | 
 | 	eor	v3.16b,v3.16b,v6.16b | 
 | 	st1	{v3.4s},[x2],#16 | 
// Still the flags of the subs above: none of the eor/ext/shl/st1 in between
// modifies them.
 | 	b.eq	.Ldone | 
 |  | 
// Derive the next v4: S-box of the last word of v3, with neither rotation
// nor round constant, folded into the shifted-xor of v4.
 | 	dup	v6.4s,v3.s[3]		// just splat | 
 | 	ext	v5.16b,v0.16b,v4.16b,#12 | 
 | 	aese	v6.16b,v0.16b | 
 |  | 
 | 	eor	v4.16b,v4.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v4.16b,v4.16b,v5.16b | 
 | 	ext	v5.16b,v0.16b,v5.16b,#12 | 
 | 	eor	v4.16b,v4.16b,v5.16b | 
 |  | 
 | 	eor	v4.16b,v4.16b,v6.16b | 
 | 	b	.Loop256 | 
 |  | 
// Success: x2 is at offset 240 on every path; record the round count there.
 | .Ldone: | 
 | 	str	w12,[x2] | 
 | 	mov	x3,#0 | 
 |  | 
 | .Lenc_key_abort: | 
 | 	mov	x0,x3			// return value | 
 | 	ldr	x29,[sp],#16 | 
 | 	ret | 
 | .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key | 
 |  | 
 | .globl	aes_v8_set_decrypt_key | 
// aes_v8_set_decrypt_key - build an AES decryption key schedule.
//   In:   x0 = user key bytes, w1 = key length in bits, x2 = key schedule
//   Out:  x0 = 0 on success; a nonzero result of the key expansion at
//         .Lenc_key is returned unchanged and the conversion is skipped.
//   The encryption schedule produced by .Lenc_key is converted in place:
//   the round keys are put in reverse order and aesimc is applied to every
//   key except the first and the last.
 | .type	aes_v8_set_decrypt_key,%function | 
 | .align	5 | 
 | aes_v8_set_decrypt_key: | 
// x30 is saved here and restored on exit because bl overwrites it.
 | 	stp	x29,x30,[sp,#-16]! | 
 | 	add	x29,sp,#0 | 
 | 	bl	.Lenc_key | 
 |  | 
 | 	cmp	x0,#0 | 
 | 	b.ne	.Ldec_key_abort | 
 |  | 
// .Lenc_key left x2 at offset 240 and the round count in w12.
// x2 = first round key, x0 = last round key, x4 = step for walking x0 down.
 | 	sub	x2,x2,#240		// restore original x2 | 
 | 	mov	x4,#-16 | 
 | 	add	x0,x2,x12,lsl#4	// end of key schedule | 
 |  | 
// Swap the outermost pair unmodified.
 | 	ld1	{v0.4s},[x2] | 
 | 	ld1	{v1.4s},[x0] | 
 | 	st1	{v0.4s},[x0],x4 | 
 | 	st1	{v1.4s},[x2],#16 | 
 |  | 
// Swap each inner pair, transforming both keys, while x0 is above x2.
 | .Loop_imc: | 
 | 	ld1	{v0.4s},[x2] | 
 | 	ld1	{v1.4s},[x0] | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	st1	{v0.4s},[x0],x4 | 
 | 	st1	{v1.4s},[x2],#16 | 
 | 	cmp	x0,x2 | 
 | 	b.hi	.Loop_imc | 
 |  | 
// The number of round keys is odd, so both pointers end on the middle key;
// it is transformed where it stands.
 | 	ld1	{v0.4s},[x2] | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	st1	{v0.4s},[x0] | 
 |  | 
 | 	eor	x0,x0,x0		// return value | 
 | .Ldec_key_abort: | 
 | 	ldp	x29,x30,[sp],#16 | 
 | 	ret | 
 | .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key | 
 | .globl	aes_v8_encrypt | 
// aes_v8_encrypt - encrypt one 16-byte block.
//   In:  x0 = source block
//        x1 = destination block
//        x2 = key schedule, with the 32-bit round count at offset 240
//   Modifies x2, w3, v0-v2 and the flags. Leaf routine, no stack use.
 | .type	aes_v8_encrypt,%function | 
 | .align	5 | 
 | aes_v8_encrypt: | 
// v2 = state, w3 = rounds - 2, v0/v1 = round keys 0 and 1.
 | 	ld1	{v2.16b},[x0] | 
 | 	ldr	w3,[x2,#240] | 
 | 	ld1	{v0.4s,v1.4s},[x2],#32 | 
 | 	sub	w3,w3,#2 | 
 |  | 
// Two rounds per pass, then fetch the next pair of round keys.
 | .Lenc_round_pair: | 
 | 	aese	v2.16b,v0.16b | 
 | 	aesmc	v2.16b,v2.16b | 
 | 	aese	v2.16b,v1.16b | 
 | 	aesmc	v2.16b,v2.16b | 
 | 	ld1	{v0.4s,v1.4s},[x2],#32 | 
 | 	subs	w3,w3,#2 | 
 | 	b.gt	.Lenc_round_pair | 
 |  | 
// Last two rounds: the final one has no aesmc and ends with a plain xor of
// the last round key, which x2 now points at.
 | 	aese	v2.16b,v0.16b | 
 | 	aesmc	v2.16b,v2.16b | 
 | 	ld1	{v0.4s},[x2] | 
 | 	aese	v2.16b,v1.16b | 
 | 	eor	v2.16b,v2.16b,v0.16b | 
 |  | 
 | 	st1	{v2.16b},[x1] | 
 | 	ret | 
 | .size	aes_v8_encrypt,.-aes_v8_encrypt | 
 | .globl	aes_v8_decrypt | 
// aes_v8_decrypt - decrypt one 16-byte block.
//   In:  x0 = source block
//        x1 = destination block
//        x2 = decryption key schedule, 32-bit round count at offset 240
//   Modifies x2, w3, v0-v2 and the flags. Leaf routine, no stack use.
 | .type	aes_v8_decrypt,%function | 
 | .align	5 | 
 | aes_v8_decrypt: | 
// v2 = state, w3 = rounds - 2, v0/v1 = round keys 0 and 1.
 | 	ld1	{v2.16b},[x0] | 
 | 	ldr	w3,[x2,#240] | 
 | 	ld1	{v0.4s,v1.4s},[x2],#32 | 
 | 	sub	w3,w3,#2 | 
 |  | 
// Two rounds per pass, then fetch the next pair of round keys.
 | .Ldec_round_pair: | 
 | 	aesd	v2.16b,v0.16b | 
 | 	aesimc	v2.16b,v2.16b | 
 | 	aesd	v2.16b,v1.16b | 
 | 	aesimc	v2.16b,v2.16b | 
 | 	ld1	{v0.4s,v1.4s},[x2],#32 | 
 | 	subs	w3,w3,#2 | 
 | 	b.gt	.Ldec_round_pair | 
 |  | 
// Last two rounds: the final one has no aesimc and ends with a plain xor of
// the last round key, which x2 now points at.
 | 	aesd	v2.16b,v0.16b | 
 | 	aesimc	v2.16b,v2.16b | 
 | 	ld1	{v0.4s},[x2] | 
 | 	aesd	v2.16b,v1.16b | 
 | 	eor	v2.16b,v2.16b,v0.16b | 
 |  | 
 | 	st1	{v2.16b},[x1] | 
 | 	ret | 
 | .size	aes_v8_decrypt,.-aes_v8_decrypt | 
 | .globl	aes_v8_cbc_encrypt | 
// aes_v8_cbc_encrypt - CBC-mode encryption or decryption of whole blocks.
//   In:  x0 = input
//        x1 = output
//        x2 = length in bytes
//        x3 = key schedule, with the 32-bit round count at offset 240
//        x4 = 16-byte IV; overwritten on exit with the chaining value
//        w5 = nonzero to encrypt, zero to decrypt
//   A length below 16 returns at once without touching output or IV. The
//   length is otherwise rounded down to a multiple of 16.
//   Modifies x0-x2, x5-x8, v0-v7, v16-v23 and the flags.
 | .type	aes_v8_cbc_encrypt,%function | 
 | .align	5 | 
 | aes_v8_cbc_encrypt: | 
 | 	stp	x29,x30,[sp,#-16]! | 
 | 	add	x29,sp,#0 | 
// x2 is biased by one block from here on. x8 is the post-index step of the
// look-ahead input loads in the encryption loops; it becomes 0 once only one
// block is left, so those loads re-read the last block instead of running
// past the input.
 | 	subs	x2,x2,#16 | 
 | 	mov	x8,#16 | 
 | 	b.lo	.Lcbc_abort | 
 | 	csel	x8,xzr,x8,eq | 
 |  | 
// The flags of this compare are consumed by b.eq .Lcbc_dec below; nothing in
// between sets flags. w5 itself is reused for the round count.
 | 	cmp	w5,#0			// en- or decrypting? | 
 | 	ldr	w5,[x3,#240] | 
 | 	and	x2,x2,#-16 | 
 | 	ld1	{v6.16b},[x4] | 
 | 	ld1	{v0.16b},[x0],x8 | 
 |  | 
// v6 = IV, v0 = first input block.
// v16,v17 = round keys 0 and 1; v18-v23 = the six keys before the last one;
// v7 = last round key. w5 ends up as rounds - 8 and x7 at round key 2.
 | 	ld1	{v16.4s-v17.4s},[x3]		// load key schedule... | 
 | 	sub	w5,w5,#6 | 
 | 	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys | 
 | 	sub	w5,w5,#2 | 
 | 	ld1	{v18.4s-v19.4s},[x7],#32 | 
 | 	ld1	{v20.4s-v21.4s},[x7],#32 | 
 | 	ld1	{v22.4s-v23.4s},[x7],#32 | 
 | 	ld1	{v7.4s},[x7] | 
 |  | 
 | 	add	x7,x3,#32 | 
 | 	mov	w6,w5 | 
 | 	b.eq	.Lcbc_dec | 
 |  | 
// Encryption. v0 = plaintext ^ IV. v5 = rk[0] ^ rk[last] is xored into each
// look-ahead plaintext block (kept in v16), so that the first aese of the
// next block, applied to the state before its final key xor, adds the
// previous ciphertext, the new plaintext and rk[0] in one step.
// w5 == 2 means a 10-round key, which has its own loop with every round key
// held in registers.
 | 	cmp	w5,#2 | 
 | 	eor	v0.16b,v0.16b,v6.16b | 
 | 	eor	v5.16b,v16.16b,v7.16b | 
 | 	b.eq	.Lcbc_enc128 | 
 |  | 
// Generic encryption loop: the inner loop walks the middle round keys through
// x7 (w6 counts them down), the straight-line part applies the eight keys
// before the last one.
 | .Loop_cbc_enc: | 
 | 	aese	v0.16b,v16.16b | 
 | 	ld1	{v16.4s},[x7],#16 | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	subs	w6,w6,#2 | 
 | 	aese	v0.16b,v17.16b | 
 | 	ld1	{v17.4s},[x7],#16 | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	b.gt	.Loop_cbc_enc | 
 |  | 
// The subs on x2 below sets the flags used by both the csel and the b.hs at
// the end of the block; the instructions in between leave them alone.
 | 	aese	v0.16b,v16.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 subs	x2,x2,#16 | 
 | 	aese	v0.16b,v17.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 csel	x8,xzr,x8,eq | 
 | 	aese	v0.16b,v18.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 add	x7,x3,#16 | 
 | 	aese	v0.16b,v19.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 ld1	{v16.16b},[x0],x8 | 
 | 	aese	v0.16b,v20.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 eor	v16.16b,v16.16b,v5.16b | 
 | 	aese	v0.16b,v21.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1] | 
 | 	aese	v0.16b,v22.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	aese	v0.16b,v23.16b | 
 |  | 
// v6 = ciphertext block; it also ends up as the new IV at .Lcbc_done.
 | 	 mov	w6,w5 | 
 | 	eor	v6.16b,v0.16b,v7.16b | 
 | 	st1	{v6.16b},[x1],#16 | 
 | 	b.hs	.Loop_cbc_enc | 
 |  | 
 | 	b	.Lcbc_done | 
 |  | 
// 10-round encryption loop. v2,v3 = round keys 2 and 3, so no key is loaded
// inside the loop. The store of each ciphertext block is deferred into the
// following iteration; the last one is stored after the loop.
 | .align	5 | 
 | .Lcbc_enc128: | 
 | 	ld1	{v2.4s-v3.4s},[x7] | 
 | 	aese	v0.16b,v16.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	b	.Lenter_cbc_enc128 | 
 | .Loop_cbc_enc128: | 
 | 	aese	v0.16b,v16.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 st1	{v6.16b},[x1],#16 | 
 | .Lenter_cbc_enc128: | 
 | 	aese	v0.16b,v17.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 subs	x2,x2,#16 | 
 | 	aese	v0.16b,v2.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 csel	x8,xzr,x8,eq | 
 | 	aese	v0.16b,v3.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	aese	v0.16b,v18.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	aese	v0.16b,v19.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 ld1	{v16.16b},[x0],x8 | 
 | 	aese	v0.16b,v20.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	aese	v0.16b,v21.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	aese	v0.16b,v22.16b | 
 | 	aesmc	v0.16b,v0.16b | 
 | 	 eor	v16.16b,v16.16b,v5.16b | 
 | 	aese	v0.16b,v23.16b | 
 | 	eor	v6.16b,v0.16b,v7.16b | 
 | 	b.hs	.Loop_cbc_enc128 | 
 |  | 
 | 	st1	{v6.16b},[x1],#16 | 
 | 	b	.Lcbc_done | 
// Decryption, three blocks per pass. v0, v1, v18 are the blocks being
// decrypted; v2, v3, v19 keep untouched ciphertext copies that serve as
// chaining values; v6 is the IV or the last ciphertext of the previous pass.
// x2 is biased by a further 32, so "lo" below means fewer than three blocks.
 | .align	5 | 
 | .Lcbc_dec: | 
 | 	ld1	{v18.16b},[x0],#16 | 
 | 	subs	x2,x2,#32		// bias | 
 | 	add	w6,w5,#2 | 
 | 	orr	v3.16b,v0.16b,v0.16b | 
 | 	orr	v1.16b,v0.16b,v0.16b | 
 | 	orr	v19.16b,v18.16b,v18.16b | 
 | 	b.lo	.Lcbc_dec_tail | 
 |  | 
 | 	orr	v1.16b,v18.16b,v18.16b | 
 | 	ld1	{v18.16b},[x0],#16 | 
 | 	orr	v2.16b,v0.16b,v0.16b | 
 | 	orr	v3.16b,v1.16b,v1.16b | 
 | 	orr	v19.16b,v18.16b,v18.16b | 
 |  | 
 | .Loop3x_cbc_dec: | 
 | 	aesd	v0.16b,v16.16b | 
 | 	aesd	v1.16b,v16.16b | 
 | 	aesd	v18.16b,v16.16b | 
 | 	ld1	{v16.4s},[x7],#16 | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	subs	w6,w6,#2 | 
 | 	aesd	v0.16b,v17.16b | 
 | 	aesd	v1.16b,v17.16b | 
 | 	aesd	v18.16b,v17.16b | 
 | 	ld1	{v17.4s},[x7],#16 | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	b.gt	.Loop3x_cbc_dec | 
 |  | 
// Remaining rounds. Alongside them the three chaining values are pre-xored
// with the last round key (v4, v5, v17), so one eor per block after the
// final aesd yields the plaintext. The subs on x2 sets the flags used by the
// csel and by the b.hs that closes the pass.
 | 	aesd	v0.16b,v16.16b | 
 | 	aesd	v1.16b,v16.16b | 
 | 	aesd	v18.16b,v16.16b | 
 | 	 eor	v4.16b,v6.16b,v7.16b | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 eor	v5.16b,v2.16b,v7.16b | 
 | 	aesd	v0.16b,v17.16b | 
 | 	aesd	v1.16b,v17.16b | 
 | 	aesd	v18.16b,v17.16b | 
 | 	 eor	v17.16b,v3.16b,v7.16b | 
 | 	 subs	x2,x2,#0x30 | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 orr	v6.16b,v19.16b,v19.16b | 
 | 	 csel	x6,x2,x6,lo			// x6, w6, is zero at this point | 
 | 	aesd	v0.16b,v20.16b | 
 | 	aesd	v1.16b,v20.16b | 
 | 	aesd	v18.16b,v20.16b | 
 | 	 add	x0,x0,x6		// x0 is adjusted in such way that | 
 | 					// at exit from the loop v1.16b-v18.16b | 
 | 					// are loaded with last "words" | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 mov	x7,x3 | 
 | 	aesd	v0.16b,v21.16b | 
 | 	aesd	v1.16b,v21.16b | 
 | 	aesd	v18.16b,v21.16b | 
 | 	 ld1	{v2.16b},[x0],#16 | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 ld1	{v3.16b},[x0],#16 | 
 | 	aesd	v0.16b,v22.16b | 
 | 	aesd	v1.16b,v22.16b | 
 | 	aesd	v18.16b,v22.16b | 
 | 	 ld1	{v19.16b},[x0],#16 | 
 | 	aesimc	v0.16b,v0.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 ld1 {v16.4s},[x7],#16	// re-pre-load rndkey[0] | 
 | 	aesd	v0.16b,v23.16b | 
 | 	aesd	v1.16b,v23.16b | 
 | 	aesd	v18.16b,v23.16b | 
 |  | 
 | 	 add	w6,w5,#2 | 
 | 	eor	v4.16b,v4.16b,v0.16b | 
 | 	eor	v5.16b,v5.16b,v1.16b | 
 | 	eor	v18.16b,v18.16b,v17.16b | 
 | 	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1] | 
 | 	 orr	v0.16b,v2.16b,v2.16b | 
 | 	st1	{v4.16b},[x1],#16 | 
 | 	 orr	v1.16b,v3.16b,v3.16b | 
 | 	st1	{v5.16b},[x1],#16 | 
 | 	st1	{v18.16b},[x1],#16 | 
 | 	 orr	v18.16b,v19.16b,v19.16b | 
 | 	b.hs	.Loop3x_cbc_dec | 
 |  | 
// x2 is now -0x30, -0x20 or -0x10: zero, one or two blocks are left.
 | 	cmn	x2,#0x30 | 
 | 	b.eq	.Lcbc_done | 
 | 	nop | 
 |  | 
// Tail: v1 and v18 are decrypted side by side. The cmn against 0x20 further
// down decides between the one-block case (only v18 is used) and the
// two-block case; its flags survive until the b.eq.
 | .Lcbc_dec_tail: | 
 | 	aesd	v1.16b,v16.16b | 
 | 	aesd	v18.16b,v16.16b | 
 | 	ld1	{v16.4s},[x7],#16 | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	subs	w6,w6,#2 | 
 | 	aesd	v1.16b,v17.16b | 
 | 	aesd	v18.16b,v17.16b | 
 | 	ld1	{v17.4s},[x7],#16 | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	b.gt	.Lcbc_dec_tail | 
 |  | 
 | 	aesd	v1.16b,v16.16b | 
 | 	aesd	v18.16b,v16.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	aesd	v1.16b,v17.16b | 
 | 	aesd	v18.16b,v17.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	aesd	v1.16b,v20.16b | 
 | 	aesd	v18.16b,v20.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 cmn	x2,#0x20 | 
 | 	aesd	v1.16b,v21.16b | 
 | 	aesd	v18.16b,v21.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 eor	v5.16b,v6.16b,v7.16b | 
 | 	aesd	v1.16b,v22.16b | 
 | 	aesd	v18.16b,v22.16b | 
 | 	aesimc	v1.16b,v1.16b | 
 | 	aesimc	v18.16b,v18.16b | 
 | 	 eor	v17.16b,v3.16b,v7.16b | 
 | 	aesd	v1.16b,v23.16b | 
 | 	aesd	v18.16b,v23.16b | 
 | 	b.eq	.Lcbc_dec_one | 
 | 	eor	v5.16b,v5.16b,v1.16b | 
 | 	eor	v17.16b,v17.16b,v18.16b | 
 | 	 orr	v6.16b,v19.16b,v19.16b | 
 | 	st1	{v5.16b},[x1],#16 | 
 | 	st1	{v17.16b},[x1],#16 | 
 | 	b	.Lcbc_done | 
 |  | 
 | .Lcbc_dec_one: | 
 | 	eor	v5.16b,v5.16b,v18.16b | 
 | 	 orr	v6.16b,v19.16b,v19.16b | 
 | 	st1	{v5.16b},[x1],#16 | 
 |  | 
// v6 holds the last ciphertext block on every path; write it back as the IV.
 | .Lcbc_done: | 
 | 	st1	{v6.16b},[x4] | 
// No call is made in this routine, so x30 is intact and only x29 is reloaded.
 | .Lcbc_abort: | 
 | 	ldr	x29,[sp],#16 | 
 | 	ret | 
 | .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt | 
 | .globl	aes_v8_ctr32_encrypt_blocks | 
// aes_v8_ctr32_encrypt_blocks - CTR mode over whole blocks, 32-bit counter.
//   In:  x0 = input
//        x1 = output
//        x2 = number of 16-byte blocks
//        x3 = key schedule, with the 32-bit round count at offset 240
//        x4 = 16-byte counter block; its last 32-bit word is the counter,
//             byte-reversed into host order on little-endian builds
//   The counter block at x4 is only read; it is not written back.
//   A block count of zero returns without reading input or writing output.
//   Modifies x0-x2, x5-x10, x12, v0-v7, v16-v23 and the flags.
 | .type	aes_v8_ctr32_encrypt_blocks,%function | 
 | .align	5 | 
 | aes_v8_ctr32_encrypt_blocks: | 
 | 	stp		x29,x30,[sp,#-16]! | 
 | 	add		x29,sp,#0 | 
// Guard for an empty request: without it a count of 0 would fall into
// .Lctr32_tail, whose final compare against 1 does not skip the stores, and
// 32 bytes would be written to the output.
 | 	cbz		x2,.Lctr32_done | 
 | 	ldr		w5,[x3,#240] | 
 |  | 
 | 	ldr		w8, [x4, #12] | 
 | 	ld1		{v0.4s},[x4] | 
 |  | 
// v16,v17 = round keys 0 and 1; v20-v23 = the four keys before the last one;
// v7 = last round key. w5 ends up as rounds - 6 and x7 at round key 2.
// The flags of cmp x2,#2 feed both the csel and the b.ls below. x12 is the
// post-index step of the first load in the tail: 0 for a single block, so
// the second tail load re-reads that block instead of passing the input end.
 | 	ld1		{v16.4s-v17.4s},[x3]		// load key schedule... | 
 | 	sub		w5,w5,#4 | 
 | 	mov		x12,#16 | 
 | 	cmp		x2,#2 | 
 | 	add		x7,x3,x5,lsl#4	// pointer to last 5 round keys | 
 | 	sub		w5,w5,#2 | 
 | 	ld1		{v20.4s-v21.4s},[x7],#32 | 
 | 	ld1		{v22.4s-v23.4s},[x7],#32 | 
 | 	ld1		{v7.4s},[x7] | 
 | 	add		x7,x3,#32 | 
 | 	mov		w6,w5 | 
 | 	csel	x12,xzr,x12,lo | 
 | #ifndef __ARMEB__ | 
 | 	rev		w8, w8 | 
 | #endif | 
// v0, v1, v18 = counter blocks n, n+1, n+2; v6 keeps the unmodified counter
// block as a template. w8 tracks the counter of the third block in flight.
 | 	orr		v1.16b,v0.16b,v0.16b | 
 | 	add		w10, w8, #1 | 
 | 	orr		v18.16b,v0.16b,v0.16b | 
 | 	add		w8, w8, #2 | 
 | 	orr		v6.16b,v0.16b,v0.16b | 
 | 	rev		w10, w10 | 
 | 	mov		v1.s[3],w10 | 
 | 	b.ls		.Lctr32_tail | 
 | 	rev		w12, w8 | 
 | 	sub		x2,x2,#3		// bias | 
 | 	mov		v18.s[3],w12 | 
 | 	b		.Loop3x_ctr32 | 
 |  | 
// Three blocks per pass. The inner loop walks the middle round keys through
// x7 while w6 counts them down.
 | .align	4 | 
 | .Loop3x_ctr32: | 
 | 	aese		v0.16b,v16.16b | 
 | 	aese		v1.16b,v16.16b | 
 | 	aese		v18.16b,v16.16b | 
 | 	ld1		{v16.4s},[x7],#16 | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	aesmc		v18.16b,v18.16b | 
 | 	subs		w6,w6,#2 | 
 | 	aese		v0.16b,v17.16b | 
 | 	aese		v1.16b,v17.16b | 
 | 	aese		v18.16b,v17.16b | 
 | 	ld1		{v17.4s},[x7],#16 | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	aesmc		v18.16b,v18.16b | 
 | 	b.gt		.Loop3x_ctr32 | 
 |  | 
// Remaining rounds continue in v4, v5, v17 so that v0, v1, v18 can already
// be rebuilt from the template for the next pass. The input blocks (v2, v3,
// v19) are xored with the last round key ahead of time; one eor with the
// cipher state then gives the output. The subs on x2 sets the flags for the
// b.hs that closes the pass.
 | 	aese		v0.16b,v16.16b | 
 | 	aese		v1.16b,v16.16b | 
 | 	aese		v18.16b,v16.16b | 
 | 	 mov		x7,x3 | 
 | 	aesmc		v4.16b,v0.16b | 
 | 	 ld1		{v2.16b},[x0],#16 | 
 | 	aesmc		v5.16b,v1.16b | 
 | 	aesmc		v18.16b,v18.16b | 
 | 	 orr		v0.16b,v6.16b,v6.16b | 
 | 	aese		v4.16b,v17.16b | 
 | 	 ld1		{v3.16b},[x0],#16 | 
 | 	aese		v5.16b,v17.16b | 
 | 	aese		v18.16b,v17.16b | 
 | 	 orr		v1.16b,v6.16b,v6.16b | 
 | 	aesmc		v4.16b,v4.16b | 
 | 	 ld1		{v19.16b},[x0],#16 | 
 | 	aesmc		v5.16b,v5.16b | 
 | 	aesmc		v17.16b,v18.16b | 
 | 	 orr		v18.16b,v6.16b,v6.16b | 
 | 	 add		w9,w8,#1 | 
 | 	aese		v4.16b,v20.16b | 
 | 	aese		v5.16b,v20.16b | 
 | 	aese		v17.16b,v20.16b | 
 | 	 eor		v2.16b,v2.16b,v7.16b | 
 | 	 add		w10,w8,#2 | 
 | 	aesmc		v4.16b,v4.16b | 
 | 	aesmc		v5.16b,v5.16b | 
 | 	aesmc		v17.16b,v17.16b | 
 | 	 eor		v3.16b,v3.16b,v7.16b | 
 | 	 add		w8,w8,#3 | 
 | 	aese		v4.16b,v21.16b | 
 | 	aese		v5.16b,v21.16b | 
 | 	aese		v17.16b,v21.16b | 
 | 	 eor		v19.16b,v19.16b,v7.16b | 
 | 	 rev		w9,w9 | 
 | 	aesmc		v4.16b,v4.16b | 
 | 	 ld1	 {v16.4s},[x7],#16	// re-pre-load rndkey[0] | 
 | 	aesmc		v5.16b,v5.16b | 
 | 	aesmc		v17.16b,v17.16b | 
 | 	 mov	v0.s[3], w9 | 
 | 	 rev		w10,w10 | 
 | 	aese		v4.16b,v22.16b | 
 | 	aese		v5.16b,v22.16b | 
 | 	aese		v17.16b,v22.16b | 
 | 	 mov	v1.s[3], w10 | 
 | 	 rev		w12,w8 | 
 | 	aesmc		v4.16b,v4.16b | 
 | 	aesmc		v5.16b,v5.16b | 
 | 	aesmc		v17.16b,v17.16b | 
 | 	 mov	v18.s[3], w12 | 
 | 	 subs		x2,x2,#3 | 
 | 	aese		v4.16b,v23.16b | 
 | 	aese		v5.16b,v23.16b | 
 | 	aese		v17.16b,v23.16b | 
 |  | 
 | 	 mov		w6,w5 | 
 | 	eor		v2.16b,v2.16b,v4.16b | 
 | 	eor		v3.16b,v3.16b,v5.16b | 
 | 	eor		v19.16b,v19.16b,v17.16b | 
 | 	 ld1	 {v17.4s},[x7],#16	// re-pre-load rndkey[1] | 
 | 	st1		{v2.16b},[x1],#16 | 
 | 	st1		{v3.16b},[x1],#16 | 
 | 	st1		{v19.16b},[x1],#16 | 
 | 	b.hs		.Loop3x_ctr32 | 
 |  | 
// Undo the bias: x2 = 0, 1 or 2 blocks left. x12 is set up as at entry
// (0 when exactly one block remains).
 | 	adds		x2,x2,#3 | 
 | 	b.eq		.Lctr32_done | 
 | 	cmp		x2,#1 | 
 | 	mov		x12,#16 | 
 | 	csel	x12,xzr,x12,eq | 
 |  | 
// Tail: two keystream blocks are always computed (v0, v1); the second output
// block is stored only when x2 is not 1.
 | .Lctr32_tail: | 
 | 	aese		v0.16b,v16.16b | 
 | 	aese		v1.16b,v16.16b | 
 | 	ld1		{v16.4s},[x7],#16 | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	subs		w6,w6,#2 | 
 | 	aese		v0.16b,v17.16b | 
 | 	aese		v1.16b,v17.16b | 
 | 	ld1		{v17.4s},[x7],#16 | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	b.gt		.Lctr32_tail | 
 |  | 
 | 	aese		v0.16b,v16.16b | 
 | 	aese		v1.16b,v16.16b | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	aese		v0.16b,v17.16b | 
 | 	aese		v1.16b,v17.16b | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	 ld1		{v2.16b},[x0],x12 | 
 | 	aese		v0.16b,v20.16b | 
 | 	aese		v1.16b,v20.16b | 
 | 	 ld1		{v3.16b},[x0] | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	aese		v0.16b,v21.16b | 
 | 	aese		v1.16b,v21.16b | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	aese		v0.16b,v22.16b | 
 | 	aese		v1.16b,v22.16b | 
 | 	 eor		v2.16b,v2.16b,v7.16b | 
 | 	aesmc		v0.16b,v0.16b | 
 | 	aesmc		v1.16b,v1.16b | 
 | 	 eor		v3.16b,v3.16b,v7.16b | 
 | 	aese		v0.16b,v23.16b | 
 | 	aese		v1.16b,v23.16b | 
 |  | 
 | 	cmp		x2,#1 | 
 | 	eor		v2.16b,v2.16b,v0.16b | 
 | 	eor		v3.16b,v3.16b,v1.16b | 
 | 	st1		{v2.16b},[x1],#16 | 
 | 	b.eq		.Lctr32_done | 
 | 	st1		{v3.16b},[x1] | 
 |  | 
// No call is made in this routine, so x30 is intact and only x29 is reloaded.
 | .Lctr32_done: | 
 | 	ldr		x29,[sp],#16 | 
 | 	ret | 
 | .size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks | 
 | #endif |