cipher/rijndael.c
Context not available.
                            size_t nblocks, int encrypt);
#endif /*USE_ARM_ASM*/
#ifdef USE_PPC_ASM
/* POWER 8 AES extensions */
extern void aes_p8_encrypt (const unsigned char *in,       | #include <altivec.h>
                            unsigned char *out,            |
                            const RIJNDAEL_context *ctx);  | typedef vector unsigned char block;

vector unsigned char backwards = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};

#ifdef __LITTLE_ENDIAN__
#define swap_if_le(a) \
  vec_perm(a, a, backwards)
#elif __BIG_ENDIAN__
#define swap_if_le(a) (a)
#else
#error "What endianness?"
#endif
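The vcipher/vncipher instructions expect the AES state in big-endian element order, so on a little-endian target swap_if_le reverses the sixteen bytes of a block with vec_perm. As a quick illustration, here is a minimal stand-alone check of that permute; it is only a sketch (the test values and the printf harness are not part of the patch) and assumes a POWER target built with AltiVec support:

#include <altivec.h>
#include <stdio.h>

int main (void)
{
  vector unsigned char backwards =
    {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
  vector unsigned char in =
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15};
  /* result element i = in[backwards[i]] = in[15 - i], i.e. the block
     with its byte order reversed */
  vector unsigned char out = vec_perm (in, in, backwards);
  int i;
  for (i = 0; i < 16; i++)
    printf ("%d ", ((unsigned char *)&out)[i]);   /* prints 15 14 ... 1 0 */
  printf ("\n");
  return 0;
}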
/* State is passed in AltiVec registers (big-endian element order).
 * Sadly, compilers don't know how to unroll outer loops into
 * inner loops with more registers when static functions are involved,
 * so the bulk paths below have to be hand-unrolled to be properly
 * optimized for OOO multi-issue.
 */
static block _gcry_aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx,
                                             block a) {
  int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block *rk = (block*)ctx->keyschenc;

  //hexDump("sa", &a, sizeof(a));
  a = rk[0] ^ a;
  //hexDump("sa", &a, sizeof(a));
  for (r = 1; r < rounds; r++) {
    __asm__ volatile ("vcipher %0, %0, %1\n\t"
                      :"+v" (a)
                      :"v" (rk[r]));
    //hexDump("sa", &a, sizeof(a));
  }
  __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
                    :"+v" (a)
                    :"v" (rk[r]));
  //hexDump("end", &a, sizeof(a));
  return a;
}

static block _gcry_aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx,
                                             block a) {
  int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block *rk = (block*)ctx->keyschdec;

  //hexDump("sa", &a, sizeof(a));
  a = rk[0] ^ a;
  //hexDump("sa", &a, sizeof(a));
  for (r = 1; r < rounds; r++) {
    __asm__ volatile ("vncipher %0, %0, %1\n\t"
                      :"+v" (a)
                      :"v" (rk[r]));
    //hexDump("sa", &a, sizeof(a));
  }
  __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
                    :"+v" (a)
                    :"v" (rk[r]));
  //hexDump("end", &a, sizeof(a));
  return a;
}
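For orientation, the key-schedule walk in the two helpers above can be written out in plain C with a generic round callback. This is only a hedged sketch (xor16, aes_rounds_sketch and the callback names are hypothetical, not part of libgcrypt); it merely makes explicit that rk holds rounds + 1 sixteen-byte round keys, that each vcipher/vncipher step consumes one of them, and that the *last variants finish with the final round key:

typedef unsigned char byte16[16];

static void xor16 (byte16 dst, const byte16 src)
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] ^= src[i];
}

/* state is updated in place; rk has rounds + 1 entries of 16 bytes.
   round() stands for what one vcipher/vncipher does with a round key,
   last_round() for vcipherlast/vncipherlast. */
static void aes_rounds_sketch (byte16 state, const byte16 *rk, int rounds,
                               void (*round) (byte16, const byte16),
                               void (*last_round) (byte16, const byte16))
{
  int r;
  xor16 (state, rk[0]);            /* initial whitening, as in a = rk[0] ^ a */
  for (r = 1; r < rounds; r++)
    round (state, rk[r]);          /* one round per remaining key ...        */
  last_round (state, rk[r]);       /* ... with the final round key last      */
}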
static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx,
                                            unsigned char *out,           |                                             unsigned char *b,
                                            const unsigned char *in)      |                                             const unsigned char *a) {
{                                                                         |   uintptr_t zero = 0;
  /* When I tried to switch these registers in the assembly it broke. */  |   block sa;
  aes_p8_encrypt (in, out, ctx);                                          |   //hexDump("key", rk_c, 16 * 15);
  if ((uintptr_t)a % 16 == 0) {
    sa = vec_ld(0, a);
  } else {
    block unalignedprev, unalignedcur;
    unalignedprev = vec_ld(0, a);
    unalignedcur = vec_ld(16, a);
    sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a));
  }
  sa = swap_if_le(sa);
  sa = _gcry_aes_ppc8_encrypt_altivec(ctx, sa);
  __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
                    :
                    : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
  return 0; /* does not use stack */
}

/* this is the decryption key part of context */
extern void aes_p8_decrypt (const unsigned char *in,
                            unsigned char *out,
                            const void *sboxes);

static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
                                            unsigned char *out,           |                                             unsigned char *b,
                                            const unsigned char *in)      |                                             const unsigned char *a)
{
  aes_p8_decrypt (in, out, &ctx->u2);                                     |   int r;
  uintptr_t zero = 0;
  int rounds = ctx->rounds;
  block sa, unalignedprev, unalignedcur;
  block *rk = (block*)ctx->keyschdec;
  //hexDump("key", rk, 16 * 15);
  if ((uintptr_t)a % 16 == 0) {
    sa = vec_ld(0, a);
  } else {
    unalignedprev = vec_ld(0, a);
    unalignedcur = vec_ld(16, a);
    sa = vec_perm(unalignedprev, unalignedcur, vec_lvsl(0, a));
  }
  sa = swap_if_le(sa);
  sa = _gcry_aes_ppc8_decrypt_altivec(ctx, sa);
  //hexDump("sa", &sa, sizeof(sa));
  if ((uintptr_t)b % 16 == 0)
    vec_vsx_st(swap_if_le(sa), 0, b);
  else {
    __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
                      :
                      : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
  }
  return 0; /* does not use stack */
}
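Both wrappers load a possibly misaligned input block with the classic AltiVec sequence: two vec_ld loads (vec_ld ignores the low address bits), then vec_perm with a vec_lvsl-generated control to pick out the sixteen bytes that start at the unaligned address. Below is a hedged sketch of just that idiom, with a made-up helper name and reusing the block typedef from above; like the code in this patch it assumes the extra bytes touched by the second aligned load are readable:

static block
load_block_sketch (const unsigned char *p)
{
  if (((uintptr_t)p % 16) == 0)
    return vec_ld (0, p);        /* already aligned: a single load suffices */
  else
    {
      /* The two aligned loads together cover the 16 bytes at p;
         vec_lvsl(0, p) supplies the permute control that selects them
         from the concatenated pair, exactly as done above.  */
      block lo = vec_ld (0, p);
      block hi = vec_ld (16, p);
      return vec_perm (lo, hi, vec_lvsl (0, p));
    }
}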
size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, | |||||
const void *inbuf_arg, size_t nblocks, | |||||
int encrypt) { | |||||
RIJNDAEL_context *ctx = (void *)&c->context.c; | |||||
unsigned char *outbuf = outbuf_arg; | |||||
const unsigned char *inbuf = inbuf_arg; | |||||
block *in = (block*)inbuf; | |||||
block *out = (block*)outbuf; | |||||
uintptr_t zero = 0; | |||||
int r; | |||||
int rounds = ctx->rounds; | |||||
int burn_depth = 0; | |||||
if (encrypt) | |||||
{ | |||||
const int unroll = 8; | |||||
block unalignedprev, ctr, iv; | |||||
if (((uintptr_t)inbuf % 16) != 0) { | |||||
unalignedprev = vec_ld(0, in++); | |||||
} | |||||
iv = vec_ld(0, (block*)&c->u_iv.iv); | |||||
ctr = vec_ld(0, (block*)&c->u_ctr.ctr); | |||||
//hexDump("ctr", &ctr, 16); | |||||
//hexDump("key", &ctx->u1, sizeof(ctx->u1)); | |||||
for ( ;nblocks >= unroll; nblocks -= unroll) | |||||
{ | |||||
u64 i = c->u_mode.ocb.data_nblocks + 1; | |||||
block l0, l1, l2, l3, l4, l5, l6, l7; | |||||
block b0, b1, b2, b3, b4, b5, b6, b7; | |||||
block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; | |||||
const block *rk = (block*)&ctx->keyschenc; | |||||
int j; | |||||
c->u_mode.ocb.data_nblocks += unroll; | |||||
//hexDump("iv", &iv, 16); | |||||
iv0 = iv; | |||||
if ((uintptr_t)inbuf % 16 == 0) | |||||
{ | |||||
b0 = vec_ld(0, in++); | |||||
//hexDump("start", &b0, 16); | |||||
b1 = vec_ld(0, in++); | |||||
b2 = vec_ld(0, in++); | |||||
b3 = vec_ld(0, in++); | |||||
b4 = vec_ld(0, in++); | |||||
b5 = vec_ld(0, in++); | |||||
b6 = vec_ld(0, in++); | |||||
b7 = vec_ld(0, in++); | |||||
} | |||||
else | |||||
{ | |||||
block unaligned0, unaligned1, unaligned2, | |||||
unaligned3, unaligned4, unaligned5, unaligned6; | |||||
unaligned0 = vec_ld(0, in++); | |||||
unaligned1 = vec_ld(0, in++); | |||||
unaligned2 = vec_ld(0, in++); | |||||
unaligned3 = vec_ld(0, in++); | |||||
unaligned4 = vec_ld(0, in++); | |||||
unaligned5 = vec_ld(0, in++); | |||||
unaligned6 = vec_ld(0, in++); | |||||
b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf)); | |||||
//hexDump("start", &b0, 16); | |||||
unalignedprev = vec_ld(0, in++); | |||||
b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf)); | |||||
b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf)); | |||||
b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf)); | |||||
b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf)); | |||||
b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf)); | |||||
b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf)); | |||||
b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("i", &i, sizeof(i)); | |||||
l0 = *(block*)ocb_get_l(c, i++); | |||||
//hexDump("l", &l0, 16); | |||||
l1 = *(block*)ocb_get_l(c, i++); | |||||
l2 = *(block*)ocb_get_l(c, i++); | |||||
l3 = *(block*)ocb_get_l(c, i++); | |||||
l4 = *(block*)ocb_get_l(c, i++); | |||||
l5 = *(block*)ocb_get_l(c, i++); | |||||
l6 = *(block*)ocb_get_l(c, i++); | |||||
l7 = *(block*)ocb_get_l(c, i++); | |||||
ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; | |||||
iv0 ^= l0; | |||||
//hexDump("xorl", &iv0, 16); | |||||
b0 ^= iv0; | |||||
//hexDump("xor", &b0, 16); | |||||
iv1 = iv0 ^ l1; | |||||
b1 ^= iv1; | |||||
iv2 = iv1 ^ l2; | |||||
b2 ^= iv2; | |||||
iv3 = iv2 ^ l3; | |||||
b3 ^= iv3; | |||||
iv4 = iv3 ^ l4; | |||||
b4 ^= iv4; | |||||
iv5 = iv4 ^ l5; | |||||
b5 ^= iv5; | |||||
iv6 = iv5 ^ l6; | |||||
b6 ^= iv6; | |||||
iv7 = iv6 ^ l7; | |||||
b7 ^= iv7; | |||||
b0 = swap_if_le(b0); | |||||
//hexDump("swap", &b0, 16); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
b0 ^= rk[0]; | |||||
//hexDump("xor ??", &b0, 16); | |||||
b1 ^= rk[0]; | |||||
b2 ^= rk[0]; | |||||
b3 ^= rk[0]; | |||||
b4 ^= rk[0]; | |||||
b5 ^= rk[0]; | |||||
b6 ^= rk[0]; | |||||
b7 ^= rk[0]; | |||||
for (r = 1;r < rounds;r++) | |||||
{ | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("round", &b0, 16); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipher %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
} | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vcipherlast %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("end", &b0, 16); | |||||
iv = iv7; | |||||
//hexDump("end-iv5", &b0, 16); | |||||
// The unaligned store stxvb16x writes big-endian, | |||||
// so in the unaligned case we swap the iv instead of the bytes | |||||
if ((uintptr_t)outbuf % 16 == 0) | |||||
{ | |||||
vec_vsx_st(swap_if_le(b0) ^ iv0, 0, out++); | |||||
//hexDump("out", out - 1, 16); | |||||
vec_vsx_st(swap_if_le(b1) ^ iv1, 0, out++); | |||||
vec_vsx_st(swap_if_le(b2) ^ iv2, 0, out++); | |||||
vec_vsx_st(swap_if_le(b3) ^ iv3, 0, out++); | |||||
vec_vsx_st(swap_if_le(b4) ^ iv4, 0, out++); | |||||
vec_vsx_st(swap_if_le(b5) ^ iv5, 0, out++); | |||||
vec_vsx_st(swap_if_le(b6) ^ iv6, 0, out++); | |||||
vec_vsx_st(swap_if_le(b7) ^ iv7, 0, out++); | |||||
} | |||||
else | |||||
{ | |||||
b0 ^= swap_if_le(iv0); | |||||
b1 ^= swap_if_le(iv1); | |||||
b2 ^= swap_if_le(iv2); | |||||
b3 ^= swap_if_le(iv3); | |||||
b4 ^= swap_if_le(iv4); | |||||
b5 ^= swap_if_le(iv5); | |||||
b6 ^= swap_if_le(iv6); | |||||
b7 ^= swap_if_le(iv7); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
//hexDump("out-un", out - 1, 16); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
} | |||||
} | |||||
for ( ;nblocks; nblocks-- ) | |||||
{ | |||||
block b; | |||||
u64 i = ++c->u_mode.ocb.data_nblocks; | |||||
const block l = *(block*)ocb_get_l(c, i); | |||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ | |||||
iv ^= l; | |||||
if ((uintptr_t)in % 16 == 0) { | |||||
b = vec_ld(0, in++); | |||||
} else { | |||||
block unalignedprevprev; | |||||
unalignedprevprev = unalignedprev; | |||||
unalignedprev = vec_ld(0, in++); | |||||
b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("start", &b, 16); | |||||
/* Checksum_i = Checksum_{i-1} xor P_i */ | |||||
ctr ^= b; | |||||
//hexDump("ctr", &ctr, 16); | |||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ | |||||
b ^= iv; | |||||
//hexDump("xoriv", &b, 16); | |||||
b = swap_if_le(b); | |||||
b = _gcry_aes_ppc8_encrypt_altivec (ctx, b); | |||||
//hexDump("crypt", &b, 16); | |||||
if ((uintptr_t)out % 16 == 0) | |||||
vec_vsx_st(swap_if_le(b) ^ iv, 0, out++); | |||||
else { | |||||
b ^= swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
: | |||||
: "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); | |||||
} | |||||
//hexDump("out", out - 1, 16); | |||||
} | |||||
// We want to store iv and ctr big-endian and the unaligned | |||||
// store stxvb16x stores them little endian, so we have to swap them. | |||||
iv = swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); | |||||
ctr = swap_if_le(ctr); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); | |||||
} | |||||
else | |||||
{ | |||||
const int unroll = 8; | |||||
block unalignedprev, ctr, iv; | |||||
if (((uintptr_t)inbuf % 16) != 0) { | |||||
unalignedprev = vec_ld(0, in++); | |||||
} | |||||
iv = vec_ld(0, (block*)&c->u_iv.iv); | |||||
ctr = vec_ld(0, (block*)&c->u_ctr.ctr); | |||||
//hexDump("ctr", &ctr, 16); | |||||
//hexDump("key", &ctx->u1, sizeof(ctx->u1)); | |||||
for ( ;nblocks >= unroll; nblocks -= unroll) | |||||
{ | |||||
u64 i = c->u_mode.ocb.data_nblocks + 1; | |||||
block l0, l1, l2, l3, l4, l5, l6, l7; | |||||
block b0, b1, b2, b3, b4, b5, b6, b7; | |||||
block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; | |||||
const block *rk = (block*)&ctx->keyschdec; | |||||
int j; | |||||
c->u_mode.ocb.data_nblocks += unroll; | |||||
//hexDump("iv", &iv, 16); | |||||
iv0 = iv; | |||||
if ((uintptr_t)inbuf % 16 == 0) | |||||
{ | |||||
b0 = vec_ld(0, in++); | |||||
//hexDump("start", &b0, 16); | |||||
b1 = vec_ld(0, in++); | |||||
b2 = vec_ld(0, in++); | |||||
b3 = vec_ld(0, in++); | |||||
b4 = vec_ld(0, in++); | |||||
b5 = vec_ld(0, in++); | |||||
b6 = vec_ld(0, in++); | |||||
b7 = vec_ld(0, in++); | |||||
} | |||||
else | |||||
{ | |||||
block unaligned0, unaligned1, unaligned2, | |||||
unaligned3, unaligned4, unaligned5, unaligned6; | |||||
unaligned0 = vec_ld(0, in++); | |||||
unaligned1 = vec_ld(0, in++); | |||||
unaligned2 = vec_ld(0, in++); | |||||
unaligned3 = vec_ld(0, in++); | |||||
unaligned4 = vec_ld(0, in++); | |||||
unaligned5 = vec_ld(0, in++); | |||||
unaligned6 = vec_ld(0, in++); | |||||
b0 = vec_perm(unalignedprev, unaligned0, vec_lvsl(0, inbuf)); | |||||
//hexDump("start", &b0, 16); | |||||
unalignedprev = vec_ld(0, in++); | |||||
b1 = vec_perm(unaligned0, unaligned1, vec_lvsl(0, inbuf)); | |||||
b2 = vec_perm(unaligned1, unaligned2, vec_lvsl(0, inbuf)); | |||||
b3 = vec_perm(unaligned2, unaligned3, vec_lvsl(0, inbuf)); | |||||
b4 = vec_perm(unaligned3, unaligned4, vec_lvsl(0, inbuf)); | |||||
b5 = vec_perm(unaligned4, unaligned5, vec_lvsl(0, inbuf)); | |||||
b6 = vec_perm(unaligned5, unaligned6, vec_lvsl(0, inbuf)); | |||||
b7 = vec_perm(unaligned6, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("i", &i, sizeof(i)); | |||||
l0 = *(block*)ocb_get_l(c, i++); | |||||
//hexDump("l", &l0, 16); | |||||
l1 = *(block*)ocb_get_l(c, i++); | |||||
l2 = *(block*)ocb_get_l(c, i++); | |||||
l3 = *(block*)ocb_get_l(c, i++); | |||||
l4 = *(block*)ocb_get_l(c, i++); | |||||
l5 = *(block*)ocb_get_l(c, i++); | |||||
l6 = *(block*)ocb_get_l(c, i++); | |||||
l7 = *(block*)ocb_get_l(c, i++); | |||||
iv0 ^= l0; | |||||
//hexDump("xorl", &iv0, 16); | |||||
b0 ^= iv0; | |||||
//hexDump("xor", &b0, 16); | |||||
iv1 = iv0 ^ l1; | |||||
b1 ^= iv1; | |||||
iv2 = iv1 ^ l2; | |||||
b2 ^= iv2; | |||||
iv3 = iv2 ^ l3; | |||||
b3 ^= iv3; | |||||
iv4 = iv3 ^ l4; | |||||
b4 ^= iv4; | |||||
iv5 = iv4 ^ l5; | |||||
b5 ^= iv5; | |||||
iv6 = iv5 ^ l6; | |||||
b6 ^= iv6; | |||||
iv7 = iv6 ^ l7; | |||||
b7 ^= iv7; | |||||
b0 = swap_if_le(b0); | |||||
//hexDump("swap", &b0, 16); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
b0 ^= rk[0]; | |||||
//hexDump("xor ??", &b0, 16); | |||||
b1 ^= rk[0]; | |||||
b2 ^= rk[0]; | |||||
b3 ^= rk[0]; | |||||
b4 ^= rk[0]; | |||||
b5 ^= rk[0]; | |||||
b6 ^= rk[0]; | |||||
b7 ^= rk[0]; | |||||
for (r = 1;r < rounds;r++) | |||||
{ | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("round", &b0, 16); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipher %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
} | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b0) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b1) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b2) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b3) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b4) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b5) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b6) | |||||
:"v" (rk[r]) | |||||
); | |||||
__asm__ volatile ("vncipherlast %0, %0, %1\n\t" | |||||
:"+v" (b7) | |||||
:"v" (rk[r]) | |||||
); | |||||
//hexDump("end", &b0, 16); | |||||
iv = iv7; | |||||
//hexDump("end-iv5", &b0, 16); | |||||
b0 = swap_if_le(b0) ^ iv0; | |||||
b1 = swap_if_le(b1) ^ iv1; | |||||
b2 = swap_if_le(b2) ^ iv2; | |||||
b3 = swap_if_le(b3) ^ iv3; | |||||
b4 = swap_if_le(b4) ^ iv4; | |||||
b5 = swap_if_le(b5) ^ iv5; | |||||
b6 = swap_if_le(b6) ^ iv6; | |||||
b7 = swap_if_le(b7) ^ iv7; | |||||
ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; | |||||
// The unaligned store stxvb16x writes big-endian | |||||
if ((uintptr_t)outbuf % 16 == 0) | |||||
{ | |||||
vec_vsx_st(b0, 0, out++); | |||||
vec_vsx_st(b1, 0, out++); | |||||
vec_vsx_st(b2, 0, out++); | |||||
vec_vsx_st(b3, 0, out++); | |||||
vec_vsx_st(b4, 0, out++); | |||||
vec_vsx_st(b5, 0, out++); | |||||
vec_vsx_st(b6, 0, out++); | |||||
vec_vsx_st(b7, 0, out++); | |||||
} | |||||
else | |||||
{ | |||||
b0 = swap_if_le(b0); | |||||
b1 = swap_if_le(b1); | |||||
b2 = swap_if_le(b2); | |||||
b3 = swap_if_le(b3); | |||||
b4 = swap_if_le(b4); | |||||
b5 = swap_if_le(b5); | |||||
b6 = swap_if_le(b6); | |||||
b7 = swap_if_le(b7); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b0), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
//hexDump("out-un", out - 1, 16); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b1), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b2), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b3), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b4), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b5), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b6), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
__asm__ ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (b7), "r" (zero), "r" ((uintptr_t)(out++))); | |||||
} | |||||
} | |||||
for ( ;nblocks; nblocks-- ) | |||||
{ | |||||
block b; | |||||
u64 i = ++c->u_mode.ocb.data_nblocks; | |||||
const block l = *(block*)ocb_get_l(c, i); | |||||
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ | |||||
iv ^= l; | |||||
if ((uintptr_t)in % 16 == 0) { | |||||
b = vec_ld(0, in++); | |||||
} else { | |||||
block unalignedprevprev; | |||||
unalignedprevprev = unalignedprev; | |||||
unalignedprev = vec_ld(0, in++); | |||||
b = vec_perm(unalignedprevprev, unalignedprev, vec_lvsl(0, inbuf)); | |||||
} | |||||
//hexDump("start", &b, 16); | |||||
/* Checksum_i = Checksum_{i-1} xor P_i */ | |||||
//hexDump("ctr", &ctr, 16); | |||||
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ | |||||
b ^= iv; | |||||
//hexDump("xoriv", &b, 16); | |||||
b = swap_if_le(b); | |||||
b = _gcry_aes_ppc8_decrypt_altivec (ctx, b); | |||||
//hexDump("crypt", &b, 16); | |||||
b = swap_if_le(b) ^ iv; | |||||
ctr ^= b; | |||||
if ((uintptr_t)out % 16 == 0) | |||||
vec_vsx_st(b, 0, out++); | |||||
else { | |||||
b = swap_if_le(b); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
: | |||||
: "wa" (b), "r" (zero), "r" ((uintptr_t)out++)); | |||||
} | |||||
//hexDump("out", out - 1, 16); | |||||
} | |||||
// We want to store iv and ctr big-endian and the unaligned | |||||
// store stxvb16x stores them little endian, so we have to swap them. | |||||
iv = swap_if_le(iv); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (iv), "r" (zero), "r" ((uintptr_t)&c->u_iv.iv)); | |||||
ctr = swap_if_le(ctr); | |||||
__asm__ volatile ("stxvb16x %x0, %1, %2\n\t" | |||||
:: "wa" (ctr), "r" (zero), "r" ((uintptr_t)&c->u_ctr.ctr)); | |||||
} | |||||
return 0; | |||||
} | |||||
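The unrolled loops above implement the OCB recurrences quoted in the comments: Offset_i = Offset_{i-1} xor L_{ntz(i)}, Checksum_i = Checksum_{i-1} xor P_i, and C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i). For comparison, here is a hedged scalar reference for one block in the encryption direction; the helper names and the enc callback are hypothetical, and it is only meant to make the data flow easy to check against the vector code:

#include <string.h>

typedef unsigned char ocb_block[16];

static void xor_block (ocb_block dst, const ocb_block src)
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] ^= src[i];
}

/* offset and checksum are updated in place; l_i is L_{ntz(i)} as returned
   by ocb_get_l; enc() encrypts one 16-byte block in place with the key. */
static void ocb_encrypt_block_sketch (ocb_block offset, ocb_block checksum,
                                      const ocb_block l_i,
                                      const ocb_block plain, ocb_block cipher,
                                      void (*enc) (ocb_block))
{
  xor_block (offset, l_i);        /* Offset_i   = Offset_{i-1} xor L_{ntz(i)} */
  xor_block (checksum, plain);    /* Checksum_i = Checksum_{i-1} xor P_i      */
  memcpy (cipher, plain, 16);
  xor_block (cipher, offset);     /* P_i xor Offset_i                         */
  enc (cipher);                   /* ENCIPHER(K, P_i xor Offset_i)            */
  xor_block (cipher, offset);     /* C_i = Offset_i xor ENCIPHER(...)         */
}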
extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits,
                                   RIJNDAEL_context *key);
extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits,
                                   /* this is the decryption key part of context */
                                   const unsigned (*)[15][4]);
Context not available.
  const unsigned char *inbuf = inbuf_arg;
  unsigned char *outbuf = outbuf_arg;
  const RIJNDAEL_context *ctx = context;
  const uint64_t two32 = 1ULL << 32;
  int overflow;
  u64 s[2], e[2];                        |   u64 s[2];
  s[0] = buf_get_be64(ctr + 8);
  overflow = two32 - (s[0] % two32) < nblocks;
#ifdef __builtin_expect
  __builtin_expect(overflow, 0);
#endif
Context not available.
  if (hd) {
    hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec;
    hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc;
    hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt;
    hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc;
    hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
  }
}
#endif
else
  {
Context not available.
  else if (ctx->use_arm_ce)
    {
      return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
    }
#endif /*USE_ARM_CE*/
#ifdef USE_PPC_ASM
  else if (ctx->use_ppc_asm)
    {
      return _gcry_aes_ppc8_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
    }
#endif /*USE_PPC_ASM*/
  else if (encrypt)
    {
      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
Context not available.