Revision control

Copy as Markdown

Other Tools

/*
* Lowest Level MPI Algorithms
* (C) 1999-2010 Jack Lloyd
* 2006 Luca Piccarreta
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
#ifndef BOTAN_MP_ASM_INTERNAL_H_
#define BOTAN_MP_ASM_INTERNAL_H_
#include <botan/internal/mp_madd.h>
namespace Botan {
/*
* Building blocks for the x86 inline assembly kernels in this header.
*
* Every macro expands to a run of ASM("...") fragments. ASM is defined
* further down (only when one of these variants is selected) and is
* expanded when the macros are used inside an asm statement. The names
* %[x], %[y], %[z] and %[carry] refer to operands declared by the asm
* statement that uses the macro.
*
* The last line of a macro body must NOT end in a line continuation:
* otherwise the following #define is spliced into the macro body and the
* header no longer preprocesses.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

/*
* x[INDEX] = x[INDEX] OPERATION y[INDEX]
* The word of y is staged in the %[carry] register; mov leaves the CPU
* flags alone, so the carry flag flows from one expansion to the next.
*/
#define ADDSUB2_OP(OPERATION, INDEX) \
   ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
   ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

/*
* z[INDEX] = x[INDEX] OPERATION y[INDEX]
* The intermediate value lives in the %[carry] register.
*/
#define ADDSUB3_OP(OPERATION, INDEX) \
   ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
   ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
   ASM("movl %[carry], 4*" #INDEX "(%[z])")

/*
* edx:eax = x[INDEX] * y + carry
* The low half is stored to WRITE_TO[INDEX] (WRITE_TO is an operand name
* given as a string, e.g. "x" or "z"); the high half becomes the new
* value of %[carry].
*/
#define LINMUL_OP(WRITE_TO, INDEX) \
   ASM("movl 4*" #INDEX "(%[x]),%%eax") \
   ASM("mull %[y]") \
   ASM("addl %[carry],%%eax") \
   ASM("adcl $0,%%edx") \
   ASM("movl %%edx,%[carry]") \
   ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

/*
* edx:eax = x[INDEX] * y + carry + z[INDEX]
* The low half is stored back to z[INDEX]; the high half becomes the new
* value of %[carry]. The first macro argument is unused; it exists so the
* macro can be driven by DO_8_TIMES like the others.
*/
#define MULADD_OP(IGNORED, INDEX) \
   ASM("movl 4*" #INDEX "(%[x]),%%eax") \
   ASM("mull %[y]") \
   ASM("addl %[carry],%%eax") \
   ASM("adcl $0,%%edx") \
   ASM("addl 4*" #INDEX "(%[z]),%%eax") \
   ASM("adcl $0,%%edx") \
   ASM("movl %%edx,%[carry]") \
   ASM("movl %%eax, 4*" #INDEX " (%[z])")

/*
* Wrap an add/subtract-with-carry sequence:
* - rotate %[carry] right by one so that its bit 0 lands in the carry flag
* - run CORE_CODE (adc/sbb instructions consuming and producing the flag)
* - sbb of %[carry] with itself yields 0 or -1 depending on the final
*   flag, and neg turns that into 0 or 1
*/
#define ADD_OR_SUBTRACT(CORE_CODE) \
   ASM("rorl %[carry]") \
   CORE_CODE \
   ASM("sbbl %[carry],%[carry]") \
   ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

/* 64-bit variant of the macro above: x[INDEX] = x[INDEX] OPERATION y[INDEX] */
#define ADDSUB2_OP(OPERATION, INDEX) \
   ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
   ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

/* 64-bit variant: z[INDEX] = x[INDEX] OPERATION y[INDEX] */
#define ADDSUB3_OP(OPERATION, INDEX) \
   ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
   ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
   ASM("movq %[carry], 8*" #INDEX "(%[z])")

/* 64-bit variant: rdx:rax = x[INDEX] * y + carry; low half to WRITE_TO[INDEX] */
#define LINMUL_OP(WRITE_TO, INDEX) \
   ASM("movq 8*" #INDEX "(%[x]),%%rax") \
   ASM("mulq %[y]") \
   ASM("addq %[carry],%%rax") \
   ASM("adcq $0,%%rdx") \
   ASM("movq %%rdx,%[carry]") \
   ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

/* 64-bit variant: rdx:rax = x[INDEX] * y + carry + z[INDEX]; low half to z[INDEX] */
#define MULADD_OP(IGNORED, INDEX) \
   ASM("movq 8*" #INDEX "(%[x]),%%rax") \
   ASM("mulq %[y]") \
   ASM("addq %[carry],%%rax") \
   ASM("adcq $0,%%rdx") \
   ASM("addq 8*" #INDEX "(%[z]),%%rax") \
   ASM("adcq $0,%%rdx") \
   ASM("movq %%rdx,%[carry]") \
   ASM("movq %%rax, 8*" #INDEX " (%[z])")

/* 64-bit variant of the carry-flag wrapper described above */
#define ADD_OR_SUBTRACT(CORE_CODE) \
   ASM("rorq %[carry]") \
   CORE_CODE \
   ASM("sbbq %[carry],%[carry]") \
   ASM("negq %[carry]")

#endif
/*
* Generic helpers for the assembly macros. They are only needed (and only
* defined) when one of the x86 variants above defined ADD_OR_SUBTRACT.
*/
#if defined(ADD_OR_SUBTRACT)
/*
* Append newline + tab to one assembler fragment, so that adjacent
* fragments concatenate into one instruction per line.
*/
#define ASM(x) x "\n\t"
/*
* Expand MACRO(ARG, i) for i = 0 .. 7, in order. Used to unroll the
* per-word operation macros over an eight word block.
*/
#define DO_8_TIMES(MACRO, ARG) \
MACRO(ARG, 0) \
MACRO(ARG, 1) \
MACRO(ARG, 2) \
MACRO(ARG, 3) \
MACRO(ARG, 4) \
MACRO(ARG, 5) \
MACRO(ARG, 6) \
MACRO(ARG, 7)
#endif
/*
* Word Addition
*
* Returns x + y + *carry (wrapping at the word size) and replaces *carry
* with the carry out of that addition, which is 0 or 1.
*
* The assembly variants feed only bit 0 of the incoming *carry into the
* addition while the portable code adds the full value, so callers are
* presumably expected to pass 0 or 1.
*/
inline word word_add(word x, word y, word* carry)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   asm(
      ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
      : [x]"=r"(x), [carry]"=r"(*carry)
      : "0"(x), [y]"rm"(y), "1"(*carry)
      : "cc");
   return x;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   asm(
      ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
      : [x]"=r"(x), [carry]"=r"(*carry)
      : "0"(x), [y]"rm"(y), "1"(*carry)
      : "cc");
   return x;
#else
   const word carry_in = *carry;
   const word partial = x + y;
   const word total = partial + carry_in;

   /*
   * An unsigned addition wrapped around exactly when the result is
   * smaller than one of its operands. Both checks are plain comparisons,
   * so no branch is introduced.
   */
   const word wrapped_xy = (partial < x);
   const word wrapped_carry = (total < carry_in);

   *carry = wrapped_xy | wrapped_carry;
   return total;
#endif
   }
/*
* Eight Word Block Addition, Two Argument
*
* Computes x[i] = x[i] + y[i] for i = 0 .. 7, feeding the carry out of
* each word into the next one. `carry` is the carry into word 0; the
* carry out of word 7 is returned.
*/
inline word word8_add2(word x[8], const word y[8], word carry)
{
#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* The asm updates x[] through a pointer operand, which is why "memory"
* is listed as clobbered. The carry register doubles as scratch space
* for the words loaded from y.
*/
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), "0"(carry)
: "cc", "memory");
return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
/* Same as the 32-bit variant, using 64-bit adc */
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), "0"(carry)
: "cc", "memory");
return carry;
#else
/* Portable version: chain word_add, which updates carry after each word */
x[0] = word_add(x[0], y[0], &carry);
x[1] = word_add(x[1], y[1], &carry);
x[2] = word_add(x[2], y[2], &carry);
x[3] = word_add(x[3], y[3], &carry);
x[4] = word_add(x[4], y[4], &carry);
x[5] = word_add(x[5], y[5], &carry);
x[6] = word_add(x[6], y[6], &carry);
x[7] = word_add(x[7], y[7], &carry);
return carry;
#endif
}
/*
* Eight Word Block Addition, Three Argument
*
* Computes z[i] = x[i] + y[i] for i = 0 .. 7, feeding the carry out of
* each word into the next one. `carry` is the carry into word 0; the
* carry out of word 7 is returned.
*/
inline word word8_add3(word z[8], const word x[8],
const word y[8], word carry)
{
#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* The asm writes z[] through a pointer operand, which is why "memory"
* is listed as clobbered. Each sum is formed in the carry register
* before being stored.
*/
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
: "cc", "memory");
return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
/* Same as the 32-bit variant, using 64-bit adc */
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
: "cc", "memory");
return carry;
#else
/* Portable version: chain word_add, which updates carry after each word */
z[0] = word_add(x[0], y[0], &carry);
z[1] = word_add(x[1], y[1], &carry);
z[2] = word_add(x[2], y[2], &carry);
z[3] = word_add(x[3], y[3], &carry);
z[4] = word_add(x[4], y[4], &carry);
z[5] = word_add(x[5], y[5], &carry);
z[6] = word_add(x[6], y[6], &carry);
z[7] = word_add(x[7], y[7], &carry);
return carry;
#endif
}
/*
* Word Subtraction
*
* Returns x - y - *carry (wrapping at the word size) and replaces *carry
* with the borrow out of that subtraction, which is 0 or 1.
*
* The assembly variants feed only bit 0 of the incoming *carry into the
* subtraction while the portable code subtracts the full value, so
* callers are presumably expected to pass 0 or 1.
*/
inline word word_sub(word x, word y, word* carry)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   asm(
      ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
      : [x]"=r"(x), [carry]"=r"(*carry)
      : "0"(x), [y]"rm"(y), "1"(*carry)
      : "cc");
   return x;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   asm(
      ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
      : [x]"=r"(x), [carry]"=r"(*carry)
      : "0"(x), [y]"rm"(y), "1"(*carry)
      : "cc");
   return x;
#else
   const word borrow_in = *carry;
   const word diff = x - y;
   const word result = diff - borrow_in;

   /*
   * An unsigned subtraction wrapped around exactly when the result is
   * larger than the value that was subtracted from. Both checks are
   * plain comparisons, so no branch is introduced.
   */
   const word borrowed_xy = (diff > x);
   const word borrowed_carry = (result > diff);

   *carry = borrowed_xy | borrowed_carry;
   return result;
#endif
   }
/*
* Eight Word Block Subtraction, Two Argument
*
* Computes x[i] = x[i] - y[i] for i = 0 .. 7, feeding the borrow out of
* each word into the next one. `carry` is the borrow into word 0; the
* borrow out of word 7 is returned.
*/
inline word word8_sub2(word x[8], const word y[8], word carry)
{
#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* The asm updates x[] through a pointer operand, which is why "memory"
* is listed as clobbered. The carry register doubles as scratch space
* for the words loaded from y.
*/
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), "0"(carry)
: "cc", "memory");
return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
/* Same as the 32-bit variant, using 64-bit sbb */
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), "0"(carry)
: "cc", "memory");
return carry;
#else
/* Portable version: chain word_sub, which updates carry after each word */
x[0] = word_sub(x[0], y[0], &carry);
x[1] = word_sub(x[1], y[1], &carry);
x[2] = word_sub(x[2], y[2], &carry);
x[3] = word_sub(x[3], y[3], &carry);
x[4] = word_sub(x[4], y[4], &carry);
x[5] = word_sub(x[5], y[5], &carry);
x[6] = word_sub(x[6], y[6], &carry);
x[7] = word_sub(x[7], y[7], &carry);
return carry;
#endif
}
/*
* Eight Word Block Subtraction, Two Argument, Reversed Operands
*
* Computes x[i] = y[i] - x[i] for i = 0 .. 7 (note the operand order:
* x is the subtrahend and also receives the result), feeding the borrow
* out of each word into the next one. `carry` is the borrow into word 0;
* the borrow out of word 7 is returned.
*/
inline word word8_sub2_rev(word x[8], const word y[8], word carry)
{
#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* Reuses the three argument kernel with the operand names remapped:
* the asm operand [x] is bound to y, while [y] and [z] are both bound
* to x, giving x[i] = y[i] - x[i].
*/
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
: [carry]"=r"(carry)
: [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
: "cc", "memory");
return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
/* Same operand remapping as the 32-bit variant, using 64-bit sbb */
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
: [carry]"=r"(carry)
: [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
: "cc", "memory");
return carry;
#else
/* Portable version: chain word_sub with y as the minuend */
x[0] = word_sub(y[0], x[0], &carry);
x[1] = word_sub(y[1], x[1], &carry);
x[2] = word_sub(y[2], x[2], &carry);
x[3] = word_sub(y[3], x[3], &carry);
x[4] = word_sub(y[4], x[4], &carry);
x[5] = word_sub(y[5], x[5], &carry);
x[6] = word_sub(y[6], x[6], &carry);
x[7] = word_sub(y[7], x[7], &carry);
return carry;
#endif
}
/*
* Eight Word Block Subtraction, Three Argument
*
* Computes z[i] = x[i] - y[i] for i = 0 .. 7, feeding the borrow out of
* each word into the next one. `carry` is the borrow into word 0; the
* borrow out of word 7 is returned.
*/
inline word word8_sub3(word z[8], const word x[8],
const word y[8], word carry)
{
#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* The asm writes z[] through a pointer operand, which is why "memory"
* is listed as clobbered. Each difference is formed in the carry
* register before being stored.
*/
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
: "cc", "memory");
return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
/* Same as the 32-bit variant, using 64-bit sbb */
asm(
ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
: [carry]"=r"(carry)
: [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
: "cc", "memory");
return carry;
#else
/* Portable version: chain word_sub, which updates carry after each word */
z[0] = word_sub(x[0], y[0], &carry);
z[1] = word_sub(x[1], y[1], &carry);
z[2] = word_sub(x[2], y[2], &carry);
z[3] = word_sub(x[3], y[3], &carry);
z[4] = word_sub(x[4], y[4], &carry);
z[5] = word_sub(x[5], y[5], &carry);
z[6] = word_sub(x[6], y[6], &carry);
z[7] = word_sub(x[7], y[7], &carry);
return carry;
#endif
}
/*
* Eight Word Block Linear Multiplication, In Place
*
* For i = 0 .. 7 computes x[i] * y + carry, stores the low word of the
* result back to x[i] and uses the high word as the carry for the next
* word. `carry` is the value added into word 0; the high word produced by
* word 7 is returned.
*
* The portable branch relies on word_madd2 (declared in mp_madd.h, not
* visible here), which presumably returns the low word and leaves the
* high word in *carry.
*/
inline word word8_linmul2(word x[8], word y, word carry)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   /*
   * The asm loads from and stores to x[] through a pointer operand, so
   * "memory" must be listed as clobbered; otherwise the compiler is free
   * to assume x[] is untouched and reorder or cache accesses around the
   * statement. eax/edx are clobbered by mul.
   */
   asm(
      DO_8_TIMES(LINMUL_OP, "x")
      : [carry]"=r"(carry)
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%eax", "%edx");
   return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   /* Same as the 32-bit variant; rax/rdx are clobbered by mul */
   asm(
      DO_8_TIMES(LINMUL_OP, "x")
      : [carry]"=r"(carry)
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%rax", "%rdx");
   return carry;
#else
   x[0] = word_madd2(x[0], y, &carry);
   x[1] = word_madd2(x[1], y, &carry);
   x[2] = word_madd2(x[2], y, &carry);
   x[3] = word_madd2(x[3], y, &carry);
   x[4] = word_madd2(x[4], y, &carry);
   x[5] = word_madd2(x[5], y, &carry);
   x[6] = word_madd2(x[6], y, &carry);
   x[7] = word_madd2(x[7], y, &carry);
   return carry;
#endif
   }
/*
* Eight Word Block Linear Multiplication, Three Argument
*
* For i = 0 .. 7 computes x[i] * y + carry, stores the low word of the
* result to z[i] and uses the high word as the carry for the next word.
* `carry` is the value added into word 0; the high word produced by
* word 7 is returned.
*
* The portable branch relies on word_madd2 (declared in mp_madd.h, not
* visible here), which presumably returns the low word and leaves the
* high word in *carry.
*/
inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   /*
   * The asm reads x[] and stores to z[] through pointer operands, so
   * "memory" must be listed as clobbered; otherwise the compiler is free
   * to assume z[] is untouched and reorder or cache accesses around the
   * statement. eax/edx are clobbered by mul.
   */
   asm(
      DO_8_TIMES(LINMUL_OP, "z")
      : [carry]"=r"(carry)
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%eax", "%edx");
   return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   /* Same as the 32-bit variant; rax/rdx are clobbered by mul */
   asm(
      DO_8_TIMES(LINMUL_OP, "z")
      : [carry]"=r"(carry)
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%rax", "%rdx");
   return carry;
#else
   z[0] = word_madd2(x[0], y, &carry);
   z[1] = word_madd2(x[1], y, &carry);
   z[2] = word_madd2(x[2], y, &carry);
   z[3] = word_madd2(x[3], y, &carry);
   z[4] = word_madd2(x[4], y, &carry);
   z[5] = word_madd2(x[5], y, &carry);
   z[6] = word_madd2(x[6], y, &carry);
   z[7] = word_madd2(x[7], y, &carry);
   return carry;
#endif
   }
/*
* Eight Word Block Multiply/Add
*
* For i = 0 .. 7 computes x[i] * y + z[i] + carry, stores the low word of
* the result back to z[i] and uses the high word as the carry for the
* next word. `carry` is the value added into word 0; the high word
* produced by word 7 is returned.
*
* The portable branch relies on word_madd3 (declared in mp_madd.h, not
* visible here), which presumably returns the low word and leaves the
* high word in *carry.
*/
inline word word8_madd3(word z[8], const word x[8], word y, word carry)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   /*
   * The asm reads x[] and both reads and stores z[] through pointer
   * operands, so "memory" must be listed as clobbered; otherwise the
   * compiler is free to assume z[] is untouched and reorder or cache
   * accesses around the statement. eax/edx are clobbered by mul.
   * The "" argument is the unused first parameter of MULADD_OP.
   */
   asm(
      DO_8_TIMES(MULADD_OP, "")
      : [carry]"=r"(carry)
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%eax", "%edx");
   return carry;
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   /* Same as the 32-bit variant; rax/rdx are clobbered by mul */
   asm(
      DO_8_TIMES(MULADD_OP, "")
      : [carry]"=r"(carry)
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
      : "cc", "memory", "%rax", "%rdx");
   return carry;
#else
   z[0] = word_madd3(x[0], y, z[0], &carry);
   z[1] = word_madd3(x[1], y, z[1], &carry);
   z[2] = word_madd3(x[2], y, z[2], &carry);
   z[3] = word_madd3(x[3], y, z[3], &carry);
   z[4] = word_madd3(x[4], y, z[4], &carry);
   z[5] = word_madd3(x[5], y, z[5], &carry);
   z[6] = word_madd3(x[6], y, z[6], &carry);
   z[7] = word_madd3(x[7], y, z[7], &carry);
   return carry;
#endif
   }
/*
* Multiply-Add Accumulator
* (w2,w1,w0) += x * y
*
* w0 is the least significant word of the three word accumulator and w2
* the most significant. Any carry out of w2 is discarded.
*/
inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   word z0 = 0;
   word z1 = 0;

   /* z1:z0 = x * y (mul takes one factor in eax and leaves edx:eax) */
   asm("mull %[y]"
      : "=a"(z0),"=d"(z1)
      : "a"(x), [y]"rm"(y)
      : "cc");

   /* Add the double word product into the accumulator, rippling the carry */
   asm(R"(
addl %[z0],%[w0]
adcl %[z1],%[w1]
adcl $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   word z0 = 0;
   word z1 = 0;

   /* z1:z0 = x * y (mul takes one factor in rax and leaves rdx:rax) */
   asm("mulq %[y]"
      : "=a"(z0),"=d"(z1)
      : "a"(x), [y]"rm"(y)
      : "cc");

   /* Add the double word product into the accumulator, rippling the carry */
   asm(R"(
addq %[z0],%[w0]
adcq %[z1],%[w1]
adcq $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#else
   /*
   * The old w0 is handed to word_madd2 through its carry argument;
   * presumably it is added to x * y, with the low word returned and the
   * high word left behind in `high` (word_madd2 lives in mp_madd.h).
   */
   word high = *w0;
   const word low = word_madd2(x, y, &high);
   *w0 = low;

   const word mid = *w1 + high;
   *w1 = mid;

   /* mid wrapped around iff it is now smaller than what was added */
   *w2 += static_cast<word>(mid < high);
#endif
   }
/*
* 3-word addition
* (w2,w1,w0) += x
*
* w0 is the least significant word of the three word accumulator and w2
* the most significant. Any carry out of w2 is discarded.
*/
inline void word3_add(word* w2, word* w1, word* w0, word x)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   /* Add x to the low word and ripple the carry through the upper two */
   asm(R"(
addl %[x],%[w0]
adcl $0,%[w1]
adcl $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   /* Add x to the low word and ripple the carry through the upper two */
   asm(R"(
addq %[x],%[w0]
adcq $0,%[w1]
adcq $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#else
   const word low = *w0 + x;
   *w0 = low;

   /* A wrapped sum is smaller than the value that was added */
   const word carry_low = (low < x);

   const word mid = *w1 + carry_low;
   *w1 = mid;

   *w2 += static_cast<word>(mid < carry_low);
#endif
   }
/*
* Multiply-Add Accumulator
* (w2,w1,w0) += 2 * x * y
*
* w0 is the least significant word of the three word accumulator and w2
* the most significant. Any carry out of w2 is discarded.
*/
inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
   {
#if defined(BOTAN_MP_USE_X86_32_ASM)
   word z0 = 0;
   word z1 = 0;

   /* z1:z0 = x * y (mul takes one factor in eax and leaves edx:eax) */
   asm("mull %[y]"
      : "=a"(z0),"=d"(z1)
      : "a"(x), [y]"rm"(y)
      : "cc");

   /* Doubling is done by adding the product into the accumulator twice */
   asm(R"(
addl %[z0],%[w0]
adcl %[z1],%[w1]
adcl $0,%[w2]
addl %[z0],%[w0]
adcl %[z1],%[w1]
adcl $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#elif defined(BOTAN_MP_USE_X86_64_ASM)
   word z0 = 0;
   word z1 = 0;

   /* z1:z0 = x * y (mul takes one factor in rax and leaves rdx:rax) */
   asm("mulq %[y]"
      : "=a"(z0),"=d"(z1)
      : "a"(x), [y]"rm"(y)
      : "cc");

   /* Doubling is done by adding the product into the accumulator twice */
   asm(R"(
addq %[z0],%[w0]
adcq %[z1],%[w1]
adcq $0,%[w2]
addq %[z0],%[w0]
adcq %[z1],%[w1]
adcq $0,%[w2]
)"
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
      : "cc");
#else
   /*
   * word_madd2 (from mp_madd.h) is called with a zero carry; presumably
   * it returns the low word of x * y and leaves the high word in
   * product_high.
   */
   word product_high = 0;
   const word product_low = word_madd2(x, y, &product_high);

   /*
   * Double the two word product by shifting it left one bit across
   * three words: the top bit of each word moves into the word above.
   */
   const word doubled_top = (product_high >> (BOTAN_MP_WORD_BITS-1));
   const word doubled_mid = (product_high << 1) | (product_low >> (BOTAN_MP_WORD_BITS-1));
   const word doubled_low = (product_low << 1);

   /* Add the doubled product into the accumulator, rippling the carry */
   word carry = 0;
   *w0 = word_add(*w0, doubled_low, &carry);
   *w1 = word_add(*w1, doubled_mid, &carry);
   *w2 = word_add(*w2, doubled_top, &carry);
#endif
   }
/*
* Remove the assembly helper macros again so they do not leak into code
* that includes this header. ASM is only defined when one of the x86
* assembly variants was selected, so it serves as the guard here.
*/
#if defined(ASM)
#undef ASM
#undef DO_8_TIMES
#undef ADD_OR_SUBTRACT
#undef ADDSUB2_OP
#undef ADDSUB3_OP
#undef LINMUL_OP
#undef MULADD_OP
#endif
}
#endif