Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reactivate fast squaring algorithms #68

Closed
mratsim opened this issue Jun 21, 2020 · 3 comments
Closed

Reactivate fast squaring algorithms #68

mratsim opened this issue Jun 21, 2020 · 3 comments

Comments

@mratsim
Copy link
Owner

mratsim commented Jun 21, 2020

Commit 2971965
deactivates the fast path for generic squaring as there is an off-by-one on 32-bit with inputs from the test suite (from #61 #62):

suite "Modular squaring - bugs highlighted by property-based testing":
test "a² == (-a)² on for Fp[2^127 - 1] - #61":
var a{.noInit.}: Fp[Mersenne127]
a.fromHex"0x75bfffefbfffffff7fd9dfd800000000"
var na{.noInit.}: Fp[Mersenne127]
na.neg(a)
a.square()
na.square()
check:
bool(a == na)
var a2{.noInit.}, na2{.noInit.}: Fp[Mersenne127]
a2.fromHex"0x75bfffefbfffffff7fd9dfd800000000"
na2.neg(a2)
a2 *= a2
na2 *= na2
check:
bool(a2 == na2)
bool(a2 == a)
bool(a2 == na)
test "a² == (-a)² on for Fp[2^127 - 1] - #62":
var a{.noInit.}: Fp[Mersenne127]
a.fromHex"0x7ff7ffffffffffff1dfb7fafc0000000"
var na{.noInit.}: Fp[Mersenne127]
na.neg(a)
a.square()
na.square()
check:
bool(a == na)
var a2{.noInit.}, na2{.noInit.}: Fp[Mersenne127]
a2.fromHex"0x7ff7ffffffffffff1dfb7fafc0000000"
na2.neg(a2)
a2 *= a2
na2 *= na2
check:
bool(a2 == na2)
bool(a2 == a)
bool(a2 == na)

To be reactivated:

func montySquare_CIOS(r: var Limbs, a, M: Limbs, m0ninv: BaseType) =
## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS)
##
## Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards
## Johann Großschädl, 2003
## https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf
##
## Analyzing and Comparing Montgomery Multiplication Algorithms
## Koc, Acar, Kaliski, 1996
## https://www.semanticscholar.org/paper/Analyzing-and-comparing-Montgomery-multiplication-Ko%C3%A7-Acar/5e3941ff482ec3ee41dc53c3298f0be085c69483
# TODO: Deactivated
# Off-by one on 32-bit for Fp[2^127 - 1] with inputs
# - -0x75bfffefbfffffff7fd9dfd800000000
# - -0x7ff7ffffffffffff1dfb7fafc0000000
# Squaring the number and its opposite
# should give the same result, but those are off-by-one
# We want all the computation to be kept in registers
# hence we use a temporary `t`, hoping that the compiler does it.
var z: typeof(r) # zero-init
const L = z.len
# Extra words to handle up to 2 carries t[N] and t[N+1]
var zLp1: SecretWord
var zL: SecretWord
staticFor i, 0, L:
# Squaring
var t: Carry
var u, v: SecretWord
# (u, v) <- a[i] * a[i] + z[i]
muladd1(u, v, a[i], a[i], z[i])
z[i] = v
staticFor j, i+1, L:
# (t, u, v) <- 2*a[j]*a[i] + z[j] + (t, u)
# 2*a[j]*a[i] can spill 1-bit on a 3rd word
mulDoubleAdd2(t, u, v, a[j], a[i], z[j], t, u)
z[j] = v
block:
# (u, v) <- zs + (t, u)
# zL <- v
# zL+1 <- u
var C: Carry
addC(C, v, zL, u, Carry(0))
addC(C, u, zLp1, SecretWord(t), C)
zL = v
zLp1 = u
# Reduction
# m <- (z[0] * m0ninv) mod 2^w
# (u, v) <- m * M[0] + z[0]
let m = z[0] * SecretWord(m0ninv)
muladd1(u, v, m, M[0], z[0])
staticFor j, 1, L:
# (u, v) <- m*M[j] + z[j] + u
# z[j-1] <- v
muladd2(u, v, m, M[j], z[j], u)
z[j-1] = v
block:
# (u, v) <- zL + u
# z[L-1] <- v
# z[L] <- zL+1 + u
var C: Carry
addC(C, v, zL, u, Carry(0))
z[L-1] = v
addC(C, zL, zLp1, Zero, C)
discard z.csub(M, zL.isNonZero() or not(z < M)) # TODO: (z >= M) is unnecessary for prime in the form (2^64)^w - 1
r = z

@mratsim
Copy link
Owner Author

mratsim commented Jun 22, 2020

In the Montgomery domain, the off-by-one is the least significant bit

image

Also it doesn't happen only for generic square but also the fast square montySquare_CIOS_nocarry from Consensys article: https://hackmd.io/@zkteam/modular_multiplication which should be straightforward to implement

 for i=0 to N-1
    C, t[i] = a[i] * a[i] + t[i]
    p = 0
    for j=i+1 to N-1
        p,C,t[j] = 2*a[j]*a[i] + t[j] + (p,C)

    A = C

    m = t[0] * q'[0]
    C, _ = t[0] + q[0]*m
    for j=1 to N-1
        C, t[j-1] = q[j]*m +  t[j] + C

    t[N-1] = C + A

vs

func montySquare_CIOS_nocarry(r: var Limbs, a, M: Limbs, m0ninv: BaseType) =
## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS)
## and no-carry optimization.
## This requires the most significant word of the Modulus
## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
## https://hackmd.io/@zkteam/modular_multiplication
# We want all the computation to be kept in registers
# hence we use a temporary `t`, hoping that the compiler does it.
var t: typeof(M) # zero-init
const N = t.len
staticFor i, 0, N:
# Squaring
var
A1: Carry
A0: SecretWord
# (A0, t[i]) <- a[i] * a[i] + t[i]
muladd1(A0, t[i], a[i], a[i], t[i])
staticFor j, i+1, N:
# (A1, A0, t[j]) <- 2*a[j]*a[i] + t[j] + (A1, A0)
# 2*a[j]*a[i] can spill 1-bit on a 3rd word
mulDoubleAdd2(A1, A0, t[j], a[j], a[i], t[j], A1, A0)
# Reduction
# m <- (t[0] * m0ninv) mod 2^w
# (C, _) <- m * M[0] + t[0]
let m = t[0] * SecretWord(m0ninv)
var C, lo: SecretWord
muladd1(C, lo, m, M[0], t[0])
staticFor j, 1, N:
# (C, t[j-1]) <- m*M[j] + t[j] + C
muladd2(C, t[j-1], m, M[j], t[j], C)
t[N-1] = C + A0
discard t.csub(M, not(t < M))
r = t

mratsim added a commit that referenced this issue Jun 22, 2020
@mratsim mratsim changed the title Reactivate fast generic squaring Reactivate fast squaring algorithms Jun 22, 2020
@mratsim mratsim mentioned this issue Jun 22, 2020
2 tasks
mratsim added a commit that referenced this issue Jun 22, 2020
* Add test case for #30 - Euler's criterion doesn't return 1 for a square

* Detect #42 in the test suite

* Detect #43 in the test suite

* comment in sqrt tests

* Add #67 to the anti-regression suite

* Add #61 to the anti-regression suite

* Add #62 to anti-regression suite

* Add #60 to the anti-regression suite

* Add #64 to the test suite

* Add #65 - case 1

* Add #65 case 2

* Add #65 case 3

* Add debug check to isSquare/Euler's Criterion/Legendre Symbol

* Make sure our primitives are correct

* For now deactivate montySquare CIOS fix #61 #62

* Narrow down #42 and #43 to powinv on 32-bit

* Detect #42 #43 at the fast squaring level

* More #42, #43 tests, Use multiplication instead of squaring as a temporary workaround, see #68

* Prevent regression of #67 now that squaring is "fixed"
@mratsim
Copy link
Owner Author

mratsim commented Feb 6, 2021

unfused squaring was added in #144
including assembly version though no MULX/ADOX/ADCX implemented as it's quite tricky in terms of registers (commented out).

The normal assembly version is as fast as multiplication with MULX/ADOX/ADCX.

@mratsim
Copy link
Owner Author

mratsim commented Feb 21, 2022

Closed in #160

@mratsim mratsim closed this as completed Feb 21, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant