Add ARM fast multiply/square for secp224r1.

This commit is contained in:
Ken MacKay
2015-06-28 21:47:16 -07:00
parent 1015fe5c43
commit 05cdd402f2
3 changed files with 567 additions and 8 deletions
+4 -4
View File
@@ -26,8 +26,8 @@ ry = [6, 7, 8]
emit("add r0, %s", (size - init_size) * 4) # move z
emit("add r2, %s", (size - init_size) * 4) # move y
emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
print ""
if init_size == 1:
@@ -84,11 +84,11 @@ emit("sub r0, %s", (2 * init_size + 3) * 4)
emit("sub r2, %s", (init_size + 3) * 4)
#### load y registers
emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)]))
emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)]))
#### load additional x registers
if init_size != 3:
emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
print ""
prev_size = init_size
+15 -4
View File
@@ -8,7 +8,7 @@ if len(sys.argv) < 2:
size = int(sys.argv[1])
if size > 6 and size != 8:
if size > 8:
print "This script doesn't work with integer size %s due to laziness" % (size)
sys.exit(1)
@@ -37,8 +37,19 @@ r = [2, 3, 4, 5, 6, 7]
s = size - init_size
# Note that the init_size = 1 and init_size = 2 cases are each implemented directly
if init_size > 0:
if init_size == 1:
emit("ldmia r1!, {r2}")
emit("add r1, %s", (size - init_size * 2) * 4)
emit("ldmia r1!, {r5}")
emit("add r0, %s", (size - init_size) * 4)
emit("umull r8, r9, r2, r5")
emit("stmia r0!, {r8, r9}")
emit("sub r0, %s", (size + init_size) * 4)
emit("sub r1, %s", (size) * 4)
print ""
elif init_size == 2:
emit("ldmia r1!, {r2, r3}")
emit("add r1, %s", (size - init_size * 2) * 4)
emit("ldmia r1!, {r5, r6}")
@@ -66,7 +77,7 @@ if init_size > 0:
emit("sub r1, %s", (size) * 4)
# load input words
emit("ldmia r1!, {%s}", ",".join(["r%s" % (r[i]) for i in xrange(s)]))
emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)]))
print ""
emit("umull r11, r12, r2, r2")