Skip to content

Commit

Permalink
fix: Fix asm_decompose
Browse files Browse the repository at this point in the history
  • Loading branch information
sp301415 committed Jan 26, 2025
1 parent 8492f3c commit 590b9b2
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 14 deletions.
28 changes: 26 additions & 2 deletions internal/asmgen/decompose.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,21 @@ func decomposePolyAssignUint32AVX2() {
SUBQ(Imm(3), jj)

Label("level_loop_end")
CMPQ(j, Imm(0))
CMPQ(j, Imm(1))
JGE(LabelRef("level_loop_body"))

u = YMM()
VANDPD(baseMask, c, u)

uCarry = YMM()
VANDPD(baseHalf, u, uCarry)
VPSLLD(Imm(1), uCarry, uCarry)
VPSUBD(uCarry, u, u)

decomposedOut0 := GP64()
MOVQ(Mem{Base: decomposedOut}, decomposedOut0)
VMOVDQU(u, Mem{Base: decomposedOut0, Index: i, Scale: 4})

ADDQ(Imm(8), i)

Label("N_loop_end")
Expand Down Expand Up @@ -159,9 +171,21 @@ func decomposePolyAssignUint64AVX2() {
SUBQ(Imm(3), jj)

Label("level_loop_end")
CMPQ(j, Imm(0))
CMPQ(j, Imm(1))
JGE(LabelRef("level_loop_body"))

u = YMM()
VANDPD(baseMask, c, u)

uCarry = YMM()
VANDPD(baseHalf, u, uCarry)
VPSLLQ(Imm(1), uCarry, uCarry)
VPSUBQ(uCarry, u, u)

decomposedOut0 := GP64()
MOVQ(Mem{Base: decomposedOut}, decomposedOut0)
VMOVDQU(u, Mem{Base: decomposedOut0, Index: i, Scale: 8})

ADDQ(Imm(4), i)

Label("N_loop_end")
Expand Down
4 changes: 1 addition & 3 deletions math/poly/asm_convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ func convertPolyToFourierPolyAssign[T num.Integer](p []T, fpOut []float64) {
// floatModQInPlace reduces every element of coeffs modulo Q in place.
//
// Each element c is replaced by round(c - Q*round(c/Q)), i.e. the
// representative of c modulo Q that is centered around zero, rounded to
// the nearest integer. An empty or nil slice is left untouched.
func floatModQInPlace(coeffs []float64, Q float64) {
	for i := range coeffs {
		// Subtract the nearest multiple of Q from the original value.
		// Deriving the remainder from the fractional part of the quotient
		// ((c/Q - round(c/Q)) * Q) instead would scale the quotient's
		// rounding error by Q and can yield a wrong residue for large c.
		coeffs[i] = math.Round(coeffs[i] - Q*math.Round(coeffs[i]/Q))
	}
}

Expand Down
4 changes: 1 addition & 3 deletions math/poly/asm_convert_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,7 @@ func floatModQInPlace(coeffs []float64, Q float64) {
}

for i := range coeffs {
cQuo := coeffs[i] / Q
cRem := cQuo - math.Round(cQuo)
coeffs[i] = math.Round(cRem * Q)
coeffs[i] = math.Round(coeffs[i] - Q*math.Round(coeffs[i]/Q))
}
}

Expand Down
24 changes: 18 additions & 6 deletions tfhe/asm_decompose_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,15 @@ level_loop_body:
SUBQ $0x03, R8

level_loop_end:
CMPQ DI, $0x00
JGE level_loop_body
ADDQ $0x08, SI
CMPQ DI, $0x01
JGE level_loop_body
VANDPD Y4, Y6, Y6
VANDPD Y0, Y6, Y7
VPSLLD $0x01, Y7, Y7
VPSUBD Y7, Y6, Y6
MOVQ (CX), DI
VMOVDQU Y6, (DI)(SI*4)
ADDQ $0x08, SI

N_loop_end:
CMPQ SI, DX
Expand Down Expand Up @@ -106,9 +112,15 @@ level_loop_body:
SUBQ $0x03, R8

level_loop_end:
CMPQ DI, $0x00
JGE level_loop_body
ADDQ $0x04, SI
CMPQ DI, $0x01
JGE level_loop_body
VANDPD Y4, Y6, Y6
VANDPD Y0, Y6, Y7
VPSLLQ $0x01, Y7, Y7
VPSUBQ Y7, Y6, Y6
MOVQ (CX), DI
VMOVDQU Y6, (DI)(SI*8)
ADDQ $0x04, SI

N_loop_end:
CMPQ SI, DX
Expand Down

0 comments on commit 590b9b2

Please sign in to comment.