Skip to content

Commit

Permalink
fix: Fix for non-amd64 platform
Browse files Browse the repository at this point in the history
  • Loading branch information
sp301415 committed Dec 12, 2023
1 parent 2d0b555 commit b8adbdc
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
10 changes: 10 additions & 0 deletions math/poly/asm_fourier_ops.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//go:build !(amd64 && !purego)

package poly

// monomialDivMinusOneAssign is the pure Go implementation selected by this
// file's build constraint, !(amd64 && !purego).
//
// For every i in [0, len(fp0)) it stores in fpOut[i] the product of fp0[i] and
// the entry of w2NjFull at index (d*bitRevIdxMono[i]) & (4*len(fp0) - 1).
//
// fpOut and bitRevIdxMono are indexed up to len(fp0)-1, so both must hold at
// least len(fp0) elements.
// NOTE(review): w2NjFull presumably holds 4*len(fp0) entries so that every
// masked index is in range — confirm against the caller.
func monomialDivMinusOneAssign(fp0 []complex128, d int, w2NjFull []complex128, bitRevIdxMono []int, fpOut []complex128) {

Check failure on line 5 in math/poly/asm_fourier_ops.go

View workflow job for this annotation

GitHub Actions / Run Go 1.18 Tests

other declaration of monomialDivMinusOneAssign

Check failure on line 5 in math/poly/asm_fourier_ops.go

View workflow job for this annotation

GitHub Actions / Run Go 1.19 Tests

other declaration of monomialDivMinusOneAssign

Check failure on line 5 in math/poly/asm_fourier_ops.go

View workflow job for this annotation

GitHub Actions / Run Go 1.20 Tests

other declaration of monomialDivMinusOneAssign

Check failure on line 5 in math/poly/asm_fourier_ops.go

View workflow job for this annotation

GitHub Actions / Run Go 1.21 Tests

other declaration of monomialDivMinusOneAssign
// N is the number of elements processed; it is taken from the input slice.
N := len(fp0)
for i := 0; i < N; i++ {
// The mask 4N-1 reduces d*bitRevIdxMono[i] modulo 4N only when 4N is a power
// of two. NOTE(review): assumes N is a power of two — confirm.
fpOut[i] = fp0[i] * w2NjFull[(d*bitRevIdxMono[i])&(4*N-1)]
}
}
15 changes: 6 additions & 9 deletions math/poly/asm_fourier_ops_amd64.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

//go:build amd64 && !purego

#include "textflag.h"

Expand All @@ -12,31 +12,28 @@ TEXT ·monomialDivMinusOneAssignAVX2(SB), NOSPLIT, $0-104
MOVQ vOut_len+88(FP), R9 // N

MOVQ R9, R10
ADDQ R9, R10 // 2N

MOVQ R10, R11
ADDQ R10, R11
SUBQ $1, R11 // 4N - 1
SHLQ $2, R10
SUBQ $1, R10 // 4N - 1

XORQ SI, SI
XORQ DI, DI
JMP loop_end

loop_body:
VMOVUPD (AX)(SI*8), X0

// d * bitRevIdxMono[i]
MOVQ (CX)(DI*8), R12
IMULQ R8, R12

// k = (d * bitRevIdxMono[i]) & (4N-1)
ANDQ R11, R12
ANDQ R10, R12

// w2Nj[2k]
// Multiply 2 since we are dealing with two floats...
ADDQ R12, R12
VMOVUPD (BX)(R12*8), X1

VMOVUPD (AX)(SI*8), X0

// fp0[i] * w2Nj[2k]
VSHUFPD $0b01, X1, X1, X2
VSHUFPD $0b11, X0, X0, X3
Expand Down

0 comments on commit b8adbdc

Please sign in to comment.