0% found this document useful (0 votes)

15 views6 pages

FFT Full

The document contains assembly code for a Fast Fourier Transform (FFT) implementation, including functions for computing log2(N), bit-reversal reordering, and the vectorized butterfly stages. It defines data sections for real and imaginary parts, temporary storage, and precomputed twiddle factors and bit-reversal indices. The main execution starts at the _start label, which sets up the stack, runs the FFT, and prints the results.

Uploaded by

fawad.sidd17

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

15 views6 pages

FFT Full

Uploaded by

fawad.sidd17

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 6

.section .text
.global _start

#-------------------------------------------------------------------------------
# _start: program entry point
#   1. point sp at STACK
#   2. store log2(size) into logsize (setlogN)
#   3. run the forward transform in place on real[] / imag[] (vFFT)
#   4. walk the result arrays (print)
#   5. park in _finish
#
#   NOTE: every routine in this file allocates its frame with a negative
#   `addi sp, sp, -k`, i.e. the stack grows downward from the address loaded
#   here, so STACK has to label the HIGH end of the reserved stack region.
#-------------------------------------------------------------------------------
_start:
    # — Stack setup —
    # (no `la ra, ...` here: each `call` below overwrites ra anyway)
    la      sp, STACK

    # — Compute log2(N) for later loops —
    lw      a0, size
    call    setlogN

    # — Run FFT —
    la      a0, real                # a0 = &real[0]
    la      a1, imag                # a1 = &imag[0]
    lw      a2, size                # a2 = N
    call    vFFT

    # — Print or inspect results —
    # print reads its array pointers from a0/a1; reload them instead of
    # relying on vFFT leaving caller-saved argument registers untouched.
    la      a0, real
    la      a1, imag
    call    print

    # — hang forever —
    j       _finish

#-------------------------------------------------------------------------------
# setlogN: logsize = 31 - clz(a0)
#   In:       a0 = value whose bit position is wanted (the caller passes `size`)
#   Out:      the word at `logsize` is overwritten
#   Clobbers: t0, t1
#   For a power-of-two a0 the stored value is log2(a0).
#   NOTE(review): `clz` comes from the Zbb extension and the constant 31 assumes
#   a 32-bit register width — confirm the target is RV32 with Zbb.
#-------------------------------------------------------------------------------
setlogN:
    clz     t0, a0                  # t0 = number of leading zero bits
    neg     t0, t0
    addi    t0, t0, 31              # t0 = 31 - clz(a0)
    la      t1, logsize
    sw      t0, 0(t1)
    ret

#-------------------------------------------------------------------------------
# vOrdina: bit-reverse reorder using a lookup table
#   In:       a0 = &real, a1 = &imag, a2 = N (element count)
#   Out:      real[k] / imag[k] replaced by real[bitrev[k]] / imag[bitrev[k]]
#   Uses:     bitrev (N .word element indices), real_temp, imag_temp (scratch)
#   Clobbers: t0-t6, v0-v2, vl/vtype.  a0-a3, ra and sp are left untouched
#             (leaf routine, no stack frame).
#
#   Pass 1 gathers the permuted elements into the temp arrays; pass 2 copies
#   the temp arrays back.  Going through the temp arrays keeps the gather from
#   reading elements that have already been overwritten.
#-------------------------------------------------------------------------------
vOrdina:
    la      t0, bitrev              # t0 -> next chunk of reversed indices
    la      t1, real_temp           # t1 -> next real_temp slot
    la      t2, imag_temp           # t2 -> next imag_temp slot
    mv      t3, a2                  # t3 = elements still to gather

1:  # — pass 1: gather into temp —
    blez    t3, 2f

    # vl = min(VLMAX, remaining)
    vsetvli t4, t3, e32, m1, ta, ma

    # load a chunk of bit-reversed element indices
    vle32.v v0, (t0)
    # indexed loads take BYTE offsets, the table holds element indices
    vsll.vi v0, v0, 2

    # gather from the caller's arrays
    vloxei32.v v1, (a0), v0
    vloxei32.v v2, (a1), v0

    # store into temp in natural order
    vse32.v v1, (t1)
    vse32.v v2, (t2)

    # advance pointers by vl*4 bytes, count down by vl
    slli    t5, t4, 2
    add     t0, t0, t5
    add     t1, t1, t5
    add     t2, t2, t5
    sub     t3, t3, t4
    j       1b

2:  # — pass 2: copy real_temp -> real, imag_temp -> imag —
    la      t1, real_temp
    la      t2, imag_temp
    mv      t0, a0                  # t0 -> destination in real[]
    mv      t6, a1                  # t6 -> destination in imag[]
    mv      t3, a2                  # t3 = elements still to copy

3:
    blez    t3, 4f
    vsetvli t4, t3, e32, m1, ta, ma

    vle32.v v1, (t1)
    vle32.v v2, (t2)
    vse32.v v1, (t0)
    vse32.v v2, (t6)

    slli    t5, t4, 2
    add     t1, t1, t5
    add     t2, t2, t5
    add     t0, t0, t5
    add     t6, t6, t5
    sub     t3, t3, t4
    j       3b

4:
    ret

#-------------------------------------------------------------------------------
# vTransform: in-place radix-2 transform = bit-reversal reorder (vOrdina)
#             followed by `logsize` vectorized butterfly stages
#   In:       a0 = &real, a1 = &imag, a2 = N, a3 = +1 for FFT / -1 for IFFT
#             `logsize` must already hold the number of stages (see setlogN)
#   Out:      real[] / imag[] overwritten; a0-a3, s0, ra restored on return
#   Uses:     W_real / W_imag twiddle tables, indexed by k in [0, N/2)
#   Clobbers: t0-t5, a4-a6, ft0, v0-v31, vl/vtype, plus vOrdina's clobbers
#
#   Per stage (n = half butterfly span, a = N/(2n)), for every i with
#   (i & n) == 0:
#       k = (i * a) & (N/2 - 1)
#       t = W[k] * x[i+n]            (W_imag sign flipped when a3 < 0)
#       x[i]   = x[i] + t
#       x[i+n] = x[i] - t
#
#   Vector register groups (LMUL = 4):
#       v0  mask            v20 byte offsets of x[i]
#       v24 twiddle offsets, then byte offsets of x[i+n] (kept until stores)
#       v4/v28 W, later x[i];  v8/v12/v16 operands and results
#-------------------------------------------------------------------------------
vTransform:
    addi    sp, sp, -32             # 6 words, rounded up to 16-byte multiple
    sw      ra,  0(sp)
    sw      a0,  4(sp)
    sw      a1,  8(sp)
    sw      a2, 12(sp)
    sw      a3, 16(sp)
    sw      s0, 20(sp)

    # 1. bit-reversal
    call    vOrdina

    # argument registers are caller-saved: reload them after the call
    lw      a0,  4(sp)
    lw      a1,  8(sp)
    lw      a2, 12(sp)
    lw      a3, 16(sp)

    # 2. direction flag as a float; only its sign is used below
    fcvt.s.w ft0, a3

    # 3. pointers to twiddles
    la      t1, W_real
    la      t2, W_imag

    # 4. stage state
    li      a5, 1                   # n = 1
    srai    a4, a2, 1               # a = N/2
    addi    s0, a4, -1              # twiddle index mask = N/2 - 1
    lw      a3, logsize             # number of stages
    li      t3, 0                   # stage counter

    # — Outer loop over stages —
.Lvt_stage:
    bge     t3, a3, .Lvt_done
    slli    a6, a5, 2               # a6 = n*4 (byte distance between partners)
    li      t4, 0                   # t4 = index of first element in the chunk

    # — Inner loop over chunks of vl elements —
.Lvt_chunk:
    bge     t4, a2, .Lvt_next_stage

    sub     t5, a2, t4              # elements left in this stage
    vsetvli t0, t5, e32, m4, ta, ma # t0 = vl for this chunk

    # global element index and byte offset of x[i]
    vid.v       v28
    vadd.vx     v28, v28, t4        # v28 = i = chunk base + lane
    vsll.vi     v20, v28, 2         # v20 = i*4

    # mask = ((i & n) == 0): lanes that own the top half of a butterfly
    vand.vx     v24, v20, a6
    vmseq.vx    v0, v24, zero

    # k = ((i * a) & (N/2 - 1)) << 2
    vmul.vx     v24, v28, a4
    vand.vx     v24, v24, s0, v0.t
    vsll.vi     v24, v24, 2, v0.t

    # load W_real, W_imag; flip W_imag's sign for the inverse transform
    vloxei32.v  v4,  (t1), v24, v0.t
    vloxei32.v  v28, (t2), v24, v0.t
    vfsgnjx.vf  v28, v28, ft0, v0.t

    # load x[i+n]; v24 keeps these offsets for the stores at the end
    vadd.vx     v24, v20, a6, v0.t
    vloxei32.v  v8,  (a0), v24, v0.t
    vloxei32.v  v12, (a1), v24, v0.t

    # t = W * x[i+n]   (v16 = t.re, v12 = t.im)
    vfmul.vv    v16, v4,  v8,  v0.t
    vfnmsac.vv  v16, v28, v12, v0.t
    vfmul.vv    v12, v4,  v12, v0.t
    vfmacc.vv   v12, v28, v8,  v0.t

    # load x[i]
    vloxei32.v  v4,  (a0), v20, v0.t
    vloxei32.v  v28, (a1), v20, v0.t

    # butterfly: top = x[i] + t, bot = x[i] - t
    vfadd.vv    v8,  v4,  v16, v0.t     # top.re
    vfsub.vv    v4,  v4,  v16, v0.t     # bot.re
    vfadd.vv    v16, v28, v12, v0.t     # top.im
    vfsub.vv    v28, v28, v12, v0.t     # bot.im

    # store back: top -> x[i], bot -> x[i+n]
    vsoxei32.v  v8,  (a0), v20, v0.t
    vsoxei32.v  v16, (a1), v20, v0.t
    vsoxei32.v  v4,  (a0), v24, v0.t
    vsoxei32.v  v28, (a1), v24, v0.t

    add     t4, t4, t0
    j       .Lvt_chunk

.Lvt_next_stage:
    slli    a5, a5, 1               # n <<= 1
    srai    a4, a4, 1               # a >>= 1
    addi    t3, t3, 1
    j       .Lvt_stage

.Lvt_done:
    # restore
    lw      ra,  0(sp)
    lw      a0,  4(sp)
    lw      a1,  8(sp)
    lw      a2, 12(sp)
    lw      a3, 16(sp)
    lw      s0, 20(sp)
    addi    sp, sp, 32
    ret

#-------------------------------------------------------------------------------
# vFFT / vIFFT wrappers
#
# vFFT: forward transform, in place
#   In:  a0 = &real, a1 = &imag, a2 = N
#   Calls vTransform with a3 = +1.  a3 is overwritten here; everything else is
#   whatever vTransform leaves behind.
#-------------------------------------------------------------------------------
vFFT:
    addi    sp, sp, -16             # only ra is saved; 16 keeps sp 16-byte aligned
    sw      ra, 0(sp)
    li      a3, 1                   # forward
    call    vTransform
    lw      ra, 0(sp)
    addi    sp, sp, 16
    ret

# vIFFT: inverse transform, in place
#   In:  a0 = &real, a1 = &imag, a2 = N
#   Calls vTransform with a3 = -1.
#   NOTE: the result is NOT scaled by 1/N — callers that need a true inverse
#   must divide real[] / imag[] by N themselves.
vIFFT:
    addi    sp, sp, -16             # only ra is saved; 16 keeps sp 16-byte aligned
    sw      ra, 0(sp)
    li      a3, -1                  # inverse
    call    vTransform
    # (optional) divide by N in-place here…
    lw      ra, 0(sp)
    addi    sp, sp, 16
    ret

#-------------------------------------------------------------------------------
# print: simple vector dump
#   In:       a0 = &real, a1 = &imag   (element count is read from `size`)
#   Out:      nothing is written to memory; each chunk of real[] is loaded into
#             v0 and each chunk of imag[] into v8, so the values show up in the
#             vector registers while stepping or tracing.
#   Clobbers: a2 (= size on return), t0-t4, v0, v8, vl/vtype
#             a0, a1, ra and sp are left untouched (leaf routine, no frame).
#-------------------------------------------------------------------------------
print:
    lw      a2, size
    mv      t0, a2                  # t0 = elements still to visit
    mv      t1, a0                  # t1 -> next chunk of real[]
    mv      t2, a1                  # t2 -> next chunk of imag[]

1:
    blez    t0, 2f
    # re-derive vl every pass so a short final chunk never reads past the arrays
    vsetvli t3, t0, e32, m1, ta, ma
    vle32.v v0, (t1)
    vle32.v v8, (t2)
    slli    t4, t3, 2               # bytes consumed this pass
    add     t1, t1, t4
    add     t2, t2, t4
    sub     t0, t0, t3
    j       1b

2:
    ret

# _finish: terminal loop — repeatedly stores the byte 0xff to address
# 0xd0580000 and never returns.
# NOTE(review): 0xd0580000 is presumably a test-bench / simulator mailbox and
# 0xff its "end of test" code — confirm against the target platform.
# Uses x3 (gp in the ABI naming) and x5 (t0) as scratch; fine here because
# nothing runs after this point.
_finish:
li x3, 0xd0580000
li x5, 0xff
sb x5, 0(x3)
# unconditional branch back: keep writing forever
beq x0, x0, _finish

#-------------------------------------------------------------------------------
# Data Section
#-------------------------------------------------------------------------------
.section .data
.equ N, 1024                        # transform length (elements per array)
.equ N2, N/2
.equ STACK_SIZE, 4096               # bytes reserved for the stack

.balign 4
size: .word N                       # element count read by _start / print
logsize: .word 0                    # filled in at run time by setlogN

# — Working arrays: N single-precision floats each, zero-initialized —
real:
.rept N
.float 0.0
.endr

imag:
.rept N
.float 0.0
.endr

# — Scratch arrays used by the bit-reversal reorder —
real_temp:
.rept N
.float 0.0
.endr

imag_temp:
.rept N
.float 0.0
.endr

# — Precomputed twiddle tables (generated offline) —

W_real:
.include "W_real.inc"

W_imag:
.include "W_imag.inc"

# — Precomputed bit-reversal indices (0..N–1) —

bitrev:
.include "bitrev.inc"

# — Stack —
# The code allocates frames with negative sp adjustments, so the stack grows
# toward lower addresses.  STACK therefore labels the END (highest address) of
# the reserved region; placing it at the start would let the first frames
# overwrite the tail of the bitrev table above.
.balign 16
.space STACK_SIZE
STACK:

330acac562ddee2b 0000000000000000 Vs
No ratings yet
330acac562ddee2b 0000000000000000 Vs
30 pages
5a41baf724c1cff3 0000000000000000 Vs
No ratings yet
5a41baf724c1cff3 0000000000000000 Vs
30 pages
Df832bc2e6d22e45 0000000000000000 Vs
No ratings yet
Df832bc2e6d22e45 0000000000000000 Vs
30 pages
3fae14064195391b 0000000000000000 Vs
No ratings yet
3fae14064195391b 0000000000000000 Vs
31 pages
A225baec4db6d89e 0000000000000000 Vs
No ratings yet
A225baec4db6d89e 0000000000000000 Vs
33 pages
C01cc5b7af21f689 0000000000000000 Vs
No ratings yet
C01cc5b7af21f689 0000000000000000 Vs
33 pages
Ec248df3384d3d18 0000000000000000 Vs
No ratings yet
Ec248df3384d3d18 0000000000000000 Vs
31 pages
ccc6fb8b53f5f651 0000000000000000 Vs
No ratings yet
ccc6fb8b53f5f651 0000000000000000 Vs
33 pages
d321199dc854621f 0000000000000000 Vs
No ratings yet
d321199dc854621f 0000000000000000 Vs
30 pages
E082c1f638f8e81e 0000000000000000 Vs
No ratings yet
E082c1f638f8e81e 0000000000000000 Vs
30 pages
B3609db7d1363f6a 0000000000000000 Vs
No ratings yet
B3609db7d1363f6a 0000000000000000 Vs
7 pages
Geometry 0
No ratings yet
Geometry 0
8 pages
2011 Quiz 4 Sol
No ratings yet
2011 Quiz 4 Sol
17 pages
Geometry 0
No ratings yet
Geometry 0
5 pages
Geometry 1
No ratings yet
Geometry 1
5 pages
Sspe
No ratings yet
Sspe
7 pages
Assembly Lab10 LeXuanHieu 20215201
No ratings yet
Assembly Lab10 LeXuanHieu 20215201
8 pages
Geometry 0
No ratings yet
Geometry 0
4 pages
3 Tobias Grosser 2017 Day2
No ratings yet
3 Tobias Grosser 2017 Day2
122 pages
MSH 3arf Fe Sekoe L7shad Triangle 3
No ratings yet
MSH 3arf Fe Sekoe L7shad Triangle 3
3 pages
Geometry 1
No ratings yet
Geometry 1
4 pages
Geometry 1
No ratings yet
Geometry 1
3 pages
Geometry 585
No ratings yet
Geometry 585
4 pages
Geometry 2
No ratings yet
Geometry 2
3 pages
Geometry 0
No ratings yet
Geometry 0
3 pages
Computer Architecture Course: IT089IU International University - VNU HCM Date: March 2021 Dr. Le Hai Duong Time: 3 Hours
No ratings yet
Computer Architecture Course: IT089IU International University - VNU HCM Date: March 2021 Dr. Le Hai Duong Time: 3 Hours
8 pages
Arm Reference
No ratings yet
Arm Reference
3 pages
Lab Program - 1: Implement Bresenham's Line Drawing Algorithm For All Types of Slope
No ratings yet
Lab Program - 1: Implement Bresenham's Line Drawing Algorithm For All Types of Slope
25 pages
ABIexp
No ratings yet
ABIexp
2 pages
3.2: Arrays of Integers in Mips Assembly Language: CSC 256 Lab Manual 3.2.1
No ratings yet
3.2: Arrays of Integers in Mips Assembly Language: CSC 256 Lab Manual 3.2.1
13 pages
2022 Scheme Verilog Programs
No ratings yet
2022 Scheme Verilog Programs
4 pages
Web GPU
0% (1)
Web GPU
40 pages
Ca Lab Programs
No ratings yet
Ca Lab Programs
12 pages
EXP4
No ratings yet
EXP4
5 pages
Elec3010 HW6 S2025
No ratings yet
Elec3010 HW6 S2025
11 pages
Geometry 11
No ratings yet
Geometry 11
5 pages
Geometry 295
No ratings yet
Geometry 295
4 pages
Đỗ Ngọc Đức - Ititiu22034 - Ca - lab7
No ratings yet
Đỗ Ngọc Đức - Ititiu22034 - Ca - lab7
3 pages
502FA090
No ratings yet
502FA090
3 pages
Nguyễn Minh Hùng - Ititiu22034 - Ca - lab7 PDF
No ratings yet
Nguyễn Minh Hùng - Ititiu22034 - Ca - lab7 PDF
3 pages
Chapter 04
No ratings yet
Chapter 04
12 pages
Vector Code Example
No ratings yet
Vector Code Example
6 pages
Geometry 5
No ratings yet
Geometry 5
5 pages
My First Program On D Flip Flop
No ratings yet
My First Program On D Flip Flop
39 pages
Arm Example
No ratings yet
Arm Example
4 pages
Lab 07-1
No ratings yet
Lab 07-1
4 pages
Practice Questions
No ratings yet
Practice Questions
3 pages
Ripemd160 256
No ratings yet
Ripemd160 256
6 pages
Code Composer Studio Programs: Cycle - Ii
No ratings yet
Code Composer Studio Programs: Cycle - Ii
18 pages
N Points DTFT
No ratings yet
N Points DTFT
2 pages
Module FIR
No ratings yet
Module FIR
3 pages
Sheet 1 Solution
No ratings yet
Sheet 1 Solution
7 pages
RV32im Reference Card v02
No ratings yet
RV32im Reference Card v02
2 pages
Labview Database 1234
No ratings yet
Labview Database 1234
66 pages
Chapter 4 Solutions: Case Study: Implementing A Vector Kernel On A Vector Processor and GPU
No ratings yet
Chapter 4 Solutions: Case Study: Implementing A Vector Kernel On A Vector Processor and GPU
12 pages
Lab 3
No ratings yet
Lab 3
1 page
Assignment 4
No ratings yet
Assignment 4
10 pages
Final Year Project - Proposal Defence PDF
No ratings yet
Final Year Project - Proposal Defence PDF
23 pages
Verilog Code For Fir Filter
No ratings yet
Verilog Code For Fir Filter
58 pages
Infineon TC1762 DS v01 - 00 en PDF
No ratings yet
Infineon TC1762 DS v01 - 00 en PDF
114 pages
Aloka Training
No ratings yet
Aloka Training
225 pages
FF7AN - Credit Management
No ratings yet
FF7AN - Credit Management
6 pages
Brook Wingman XE2 (202309V4)
No ratings yet
Brook Wingman XE2 (202309V4)
75 pages
SCCM vs. Intune - A Closer Look at The Capabilities of Each
No ratings yet
SCCM vs. Intune - A Closer Look at The Capabilities of Each
6 pages
Monitoring JMX With Nagios XI
No ratings yet
Monitoring JMX With Nagios XI
11 pages
Chapter 3 - Package Management
No ratings yet
Chapter 3 - Package Management
17 pages
T2 Searching Algorithms
No ratings yet
T2 Searching Algorithms
25 pages
Manual Printer Dotmatrix 76mm Impact Printer User Manual V1.0.76
No ratings yet
Manual Printer Dotmatrix 76mm Impact Printer User Manual V1.0.76
19 pages
What Is Adobe Zii and How To Use It PDF
No ratings yet
What Is Adobe Zii and How To Use It PDF
15 pages
ICCS Syllabus - CPC 103 - PC Competence: A. B. C. D. E. F. G
No ratings yet
ICCS Syllabus - CPC 103 - PC Competence: A. B. C. D. E. F. G
4 pages
Disc08 Sols
100% (1)
Disc08 Sols
8 pages
VVV
No ratings yet
VVV
13 pages
Ictl Form 2
No ratings yet
Ictl Form 2
10 pages
Dwyth Anne L. Monteras Grade 9-Lakandula TLE (ICT) 1.7
No ratings yet
Dwyth Anne L. Monteras Grade 9-Lakandula TLE (ICT) 1.7
4 pages
History of Computers
No ratings yet
History of Computers
36 pages
Toa Final Spring 2024
No ratings yet
Toa Final Spring 2024
7 pages
CCN Assignment01
No ratings yet
CCN Assignment01
7 pages
Practice: Open File Named Poohsticks Rating - Vlookup Answer, and Study The VLOOKUP Formula For The Following Instructions
No ratings yet
Practice: Open File Named Poohsticks Rating - Vlookup Answer, and Study The VLOOKUP Formula For The Following Instructions
3 pages
Can OpenStack Run Over A VXLAN Fabric Without An Overlay Controller
No ratings yet
Can OpenStack Run Over A VXLAN Fabric Without An Overlay Controller
3 pages
Automata Sessional-I (Solution) (Fall-2020)
No ratings yet
Automata Sessional-I (Solution) (Fall-2020)
12 pages
Merge Sort - Quick Sort - Exercises: Unit 28 1
100% (1)
Merge Sort - Quick Sort - Exercises: Unit 28 1
13 pages
Relay For Voltage Control: Technical Data
No ratings yet
Relay For Voltage Control: Technical Data
1 page
Workout
No ratings yet
Workout
5 pages
IFD5 Manual - Issue 5
No ratings yet
IFD5 Manual - Issue 5
30 pages
Disc09 Sols
No ratings yet
Disc09 Sols
7 pages
IoT Project Template
No ratings yet
IoT Project Template
54 pages
d5952691062b0f Webleaflet Eng Amiko A4 Ott v170104
No ratings yet
d5952691062b0f Webleaflet Eng Amiko A4 Ott v170104
2 pages
17.8.1 - CCN Lab - Documentation
No ratings yet
17.8.1 - CCN Lab - Documentation
1 page
Genexus Trial Tutorial EN PDF
No ratings yet
Genexus Trial Tutorial EN PDF
45 pages
Optional (If You Have Experience)
No ratings yet
Optional (If You Have Experience)
1 page
Function Generator Using X86 Microprocessor
No ratings yet
Function Generator Using X86 Microprocessor
7 pages
6393 Question Paper
No ratings yet
6393 Question Paper
2 pages
Module 9 Shift Registers (Student)
No ratings yet
Module 9 Shift Registers (Student)
8 pages
L 18 Java Package and Access Specifiers
No ratings yet
L 18 Java Package and Access Specifiers
4 pages
6 Online Tools For Generating and Testing Cron Jobs For Linux
No ratings yet
6 Online Tools For Generating and Testing Cron Jobs For Linux
4 pages
Lisp Interpreter in Rust
From Everand
Lisp Interpreter in Rust
Vishal Patil
1/5 (1)
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet

FFT Full

Uploaded by

FFT Full

Uploaded by

.section .

# — Compute log2(N) for later loops —

# — Print or inspect results —

la t0, bitrev # table of N .word reversed indices

li t3, 0 # processed count

1: bge t3, a2, 2f

# set vector length = min(VLEN, remaining)

# load a chunk of bit-reversed indices

# scatter into temp at normal order

# advance pointers by VL×4 bytes

3: bge t3, a2, 4f

vle32.v v1, 0(t1)

vse32.v v1, 0(t6)

slli t8, t5, 2

add t3, t3, t5

# 2. load inverse flag into ft0

# 4. vector-length for data

# — Outer loop over stages —

L2: blt t4, a2, Lbody

# k = ((i * a) & (N/2–1)) << 2

# load W_real, W_imag

# butterfly: top=x[i]+t, bot=x[i]–t

add t4, t4, t0

# — Precomputed twiddle tables (generated offline) —

# — Precomputed bit-reversal indices (0..N–1) —

You might also like