0% found this document useful (0 votes)
20 views

SystemC-n-BehaviorCoding_Fall2021_Section5_HLS

Uploaded by

Hua-Chien Chang
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PPTX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
20 views

SystemC-n-BehaviorCoding_Fall2021_Section5_HLS

Uploaded by

Hua-Chien Chang
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PPTX, PDF, TXT or read online on Scribd
You are on page 1/ 63

+

SystemC, Behavior Coding


and Modeling
蘇培陞 博士
Alan P. Su, Ph.D.
[email protected], [email protected]
+
2

Drag picture to plac


Section 5
High Level Synthesis

Drag picture to plac


+ 3

Why use HLS?

• Area reduction: with proper coding, an area reduction of 15% or more can be expected
 Exhaustive area/timing trade-off

• Shortened design cycle: an overall 25% cut in the design cycle
 Higher level of abstraction
 Shorter code
 Easy to verify
+ 4

A First Example
$y'' + 3xy' + 3y = 0$ in the interval $[0, a]$ with step size $dx$
and initial values $x(0) = x$, $y(0) = y$, $y'(0) = u$

Behavior Code:
diffeq {
    read (x, y, u, dx, a);
    repeat {
        x1 = x + dx;
        u1 = u - ( 3 * x * u * dx ) - ( 3 * y * dx );
        y1 = y + u * dx;
        c = x1 < a;
        x = x1; u = u1; y = y1;
    }
    until ( c );
    write y;
}

Synthesized
Circuit
* ALU Steering & Memory Control Unit
+ 5

IIR Filter

y = mid + a2 * mid[k-1] + b2 * mid[k-2]


mid = x + a1 * mid[k-1] + b1 * mid[k-2]

mid y
x + +
a1 a2

+ * * +

* *

b1 b2
+ 6

Behavior Code in SystemC


// IIR.h #include “IIR.h”
#include “systemc.h” void IIR::iir()
SC_MODULE(IIR) {
{ mid = 0; mid1 = 0; mid2 = 0;
sc_in<sc_uint<32> > input; while (1) {
sc_out<sc_uint<32> > output; wait();
sc_in<bool> enable; x = input.read();
sc_in<bool> clk; y = mid + a2 * mid1 + b2 * mid2;
void iir(); mid = x + a1 * mid1 + b1 *
sc_uint<32> x, y; mid2;
sc_signal<sc_uint<32> > mid, mid1, mid2 = mid1;
mid1 = mid;
mid2; output.write(y);
sc_uint<32> a1, a2, b1, b2; }
SC_CTOR(IIR) { }
SC_CTHREAD(iir,
clk.pos());
reset_signal_is(enable,
false);
a1 = 354; a2 = 1799;
b1 = 573; b2 = 1254;
}
};
+ 7

Behavior States
!enable mid = 0;
mid1 = 0;
mid2 = 0;
Behavior Code
0 Single
mid = 0; Cycle
enable
mid1 = 0;
mid2 = 0; !enable Multiple
Cycles
while (1) {
wait();
x = input.read();
y = mid + a2 *
while (1) {
mid1
wait();
+ b2 * mid2; 1 y = mid + a2 *
mid = x + a1 *
mid1
mid1
+ b2 * mid2;
+ b1 * mid2;
mid = x + a1 *
mid2 = mid1;
mid1 = mid;
enable mid1
+ b1 * mid2;
output.write(y);
mid2 = mid1;
}
mid1 = mid;
}
+ 8

High Level Synthesis

• A Hardware Description Language (HDL) compiler that takes in a component described at the behavior level and synthesizes it down to Register Transfer Level (RTL)
 Steps:
 Lexical (syntax) analysis (lex)
 Semantic analysis (yacc, bison)
 Resource Allocation
 Scheduling
+ FSM 9

!enable mid=0; mid1=0; mid2=0; void FSM() {


if (enable) state = 1; switch (state) {
case 0:
else state = 0; mid=0; mid1=0; mid2=0;
if (enable) state = 1;
else state = 0;

enable 0 break;
case 1:
!enable x = input.read();
tmp0 = a2 * mid1;
tmp1 = b2 * mid2;
x = input.read();
state = 2; break;
tmp0 = a2 * mid1; case 2:
tmp1 = b2 * mid2; tmp2 = a1 * mid1;
state = 2 tmp3 = b1 * mid2;

1 4 tmp4 = tmp0 + tmp1;


state = 3; break;
case 3:
y = mid + tmp4;
enable tmp5 = tmp2 + tmp3;
mid = x + tmp5;
state = 4; break;
mid2 = mid1;mid1 = case 4:
mid; mid = x + tmp5;
2 3 output.write(y); mid2 = mid1; mid1 = mid;
if (enable) state = 1; output.write(y);
else state = 0; if (enable) state = 1;
else state = 0;
tmp2 = a1 * mid1; break;
tmp3 = b1 * mid2; y = mid + tmp4; } }
tmp4 = tmp0 + tmp1; tmp5 = tmp2 + tmp3;
state = 3; state = 4;
+ Datapath & Behavior Code 10

0 0
sel8 sel7

a1 a2 mid1 b1 b2 mid2 void iir()


{
sel0 sel1 mid = 0; mid1 = 0; mid2 = 0;
while (1) {

* * wait();
x = input.read();
y = mid + a2 * mid1 + b2 *
0 mid2;
input mid = x + a1 * mid1 + b1 *
sel6
mid2;
mid2 = mid1;
tmp0 tmp4 tmp1 mid tmp2 tmp5 tmp3 x
mid1 = mid;
output.write(y);
sel2 sel3 sel4 sel5
}
}
1 enable
+
+
y
=
output
+ FSM & Behavior Code 11

void FSM() { tmp2.enb = 1;


switch (state) { tmp3.enb = 1; void iir()
case 0: sel0.s = 0; sel1.s = {
mid.enb=1; 0; mid = 0; mid1 = 0; mid2 = 0;
mid1.enb=1; state = 3; break;
sel6.s =0;
mid2.enb=1; y.enb
case 3: = 1;
while (1) {
sel7.s = 1; sel2.s = 1; sel3.s = wait();
sel8.s = 0; 1; tmp5.enb = 1; x = input.read();
if (enable) state = y = mid + a2 * mid1 + b2 *
sel4.s = 0; sel5.s =
1; mid2;
0;
else state mid = x + a1 * mid1 + b1 *
state = 4; break;
= 0; mid.enb = 1; mid2;
x.enb case 4:
break;= 1; sel6.s = 1; mid2 = mid1;
casetmp0.enb
1: = 1; sel4.s = 1; sel5.s = mid1 = mid;
tmp1.enb = 1; 1; mid2.enb = 1; output.write(y);
sel0.s = 1; sel1.s = sel7.s = 0; }
1;
mid1.enb = 1; }
state = 2; break;
sel8.s = 1;
tmp4.enb
case 2: = 1; if (enable) state =
sel2.s = 0; sel3.s = 1;
0; else state
= 0;
break;
} }
12

+
Drag picture to placeholder or click ico

Not BS Drag picture to plac

Synthesizable Subset
+ 13

Traditional Synthesizable Subset


• Don'ts: given the complete language, exclude the constructs that are not synthesizable, e.g.
$display ( "Hello World" );
y <= a after 3 ns;
• Do's: then give guidelines that specify how to code (modeling), e.g. J-K flip-flop, D flip-flop, synchronous vs. asynchronous resets, FSM, etc.
• Because of this modeling constraint, the traditional synthesizable subset can only support data-intensive applications such as filters.
+ 14

SystemC Synthesizable Subset


• Use C++ plus the SystemC extras, then cross out the clauses that are not synthesizable.

Almost no modeling suggestions


+ 15

SC_MODULE Processes

SC_METHOD supported
SC_THREAD not supported
SC_CTHREAD supported
+ 16

Synthesizable Subset Niche

 Users are expecting to leverage as much as possible


C++ features
 Synthesizer should be modeling insensitive
 Should be able to handle control intensive designs as
well
 Key to close the gap between algorithm and
implementation
+ 17

Extra Extensions Proposed I

 SC_METHOD
 Blocking,
no wait()
 No sc_in_clk type port
 Signals in the sensitivity list are synthesized as
synchronous signals
 With at least one sc_in_clk type port
 Must be listed in the sensitivity list with positive or
negative triggering specified
 Other signals in the sensitivity list are synthesized as
asynchronous signals
 sc_out_clk type port removed in SystemC 2.1
+ 18

Extra Extensions Proposed II

 SC_CTHREAD
 Non-blocking,with wait()
 Clock pin must be specified
 Sensitivity list, if any, is asynchronous
 Reset pin if specified
 reset_signal_is(reset, true); // 2.1
 async_reset_signal_is(reset, true); // 2.3
 wait();§
 wait(no_of_cycles);§
 wait(signal_in_sensitivity_list);§
 wait_until()is removed from SystemC 2.1

§ OSCI SystemC Synthesizable Subset Draft 1.1.18 , Section 8.3, p39


+ 19

Extra Extensions Proposed III


 SC_THREAD
 Non-blocking,
with wait()
 No sc_in_clk type port
 Signals in the sensitivity list are synthesized as
synchronous signals
 With at least one sc_in_clk type port
 Must be listed in the sensitivity list with positive or
negative triggering specified
 Other signals in the sensitivity list are synthesized as
asynchronous signals
 sc_out_clk type port is removed from SystemC
2.1
 wait();§
 wait(no_of_cycles);§
 wait(signal_in_sensitivity_list);§
§ OSCI SystemC Synthesizable Subset Draft 1.1.18, Section 8.3, p39
+ 20

Modeling Reset
// IIR.h void iir()
SC_MODULE(IIR) {
{ //Reset
sc_in<sc_uint<32> > input; mid = 0; mid1 = 0; mid2 = 0;
sc_out<sc_uint<32> > output; // Function body
sc_in<bool> enable; while (1) {
sc_in<bool> clk; wait();
void iir(); x = input.read();
sc_uint<32> x, y; y = mid + a2 * mid1 + b2 * mid2;
sc_signal<sc_uint<32> > mid, mid = x + a1 * mid1 + b1 * mid2;
mid1, mid2 = mid1;
mid2;
mid1 = mid;
sc_uint<32> a1, a2, b1, b2;
output.write(y);
SC_CTOR(IIR) {
}
SC_CTHREAD(iir, clk.pos());
}
reset_signal_is(enable, false);
a1 = 354; a2 = 1799;
b1 = 573; b2 = 1254;
}
};
+ 21

Supported C Data Types

Operators Supported:
Unary: +, -
Arithmetic: +, -, *, /, %, ++, --
Shift: <<, >>
Logic: <, <=, >, >=, ==, !=, &, ~
Conditional: a ? b : c

Floating Point Types: Not Supported
+ 22

Compound Data Types


+ 23

SystemC Data Types


Integer
+ Type Supported Operators 24
+ 25

Fixed Point Types


+ 26

Integer Type Supported Methods


+ 27

Declaration
+ Derived Module 28
+Other Important Supported Features 29

 User define types


 User define classes
 Inheritance

 Virtual function
+ 30

Pointers

• Users are expecting to leverage as many C++ features as possible
• Synthesizer should be modeling insensitive
• Should be able to handle control-intensive designs as well
• Key to closing the gap between algorithm and implementation
+
Non-synthesizable Pointers

/// Copies a `width` x `height` block of 16-bit samples from `src` to `dst`.
///
/// @param src         first sample of the first source row
/// @param src_stride  number of samples to advance `src` after each row
/// @param dst         first sample of the first destination row
/// @param dst_stride  number of samples to advance `dst` after each row
/// @param width       samples copied per row
/// @param height      number of rows copied
void filter_copy(int16_t *src, int32_t src_stride,
                 int16_t *dst, int32_t dst_stride,
                 int32_t width, int32_t height) {
    int32_t bit_depth = 8;  // declared on the slide; not used by the copy
    int32_t row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            dst[col] = src[col];
        }
        // Pointer computation: not allowed (this is what makes the function
        // non-synthesizable).
        src += src_stride;
        dst += dst_stride;
    }
}
+
Synthesizable Pointers

void filter_copy_p2p::filter_copy() {
// Data structure defined in SC_MODULE
sc_int<32> bit_depth = 8;
sc_int<32> row, col;
sc_int<32> srcI = 0; dstI = 0;
sc_int<16> *srcP = &src[0], *dstP = &dst[0];

for (row = 0; row < height; row++) {


for (col = 0; col < width; col++) {
dstP[col] = srcP[col];
Turn pointer
} computation into
srcI += src_stride; index computation,
dstI += dst_stride; then use pointer
srcP = &src[srcI]; assignment
dstP = &dst[dstI];
}
}
33

+
Drag picture to placeholder or click ico

Not BS Drag picture to plac

Events
+ 34

Event Usage

A, B & C: Processes
B D: Channel
D
A
C

Event Notify B Notify C

Looks like a signal, but it is a simulation control mechanism, not a physical signal.
Non-synthesizable; do not use in behavior code.
+ 35

Blocking vs. Non-blocking

SC_MODULE(two-proc) {
#ifdef BLOCKING
sc_uint<8> a, b;
#else
sc_signal<sc_uint<8> > a, b;
#endif

SC_METHOD(proc1);
SC_METHOD(proc2);
}
void proc1() {
a = a + 1;

}

void proc2() {
b = a; // non-deterministic if blocking
… // deterministic only if non-blocking
}
36

+
Drag picture to placeholder or click ico

Not BS Drag picture to plac

Coding & Area


+ 37

A 8-1 Multiplexer
sc_in<sc_uint<4> > nbits;

for (i=0; i<nbits.read(); i++) {
switch (i) {
case 0: buf = inp[0].read();
break;
case 1: buf = inp[1].read();
break;
case 2: buf = inp[2].read();
break;
case 3: buf = inp[3].read();
break; 3
case 4: buf = inp[4].read();
break;
case 5: buf = inp[5].read();
break;
case 6: buf = inp[6].read();
break;
case 7: buf = inp[7].read();
break;
}
}
+ 38

A Simpler form, with a problem

sc_in<sc_uint<4> > nbits;

for (i=0; i<nbits.read(); i++) {

buf = inp[i]; // nbits is 4 bits

4
+ 39

A Fix in SW Mindset

sc_in<sc_uint<4> > nbits;

for (i=0; i<nbits.read(); i++) {

if (i < 8)

buf = inp[i];
3
<
}

3
+ 40

A Fix in HW Mindset

sc_in<sc_uint<4> > nbits;

for (i=0; i<nbits.read().range(2,0); i++) {

buf = inp[i];

3
41

Issues with
Macro Architecture
+ 42

Specification

Data
M S
Application/ Data Data Application/
Stimuli transmit receiver Sink
S M
Acknowledge
+ 43

Macro Architecture 1

timer

Data
M S

transmit bus receiver display


S M

Acknowledge
+ 44

Macro Architecture 2

timer

Data
M S

transmit bus receiver display


S M

Acknowledge

1. Seems to work at the behavior level, when every FU has a 1-cycle delay
and there is no competition on the bus.
2. Impractical to implement at RTL, since control and data inherently
race with each other. Notice that the bus latency may not be 1.
+ 45

Key Point

• In a communication system, synchronous data flow (SDF) modeling must be followed
• Avoid race conditions between control and data
• Being behaviorally correct at the macro-architecture level does not imply that a working implementation exists
• HLS is not an architecture-level tool; it is an IP-level tool
+ 46

Notice

• HLS synthesizes each process individually. If multiple processes are present in a module, care must be taken regarding the synchronization between these processes. In particular, the blocking behavior between two processes also needs to be taken care of
• Suggestion: avoid having multiple processes in a module. While this is common practice in RTL coding, at a high level it creates a lot of data and control synchronization issues
47

+
Drag picture to placeholder or click ico

Not BS Drag picture to plac

RTL Modeling
+ 48

D Flip-Flop, No Reset
#include "systemc.h“
SC_MODULE (DFF_P) {
sc_in<bool> D;
sc_out<sc_uint<1> > Q;
sc_in<bool> clock;

sc_signal<sc_uint<1> > reg;


void dff() {
reg.write(D.read());
Q.write(reg.read());
}
SC_CTOR (DFF_P) {
SC_METHOD (dff);
sensitive << clock.pos();
}
};
SystemC 2.1: typedef sc_in<bool> sc_in_clk;
+ 49

DFF, Active-High Asyn. Reset

#include "systemc.h"
// D flip-flop with active-high asynchronous reset.
SC_MODULE (DFF_AH_AR) {
    sc_in<bool> D, reset;
    sc_out<bool> Q;
    sc_in<bool> clock;

    sc_signal<bool> reg;

    // Triggered by a rising clock edge or a rising reset edge.
    void dff_ah_ar () {
        // Compute the next state once and drive both reg and Q from it.
        // Reading reg back after writing it would return its previous value,
        // because reg is a signal.
        const bool next = reset.read() ? false : D.read();
        reg.write(next);
        Q.write(next);
    }

    SC_CTOR (DFF_AH_AR) {
        SC_METHOD (dff_ah_ar);
        sensitive << clock.pos() << reset.pos();
    }
};
+ 50

DFF, Active-Low Asyn. Reset

#include "systemc.h"
// D flip-flop with active-low asynchronous reset.
SC_MODULE (DFF_AL_AR) {
    sc_in<bool> D, reset;
    sc_out<bool> Q;
    sc_in<bool> clock;
    sc_signal<bool> reg;

    // Triggered by a rising clock edge or a falling reset edge.
    void dff_al_ar () {
        // Drive reg and Q from the same next-state value. A read-back of the
        // signal reg in this activation would still yield its old value.
        const bool next = !reset.read() ? false : D.read();
        reg.write(next);
        Q.write(next);
    }

    SC_CTOR (DFF_AL_AR) {
        SC_METHOD (dff_al_ar);
        sensitive << clock.pos();
        sensitive << reset.neg();
    }
};
+ 51

DFF, Active-High Asyn. Set &


Reset
#include "systemc.h"
// D flip-flop with active-high asynchronous set and reset.
// Reset has priority over set.
SC_MODULE (DFF_AH_AS_AR) {
    sc_in<bool> D, reset, set;
    sc_out<bool> Q;
    sc_in<bool> clock;
    sc_signal<bool> reg;

    // Triggered by a rising edge of clock, reset or set.
    void dff_ah_as_ar () {
        bool next;
        if (reset.read()) {
            next = false;
        } else if (set.read()) {
            next = true;
        } else {
            next = D.read();
        }
        // Drive both from the computed value: reg is a signal, so reading it
        // back here would return the value from before this activation.
        reg.write(next);
        Q.write(next);
    }   // this closing brace was missing on the slide

    SC_CTOR (DFF_AH_AS_AR) {
        SC_METHOD (dff_ah_as_ar);
        sensitive << clock.pos() << reset.pos()
                  << set.pos();
    }
};
+ 52

DFF, Active-High Syn. Reset


#include "systemc.h"
// D flip-flop with active-high synchronous reset.
SC_MODULE (DFF_AH_SR) {
    sc_in<bool> D, reset;
    sc_out<bool> Q;
    sc_in<bool> clock;

    sc_signal<bool> reg;

    // Runs on rising clock edges only; reset is sampled like a data input.
    void dff_ah_sr () {
        // Q must take the new state on this edge. Writing Q from a read-back
        // of the signal reg would hand it the previous state, so a reset
        // would not reach Q until one edge later.
        const bool next = reset.read() ? false : D.read();
        reg.write(next);
        Q.write(next);
    }

    SC_CTOR (DFF_AH_SR) {
        SC_METHOD (dff_ah_sr);
        sensitive << clock.pos();
    }
};
+ 53

DFF, Active-Low Syn. Reset

#include "systemc.h"
// D flip-flop with active-low synchronous reset.
SC_MODULE (DFF_AL_SR) {
    sc_in<bool> D, reset;
    sc_out<bool> Q;
    sc_in<bool> clock;
    sc_signal<bool> reg;

    // Runs on rising clock edges only; reset (active low) is sampled there.
    void dff_al_sr () {
        // Drive reg and Q from the same next-state value. Reading the signal
        // reg back in this activation would return its previous value.
        const bool next = !reset.read() ? false : D.read();
        reg.write(next);
        Q.write(next);
    }

    SC_CTOR (DFF_AL_SR) {
        SC_METHOD (dff_al_sr);
        sensitive << clock.pos();
    }
};
+ 54

DFF, Active-High Syn. Set & Reset

#include "systemc.h"
// D flip-flop with active-high synchronous set and reset.
// Reset has priority over set.
SC_MODULE (DFF_AH_SS_SR) {
    sc_in<bool> D, reset, set;
    sc_out<bool> Q;
    sc_in<bool> clock;
    sc_signal<bool> reg;

    // Runs on rising clock edges only; reset and set are sampled there.
    void dff_ah_ss_sr () {
        bool next;
        if (reset.read()) {
            next = false;
        } else if (set.read()) {
            next = true;
        } else {
            next = D.read();
        }
        // All three branches now drive Q with the new state. Previously only
        // the data branch did; the reset/set branches wrote Q from a
        // read-back of the signal reg, which still held the old value.
        reg.write(next);
        Q.write(next);
    }

    SC_CTOR (DFF_AH_SS_SR) {
        SC_METHOD (dff_ah_ss_sr);
        sensitive << clock.pos();
    }
};
+ 55

JK Flip-Flop Spec

Positive-Edge-Triggered JK Flip-Flop Truth Table


+ 56

Positive Edge Triggered JK Flip


Flop

#include "systemc.h"

// Positive-edge-triggered JK flip-flop.
SC_MODULE (JKFF) {
    sc_in<bool> J, K;
    sc_inout<bool> Q;
    sc_in<bool> clk;

    void jkff ();
    sc_signal<bool> reg;   // stored state, mirrored onto Q

    SC_CTOR (JKFF) {
        SC_METHOD (jkff);
        // Same edge sensitivity as the slide's `sensitive_pos << clk`, written
        // in the form the other modules in this file use.
        sensitive << clk.pos();
    }
};

// Next-state logic, selected by {J, K}:
//   00 hold, 01 reset, 10 set, 11 toggle.
void JKFF::jkff() {
    sc_uint<2> sw;
    sw[1] = J.read();
    sw[0] = K.read();
    switch (sw) {
        case 0x0:               // optional
            break;
        case 0x1:
            // Write the constant to Q directly: reg is a signal, so reading
            // it back here would return its value from before this edge.
            reg.write(false);
            Q.write(false);
            break;
        case 0x2:
            reg.write(true);
            Q.write(true);
            break;
        case 0x3:
            Q.write(!reg.read());
            reg.write(!reg.read());
            break;
        default:
            break;
    }
}
+ 57

JK FF Active-Low Asyn. Set &


Reset
#include "systemc.h"

// JK flip-flop with active-low asynchronous set and reset.
// Reset has priority over set.
SC_MODULE (JKFF) {
    sc_in<bool> J, K, set, reset;
    sc_inout<bool> Q;
    sc_in<bool> clk;

    void jkff_al_as_ar ();
    sc_signal<bool> reg;   // stored state, mirrored onto Q

    SC_CTOR (JKFF) {
        SC_METHOD (jkff_al_as_ar);
        // Rising clock edge, falling edge of set or reset (same sensitivity
        // as the slide's sensitive_pos / sensitive_neg lines).
        sensitive << clk.pos();
        sensitive << set.neg() << reset.neg();
    }
};

// Asynchronous reset/set first; otherwise JK next-state logic selected by
// {J, K}: 00 hold, 01 reset, 10 set, 11 toggle.
void JKFF::jkff_al_as_ar () {
    if (!reset.read()) {
        // Keep the stored state in step with Q. The toggle case below works
        // from reg, so leaving reg untouched here would make the next toggle
        // start from the wrong state.
        reg.write(false);
        Q.write(false);
    } else if (!set.read()) {
        reg.write(true);
        Q.write(true);
    } else {
        sc_uint<2> sw;
        sw[1] = J.read();
        sw[0] = K.read();
        switch (sw) {
            case 0x0:
                break;
            case 0x1:
                reg.write(false);
                Q.write(false);
                break;
            case 0x2:
                reg.write(true);
                Q.write(true);
                break;
            case 0x3:
                Q.write(!reg.read());
                reg.write(!reg.read());
                break;
            default:
                break;
        }
    }
}
+ 58

D Latch

#include "systemc.h“
SC_MODULE (D_LATCH) {
sc_in<bool> D;
sc_out<bool> Q;
sc_in<bool> clock; // clock
port

void d_latch() {
if (clock.read())
Q.write(D.read());
}

SC_CTOR (D_LATCH) {
SC_METHOD (d_latch);
sensitive << clock;
}
};
+ 59

SR Latch Spec
+ 60

SR Latch

#include "systemc.h"
// SR latch with active-low inputs; reset takes priority over set.
SC_MODULE( SR_LATCH ) {
    sc_in<bool> reset, set;
    sc_out<bool> Q;

    // Q is written only while one of the inputs is low; with both inputs
    // high nothing is written and Q keeps its value.
    void sr_latch () {
        if (!reset.read()) {
            Q.write(false);
            return;
        }
        if (!set.read()) {
            Q.write(true);
        }
    }

    SC_CTOR( SR_LATCH ) {
        SC_METHOD( sr_latch );
        sensitive << reset;
        sensitive << set;
    }
};
+ 61

A D Latch using switch{}

#include "systemc.h"
// 4-bit latch described with an incomplete switch: only the D values 0..3
// assign Q; for every other value Q is left unwritten and keeps its value.
SC_MODULE( D_LATCH4 ) {
    sc_in<sc_uint<4> > D;
    sc_out<sc_uint<4> > Q;

    void d_latch4 () {
        switch (D.read()) {
            case 0:
                Q.write(0x01);
                break;
            case 1:
                Q.write(0x02);
                break;
            case 2:
                Q.write(0x04);
                break;
            case 3:
                Q.write(0x08);
                break;
            default:
                // No assignment: Q holds its previous value.
                break;
        }
    }

    SC_CTOR( D_LATCH4 ) {
        SC_METHOD( d_latch4 );
        sensitive << D;
    }
};
+ 62

A Three-State Buffer

#include "systemc.h"
// Three-state buffer: passes `data` to `out` while `ctrl` is true and drives
// sc_logic_Z otherwise.
SC_MODULE( TRISTATE_BUF ) {
    sc_in<bool> ctrl;
    sc_in<sc_logic> data;
    sc_out<sc_logic> out;

    void tristate_buf () {
        if (!ctrl.read()) {
            // Buffer disabled: drive the Z value.
            out.write(sc_logic_Z);
            return;
        }
        out.write(data.read());
    }

    SC_CTOR( TRISTATE_BUF ) {
        SC_METHOD( tristate_buf );
        sensitive << ctrl;
        sensitive << data;
    }
};
+
End of 5th Section
Thanks to you all!

63

You might also like