orca-sim
TDmaMult.cpp
Go to the documentation of this file.
1 
23 //std API
24 #include <iostream>
25 #include <sstream>
26 #include <iomanip>
27 
28 //simulator API
29 #include <TimedModel.h>
30 #include <UBuffer.h>
31 
32 #include <TDmaMult.h>
33 
/**
 * Constructor.
 *
 * Stores the signal handles, the MAC output base address and the main memory
 * handle, initializes the channel base addresses and the pipeline flags,
 * prints the NN memory configuration and finally calls Reset().
 *
 * @param name              forwarded to the TimedModel base constructor
 * @param stall             stored in _sig_stall
 * @param dma_start         stored in _sig_dma_prog
 * @param burst_size        stored in _sig_burst_size
 * @param nn_size           stored in _sig_nn_size (marked as not used)
 * @param out_size          stored in _sig_out_size (marked as not used)
 * @param base_mac_out_addr address passed to GetMap() to locate the MAC results
 * @param main_mem          memory model stored in _mem0
 */
34 TDmaMult::TDmaMult(std::string name,
35  // signals
36  Signal<uint8_t>* stall, Signal<uint8_t>* dma_start, Signal<uint32_t>* burst_size, Signal<uint32_t>* nn_size,
37  Signal<uint32_t>* out_size, uint32_t base_mac_out_addr, Memory* main_mem) : TimedModel(name) {
38 
39  int i;
40 
41  // control signals shared with the processor
42  _sig_stall = stall;
43  _sig_dma_prog = dma_start;
44 
45  // data signals sent by the processor
46  _sig_burst_size = burst_size;
47  _sig_nn_size = nn_size; // TODO not used
48  _sig_out_size = out_size; // TODO not used
49  _base_mac_out_addr = base_mac_out_addr;
50 
51  // set the base memory address of each channel; channel 0 starts at the region base
52  _memW[0] = MEMW_BASE;
53  _memI[0] = MEMI_BASE;
 // NOTE(review): the loop body (listing lines 56-57) is not visible here; presumably it
 // derives the base addresses of channels 1..SIMD_SIZE-1 - confirm against the original file
54  for ( i = 1; i < SIMD_SIZE; i++)
55  {
58  }
59  _mem0 = main_mem;
60 
61  // internal control 'registers' between the pipeline stages
62  _mul_loaded = 0;
63  _mul_ready = 0;
64 
 // dump the memory-map configuration to stdout
65  printf("NN CONFIGURATION:\n\n");
66  printf(" TOTAL_NN_MEM_SIZE = 0x%x\n",TOTAL_NN_MEM_SIZE);
67  printf(" SIMD_SIZE = %d\n",SIMD_SIZE);
68  printf(" NN_MEM_SIZE_PER_CHANNEL = 0x%x\n",NN_MEM_SIZE_PER_CHANNEL);
69  printf(" MEMW_BASE = 0x%x\n",MEMW_BASE);
70  printf(" MEMI_BASE = 0x%x\n",MEMI_BASE);
71  printf(" DMA_MAC_OUT_ARRAY = 0x%x\n\n",base_mac_out_addr);
72 
73  this->Reset();
74 }
75 
77 }
78 
// Body of TDmaMult::Reset(); the signature line is not present in this listing.
80  int i;
81  // all relevant data return to their initial values in this state
83 
84  // get the pointer to the base memory position where the MACs store their final values
85  float *ptr = (float *)_mem0->GetMap(_base_mac_out_addr);
 // zero the SIMD_SIZE consecutive float results stored at that position
86  for (i=0;i<SIMD_SIZE;i++){
87  *ptr = 0;
88  ptr++;
89  }
90 }
91 
93 
95  // TODO: add a loop so that the MAC is performed over the whole vector
96  // 3-stage pipeline
 // The stages are invoked last-to-first, so each stage consumes the flags and
 // registers its predecessor produced during the previous call.
97  this->DoAcc(); // 3rd stage, accumulator
98  this->DoMult(); // 2nd stage, does mult
99  this->ReadData(); // 1st stage, read data from the memory
100 
101  return 1; // takes only 1 cycle to change both states
102 }
103 
104 // 3rd pipeline stage - accumulate the previous value with the current mult result
// -Wswitch is silenced for this function only; the matching pop below restores it.
105 #pragma GCC diagnostic push
106 #pragma GCC diagnostic ignored "-Wswitch"
108  int i;
109  switch(_dma_state){
 // NOTE(review): the case label of this branch is not visible in this listing.
 // clear every accumulator
111  for (i=0;i<SIMD_SIZE;i++){
112  _reg_mac[i] = 0;
113  }
114  }break;
 // NOTE(review): the case label of this branch is not visible in this listing.
 // accumulate only when the mult stage flagged a valid product
116  if (_mul_ready == 0x1){
117  for (i=0;i<SIMD_SIZE;i++){
118  _reg_mac[i] += _reg_mul[i];
119  }
120  }
121  }break;
122  case DmaState::COPY_TO_CPU:{
 // copy the accumulators to memory, starting at the MAC output base address
123  float *ptr = (float *)_mem0->GetMap(_base_mac_out_addr);
124  for (i=0;i<SIMD_SIZE;i++){
125  //printf ("MAC[%d]: %f - 0x%p\n", i, _reg_mac[i], &(_reg_mac[i]));
126  *ptr = _reg_mac[i]; // send the final result back to the processor
127  ptr++;
128  }
129  }break;
 // nothing to accumulate in this state
130  case DmaState::FLUSH:
131  break;
132  }
133 }
134 #pragma GCC diagnostic pop
135 
136 // 2nd pipeline stage - do the mult
138  int i;
 // NOTE(review): the condition opening this branch is not visible in this listing.
140  for (i=0;i<SIMD_SIZE;i++){
141  _reg_mul[i] = 0; // restart register
142  }
143  _mul_ready = 0;
144  }
145  else {
 // multiply only when the read stage flagged that the operands are loaded
146  if (_mul_loaded == 0x1){
147  for (i=0;i<SIMD_SIZE;i++){
148  _reg_mul[i] = _op1[i] * _op2[i]; // mult
149  }
150  _mul_ready = 1; // tell the accumulate stage that the products are valid
151  }else{
152  _mul_ready = 0;
153  }
154  }
155 }
156 
157 // 1st pipeline stage - read the memories and load the mult operands
159  int i;
160  // state machine driving the transfer
161  switch(_dma_state){
162  // wait for the cpu to configure the ni
 // NOTE(review): the case label of this branch is not visible in this listing.
164  if(_sig_dma_prog->Read() == 0x1){
165  _sig_stall->Write(0x1); // raise stall
166  _mul_loaded = 0; // raised when the mul can be executed
167  // reading data sent from the proc to the DMA
 // NOTE(review): the statements guarding this error path are not visible in this listing.
170  stringstream s;
171  s << this->GetName() << ": burst size exedded the NN memory capacity.";
172  throw std::runtime_error(s.str());
173  }
174 // _w_mem_idx = _sig_nn_size->Read();
175 // _i_mem_idx = _sig_out_size->Read();
176  // init counters used for burst mode operation
177  _mem_idx = 0;
179  _dma_state = DmaState::COPY_FROM_MEM; // change states
180  }
181 
182  } break;
183 
184  // copy data from the NN memory to the internal MAC registers
186 
187  if(_remaining > 0){
188  int8_t * w_ptr, * i_ptr;
189 
 // load one weight and one input operand per channel
190  for (i=0;i<SIMD_SIZE;i++){
191  w_ptr = _mem0->GetMap(_memW[i]+_mem_idx);
192  _op1[i] = *(float*)w_ptr;
193  i_ptr = _mem0->GetMap(_memI[i]+_mem_idx);
194  _op2[i] = *(float*)i_ptr;
195  //if ( _op1[i] != 0.0f)
196  // printf ("OPs[%d %d]: %f %f\n", _mem_idx, i, _op1[i], _op2[i]);
197  }
198  // signal to the next pipeline stage
199  _mul_loaded = 1;
200  // updating counters used for burst mode operation
201  _remaining--; // one less packet to send
202  _mem_idx +=4; // advance to the next operand (assumes sizeof(float) == 4 - TODO confirm)
203  }else{
204  _mul_loaded = 0;
206  }
207  } break;
208 
209  case DmaState::COPY_TO_CPU:{
210  // result is written back to the output MMIO register
212  }break;
213 
214  // just waits a few clock cycles. currently, only one cycle
215  case DmaState::FLUSH:
217  // TODO multiple drivers to signal _sig_dma_prog !!! implement a handshake protocol between proc and dma
218  _sig_dma_prog->Write(0x0); // lower the start signal
219  _sig_stall->Write(0x0); // lowering stall and giving the control back to the processor
220  break;
221  }
222 }
Signal< uint8_t > * _sig_dma_prog
Definition: TDmaMult.hpp:89
Signal< uint32_t > * _sig_out_size
Definition: TDmaMult.hpp:99
DmaState GetDmaState()
Definition: TDmaMult.cpp:92
uint32_t _memI[SIMD_SIZE]
Definition: TDmaMult.hpp:78
TDmaMult(std::string name, Signal< uint8_t > *stall, Signal< uint8_t > *dma_start, Signal< uint32_t > *burst_size, Signal< uint32_t > *nn_size, Signal< uint32_t > *out_size, uint32_t base_mac_out_addr, Memory *main_mem)
ctor
Definition: TDmaMult.cpp:34
uint32_t nn_size
Definition: TDmaMult.hpp:117
Signal< uint32_t > * _sig_nn_size
Definition: TDmaMult.hpp:97
DmaState
Definition: TDmaMult.hpp:48
T Read()
Get the last value written to the bus.
Definition: Signal.cpp:118
#define MEMI_BASE
Definition: _MemoryMap.h:84
uint8_t _mul_loaded
pipeline signals.
Definition: TDmaMult.hpp:112
DmaState _dma_state
Definition: TDmaMult.hpp:83
uint32_t _remaining
count number of data to be read.
Definition: TDmaMult.hpp:120
~TDmaMult()
dtor
Definition: TDmaMult.cpp:76
uint32_t out_size
Definition: TDmaMult.hpp:118
uint32_t SimulationTime
void Write(T val)
Writes some value to the bus.
Definition: Signal.cpp:127
uint32_t _mem_idx
memory idx used to access both the input and weight memories.
Definition: TDmaMult.hpp:122
float _reg_mac[SIMD_SIZE]
Definition: TDmaMult.hpp:109
uint32_t _burst_size
Definition: TDmaMult.hpp:116
#define MEMW_BASE
Definition: _MemoryMap.h:83
void DoMult()
Definition: TDmaMult.cpp:137
float _reg_mul[SIMD_SIZE]
Definition: TDmaMult.hpp:107
float _op1[SIMD_SIZE]
Definition: TDmaMult.hpp:104
void DoAcc()
Definition: TDmaMult.cpp:107
void ReadData()
Internal processes – 3 stage pipeline.
Definition: TDmaMult.cpp:158
#define SIMD_SIZE
Definition: _MemoryMap.h:81
SimulationTime Run()
Definition: TDmaMult.cpp:94
Memory * _mem0
Definition: TDmaMult.hpp:72
uint8_t _mul_ready
Definition: TDmaMult.hpp:113
Signal< uint32_t > * _sig_burst_size
Definition: TDmaMult.hpp:93
void Reset()
Definition: TDmaMult.cpp:79
float _op2[SIMD_SIZE]
Definition: TDmaMult.hpp:104
uint32_t _memW[SIMD_SIZE]
Definition: TDmaMult.hpp:75
#define TOTAL_NN_MEM_SIZE
Definition: _MemoryMap.h:80
Signal< uint8_t > * _sig_stall
Definition: TDmaMult.hpp:87
uint32_t _base_mac_out_addr
Definition: TDmaMult.hpp:81
#define NN_MEM_SIZE_PER_CHANNEL
Definition: _MemoryMap.h:82