orca-sim
TDmaMult.hpp
Go to the documentation of this file.
1 /******************************************************************************
2  * This file is part of project ORCA. More information on the project
3  * can be found at the following repositories at GitHub's website.
4  *
5  * https://github.com/andersondomingues/orca-sim
6  * https://github.com/andersondomingues/orca-software
7  * https://github.com/andersondomingues/orca-mpsoc
8  * https://github.com/andersondomingues/orca-tools
9  *
10  * Copyright (C) 2018-2020 Anderson Domingues, <ti.andersondomingues@gmail.com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License along
23  * with this program; if not, write to the Free Software Foundation, Inc.,
24  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 ******************************************************************************/
26 #ifndef PLATFORMS_SINGLE_CORE_NN_INCLUDE_TDMAMULT_HPP_
27 #define PLATFORMS_SINGLE_CORE_NN_INCLUDE_TDMAMULT_HPP_
28 
29 // include peripheral addresses
30 #include "MemoryMap.h"
31 
32 // std API
33 #include <iostream>
34 #include <string>
35 
36 // simulator API
37 #include "TimedModel.hpp"
38 #include "UMemory.hpp"
39 #include "USignal.hpp"
40 
41 /*
42  The current approach is: the CPU copies data to the DMA memory and configures the DMA; the DMA then stalls the CPU, performs the MACs in parallel and in burst mode,
43  copies the MAC results to MMIO, and releases the stall; finally, the CPU copies the results and continues the software.
44 
45  An alternative approach, interesting if we have multiple tasks and an OS, is not to halt the CPU, letting other tasks use the CPU while
46  the DMA is working. When the DMA finishes, an interrupt is asserted, and the results are copied back to the software layer.
47 */
// States of the DMA process.
// NOTE(review): this listing skips source lines 51, 53 and 55, so only the
// last enumerator (FLUSH) is visible here; each comment group below presumably
// describes one enumerator -- confirm against the original header.
48 enum class DmaState{
49  // Wait for the CPU to configure the DMA, indicated by _sig_dma_prog. Then, the DMA
50  // raises _sig_stall to stall the CPU while the DMA is working.
52  // Copy content from the NN memory to the MAC internal operand registers.
54  // Just waste a cycle to copy the MAC results to the MMIO.
56  // Deassert _sig_stall, returning to the wait mode.
57  // The CPU returns to activity.
58  FLUSH
59 };
60 
// Timed model (derives from TimedModel) of a DMA with SIMD_SIZE
// multiply-accumulate (MAC) lanes. It is programmed by the processor through
// signals and works as a three-stage pipeline: data fetch (ReadData),
// multiplication (DoMult) and accumulation (DoAcc).
// NOTE(review): this listing skips some source lines (e.g. 61-68, 81, 83, 111,
// 122, 124, 136-145), so documented members such as _base_mac_out_addr and
// _dma_state are declared in the original header but are not visible here.
69 class TDmaMult: public TimedModel{
70  private:
71  // Pointer to the main memory.
72  Memory* _mem0;
73  // Base address of the weight memory channel. Once set, it does not change
74  // at runtime; it can only be changed at design time.
75  uint32_t _memW[SIMD_SIZE];
76  // Base address of the input memory channel. Once set, it does not change
77  // at runtime; it can only be changed at design time.
78  uint32_t _memI[SIMD_SIZE];
79  // Base address of the array with the results from the MAC units. Supposed
80  // to be constant; it can only be changed at design time.
82  // State of the DMA process.
84 
85  // Control signals.
86  // (OUT): stalls the CPU while the DMA is copying from the memories.
87  Signal<uint8_t>* _sig_stall;
88  // (IN): the processor writes 1 to start the DMA.
89  Signal<uint8_t>* _sig_dma_prog;
90 
91  // Data sent from the processor to program the DMA.
92  // IN: number of MAC operations to be executed in burst mode.
93  Signal<uint32_t>* _sig_burst_size;
94  // IN: (not used) amount of memory configured for each channel.
95  // 1 means NN_MEM_SIZE_PER_CHANNEL bytes, 2 means
96  // 2*NN_MEM_SIZE_PER_CHANNEL bytes, ...
97  Signal<uint32_t>* _sig_nn_size;
98  // IN: (not used) number of expected output data.
99  Signal<uint32_t>* _sig_out_size;
100 
101  // Internal registers between the pipeline stages.
102  // Data 'register' of the 1st pipeline stage, i.e. the operands of the
103  // MAC units.
104  float _op1[SIMD_SIZE], _op2[SIMD_SIZE];
105  // Data 'register' between the 2nd and the 3rd pipeline stages:
106  // the result of the multiplication.
107  float _reg_mul[SIMD_SIZE];
108  // Data 'register' with the output of the MAC.
109  float _reg_mac[SIMD_SIZE];
110 
     // Pipeline signals.
112  uint8_t _mul_loaded; // signal between the 1st and the 2nd pipeline stages.
113  uint8_t _mul_ready; // signal between the 2nd and the 3rd pipeline stages.
114 
115  // Internal data registers. Data sent from the processor to program the DMA.
     // NOTE(review): nn_size and out_size lack the leading underscore used by
     // the other private members.
116  uint32_t _burst_size; // total number of multiplications.
117  uint32_t nn_size; // (not used) number of NN memory banks for a single MAC.
118  uint32_t out_size; // (not used) number of expected output data.
119  // Others.
120  uint32_t _remaining; // count number of data to be read.
121  uint32_t _mem_idx; // presumably the index of the next memory position to read -- TODO confirm.
123 
125  void ReadData(); // 1st pipeline stage, i.e. data fetch
126  void DoMult(); // 2nd pipeline stage, multiplication
127  void DoAcc(); // 3rd pipeline stage, accumulation
128 
129  public:
130  // Getters.
     // Returns a DmaState; presumably the current value of _dma_state -- TODO confirm.
131  DmaState GetDmaState();
132 
133  // Other.
     // Returns a SimulationTime; presumably advances the model and reports the
     // time consumed -- TODO confirm against the implementation.
134  SimulationTime Run();
     // Presumably restores the model to its initial state -- TODO confirm.
135  void Reset();
136 
     // Constructor. Parameters, as far as names and types indicate (TODO confirm
     // against the implementation):
     //   name              - name given to this model instance.
     //   stall             - signal presumably bound to _sig_stall.
     //   dma_start         - signal presumably bound to _sig_dma_prog.
     //   burst_size        - signal presumably bound to _sig_burst_size.
     //   nn_size           - signal presumably bound to _sig_nn_size.
     //   out_size          - signal presumably bound to _sig_out_size.
     //   base_mac_out_addr - presumably the base address of the MAC results array.
     //   main_mem          - presumably the main memory, stored in _mem0.
146  TDmaMult(std::string name, Signal<uint8_t>* stall,
147  Signal<uint8_t>* dma_start, Signal<uint32_t>* burst_size,
148  Signal<uint32_t>* nn_size, Signal<uint32_t>* out_size,
149  uint32_t base_mac_out_addr, Memory* main_mem);
150 
     // Destructor.
153  ~TDmaMult();
154 };
155 
156 
157 #endif // PLATFORMS_SINGLE_CORE_NN_INCLUDE_TDMAMULT_HPP_
Signal< uint8_t > * _sig_dma_prog
Definition: TDmaMult.hpp:89
Signal< uint32_t > * _sig_out_size
Definition: TDmaMult.hpp:99
uint32_t nn_size
Definition: TDmaMult.hpp:117
Signal< uint32_t > * _sig_nn_size
Definition: TDmaMult.hpp:97
DmaState
Definition: TDmaMult.hpp:48
uint8_t _mul_loaded
pipeline signals.
Definition: TDmaMult.hpp:112
DmaState _dma_state
Definition: TDmaMult.hpp:83
uint32_t _remaining
count number of data to be read.
Definition: TDmaMult.hpp:120
uint32_t out_size
Definition: TDmaMult.hpp:118
uint32_t SimulationTime
uint32_t _burst_size
Definition: TDmaMult.hpp:116
#define SIMD_SIZE
Definition: _MemoryMap.h:81
Memory * _mem0
Definition: TDmaMult.hpp:72
uint8_t _mul_ready
Definition: TDmaMult.hpp:113
Signal< uint32_t > * _sig_burst_size
Definition: TDmaMult.hpp:93
Signal< uint8_t > * _sig_stall
Definition: TDmaMult.hpp:87
uint32_t _base_mac_out_addr
Definition: TDmaMult.hpp:81