orca-sim
ProcessingTile.hpp
Go to the documentation of this file.
1 /******************************************************************************
2  * This file is part of project ORCA. More information on the project
3  * can be found at the following repositories at GitHub's website.
4  *
5  * https://github.com/andersondomingues/orca-sim
6  * https://github.com/andersondomingues/orca-software
7  * https://github.com/andersondomingues/orca-mpsoc
8  * https://github.com/andersondomingues/orca-tools
9  *
10  * Copyright (C) 2018-2020 Anderson Domingues, <ti.andersondomingues@gmail.com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License along
23  * with this program; if not, write to the Free Software Foundation, Inc.,
24  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 ******************************************************************************/
26 #ifndef PLATFORMS_SINGLE_CORE_NN_INCLUDE_PROCESSINGTILE_HPP_
27 #define PLATFORMS_SINGLE_CORE_NN_INCLUDE_PROCESSINGTILE_HPP_
28 
29 // std API
30 #include <iostream>
31 #include <string>
32 
33 // model API
34 #include "THFRiscV.hpp"
35 #include "UMemory.hpp"
36 #include "USignal.hpp"
37 #include "TDmaMult.hpp"
38 
39 /* MEMORY LAYOUT
40 ------------------- 0x40000000 <<-- code begin
41 
42  sram
43  (4MBytes)
44 
45 ------------------- 0x40400000 <<-- stack
46  empty space
47  (64KBytes)
48 ------------------- 0x40410000 <<-- mmio begin
49  mmio
50  (1Mbytes - 64KBytes)
51 
52  0x40410000 => various MMIO registers
53  0x40411xxx => MMIO performance counters
54  0x404120xx => mmio NN DMA
55  0x40412100 until 0x404FFFFF => available
56 
57 ------------------- 0x404FFFFF <<-- mmio end
58 
59 
60 ------------------- 0x40500000 <<-- mmio NN MEM banks
61 
62  NN MEM banks
63  (TOTAL_NN_MEM_SIZE Bytes) // # TOTAL_NN_MEM_SIZE/2 Bytes for weight and TOTAL_NN_MEM_SIZE/2 Bytes for inputs
64  (4MBytes)
65 
66 ------------------- 0x408FFFFF <<-- mmio NN MEM banks
67 
68 total memory space: 9MBytes
69 
70 ------------------- 0x404120xx <<-- mmio NN DMA
71 since there are up to 16 MACs, it is necessary to reserve
72 4 32-bit registers x 16 MACs = 4 registers x 4 bytes x 16 MACs = 256 bytes of MMIO for the NN DMA
73  0x40412000 DMA0
74  0x40412010 DMA1
75  0x40412020 DMA2
76  ...
77  0x404120F0 DMA15
78 ------------------- 0x40500000 <<-- mmio NN MEM banks
79 
80 
81 max TOTAL_NN_MEM_SIZE = NN_MEM_SIZE_PER_CHANNEL * 2 * SIMD_SIZE.
82 This means that the max size for the weights is 2MBytes, and the same size applies to the inputs.
83 
84 
85 
86 NN_TOTAL_MEM_HEIGHT determines the total number of words (32bits) of
87 the weight and the input memories. These two memories feed the MAC Units.
88 Assuming NN_TOTAL_MEM_HEIGHT = 1024 (4KBytes), the memory map is:
89  0x40500000 - uint32_t weight[NN_TOTAL_MEM_HEIGHT]
90  0x40700000 - uint32_t input[NN_TOTAL_MEM_HEIGHT]
91 
92 In fact, the weight and input memories are divided into individual banks such that
93 it is possible to load all MACs in parallel. Since SIMD_SIZE gives the
94 number of MACs running in parallel, the actual memory map per bank is:
95  0x40500000 - uint32_t weight[SIMD_SIZE][NN_TOTAL_MEM_HEIGHT/SIMD_SIZE]
96  0x40700000 - uint32_t input[SIMD_SIZE][NN_TOTAL_MEM_HEIGHT/SIMD_SIZE]
97 
98 where, for instance:
99  - weight[0][0] is the 1st address of the MAC0
100  - weight[15][0] is the 1st address of the MAC15
101 */
102 
103 // main memory mapping: MEM0_SIZE/MEM0_BASE alias ORCA_MEMORY_SIZE/ORCA_MEMORY_BASE (presumably provided by MemoryMap.h, included below -- confirm)
104 // #define MEM0_SIZE 0x008FFFFF
105 // #define MEM0_BASE 0x40000000
106 #define MEM0_SIZE ORCA_MEMORY_SIZE
107 #define MEM0_BASE ORCA_MEMORY_BASE
108 
109 #include <MemoryMap.h>
110 
119 class ProcessingTile{  // tile holding a HFRiscV core, its main memory and the signals declared below
120  private:
121  // the HFRiscV core (CPU model) of this tile.
122  HFRiscV* _cpu;
123  // the main memory.
124  Memory* _mem0;
125  // DMA unit responsible for transferring weight and input data from the main
126  // memory directly to the vector multipliers. NOTE(review): no member follows this comment in this listing although GetDma() is declared -- confirm.
128 
129  // hosttime magic wire: a 32-bit value and a signal object (presumably bound to each other in the constructor -- confirm)
130  uint32_t _shosttime;
131  Signal<uint32_t>* _signal_hosttime;
132 
133  // control signals.
134  // stalls the cpu while copying from the memories
135  Signal<uint8_t>* _sig_stall;
136 
137  // flag to start the DMA
138  Signal<uint8_t>* _sig_dma_prog;
139 
140  // dummy signal required by the CPU. not really used since we don't have
141  // interrupts in this design
142  Signal<uint8_t>* _sig_intr;
143 
144  // data sent from the processor to program the DMA.
145  // number of MAC ops to be executed in burst mode.
146  Signal<uint32_t>* _sig_burst_size;
147 
148  // (not used) amount of memory configured for each channel.
149  // 1 means NN_MEM_SIZE_PER_CHANNEL bytes,
150  // 2 means 2*NN_MEM_SIZE_PER_CHANNEL bytes, ...
151  Signal<uint32_t>* _sig_nn_size;
152 
153  // (not used) number of expected output data.
154  Signal<uint32_t>* _sig_out_size;
155 
156  public:
157  ProcessingTile();
158  ~ProcessingTile();
159 
160  // getters for the control signals
161  Signal<uint8_t>* GetSignalStall();
162  Signal<uint8_t>* GetSignalDmaProg();
163  // required only by the cpu and orca. not really useful
164  Signal<uint8_t>* GetSignalIntr();
165 
166  // getters for the tile components (memory, cpu, dma)
167  Memory* GetMem0();
169  HFRiscV* GetCpu();
170  TDmaMult* GetDma();
171 
176  Signal<uint32_t>* GetSignalHostTime();
177 
178  std::string ToString();
179  std::string GetName();
180 
181  void Reset();
182 };
183 
184 
185 #endif // PLATFORMS_SINGLE_CORE_NN_INCLUDE_PROCESSINGTILE_HPP_
TDmaMult * GetDma()
Signal< uint8_t > * GetSignalIntr()
Signal< uint8_t > * GetSignalDmaProg()
uint32_t _shosttime
Signal< uint32_t > * _signal_hosttime
Signal< uint8_t > * GetSignalStall()
Signal< uint8_t > * _sig_dma_prog
ProcessingTile()
This file is part of project URSA.
std::string GetName()
Signal< uint32_t > * _sig_burst_size
HFRiscV * _cpu
Signal< uint32_t > * GetSignalHostTime()
Get current signal for systime signal.
Signal< uint32_t > * _sig_out_size
Signal< uint8_t > * _sig_intr
Memory * GetMem0()
HFRiscV * GetCpu()
Signal< uint8_t > * _sig_stall
std::string ToString()
Signal< uint32_t > * _sig_nn_size