Sequential Quantum Gate Decomposer  v1.9.3
Powerful decomposition of general unitarias into one- and two-qubit gates gates
dot.cpp
Go to the documentation of this file.
1 /*
2 Copyright 2020 Peter Rakyta, Ph.D.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 */
17 
18 #include "dot.h"
19 #include "common.h"
20 #include <cstring>
21 #include <iostream>
22 #include "tbb/tbb.h"
23 #include <tbb/scalable_allocator.h>
24 
25 
26 // number of rows in matrix A and cols in matrix B, under which serialized multiplication is applied instead of parallel one
27 #define SERIAL_CUTOFF 16
28 
29 //tbb::spin_mutex my_mutex;
30 
37 Matrix
38 dot( Matrix &A, Matrix &B ) {
39 
40 #if BLAS==0 // undefined BLAS
41  int NumThreads = omp_get_max_threads();
43 #elif BLAS==1 // MKL
44  int NumThreads = mkl_get_max_threads();
45  MKL_Set_Num_Threads(1);
46 #elif BLAS==2 //OpenBLAS
47  int NumThreads = openblas_get_num_threads();
48  openblas_set_num_threads(1);
49 #endif
50 
51 
52  // check the matrix shapes in DEBUG mode
53  assert( check_matrices( A, B ) );
54 
55 
56 #if BLAS==1 // MKL does not support option CblasConjNoTrans so the conjugation of the matrices are done in prior.
57  if ( B.is_conjugated() && !B.is_transposed() ) {
58  Matrix tmp = Matrix( B.rows, B.cols );
59  vzConj( B.cols*B.rows, B.get_data(), tmp.get_data() );
60  B = tmp;
61  }
62 
63  if ( A.is_conjugated() && !A.is_transposed() ) {
64  Matrix tmp = Matrix( A.rows, A.cols );
65  vzConj( A.cols*A.rows, A.get_data(), tmp.get_data() );
66  A = tmp;
67  }
68 #endif
69 
70 
71  // Preparing the output matrix
72  Matrix C;
73  if ( A.is_transposed() ){
74  if ( B.is_transposed() ) {
75  C = Matrix(A.cols, B.rows);
76  }
77  else {
78  C = Matrix(A.cols, B.cols);
79  }
80  }
81  else {
82  if ( B.is_transposed() ) {
83  C = Matrix(A.rows, B.rows);
84  }
85  else {
86  C = Matrix(A.rows, B.cols);
87  }
88  }
89 
90 
91  // Calculating the matrix product
92  if ( A.rows <= SERIAL_CUTOFF && B.cols <= SERIAL_CUTOFF ) {
93  // creating the serial task object
94  zgemm_Task_serial calc_task = zgemm_Task_serial( A, B, C );
95  calc_task.zgemm_chunk();
96  }
97  else {
98 
99  // creating the task object
100  tbb::task_group g;
101  g.run_and_wait([&A, &B, &C, &g]() {
102  zgemm_Task calc_task = zgemm_Task( A, B, C );
103  calc_task.execute(g);
104  });
105 
106 
107  }
108 
109 
110 #if BLAS==0 // undefined BLAS
111  omp_set_num_threads(NumThreads);
112 #elif BLAS==1 //MKL
113  MKL_Set_Num_Threads(NumThreads);
114 #elif BLAS==2 //OpenBLAS
115  openblas_set_num_threads(NumThreads);
116 #endif
117 
118  return C;
119 
120 
121 }
122 
123 
124 
131 bool
133 
134 
135  //The stringstream input to store the output messages.
136  std::stringstream sstream;
137 
138  //Integer value to set the verbosity level of the output messages.
139  int verbose_level;
140 
141  //Logging variable.
142  logging output;
143 
144  if (!A.is_transposed() & !B.is_transposed()) {
145  if ( A.cols != B.rows ) {
146  sstream << "pic::dot:: Cols of matrix A does not match rows of matrix B!" << std::endl;
147  verbose_level=1;
148  output.print(sstream,verbose_level);
149  }
150  }
151  else if ( A.is_transposed() & !B.is_transposed() ) {
152  if ( A.rows != B.rows ) {
153  sstream << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B!" << std::endl;
154  verbose_level=1;
155  output.print(sstream,verbose_level);
156  return false;
157  }
158  }
159  else if ( A.is_transposed() & B.is_transposed() ) {
160  if ( A.rows != B.cols ) {
161  sstream << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B.transpose!" << std::endl;
162  verbose_level=1;
163  output. print(sstream,verbose_level);
164  return false;
165  }
166  }
167  else if ( !A.is_transposed() & B.is_transposed() ) {
168  if ( A.cols != B.cols ) {
169  sstream << "pic::dot:: Cols of matrix A does not match rows of matrix B.transpose!" << std::endl;
170  verbose_level=1;
171  output.print(sstream,verbose_level);
172  return false;
173  }
174  }
175 
176 
177  // check the pointer of the matrices
178  if ( A.get_data() == NULL ) {
179  sstream << "pic::dot:: No preallocated data in matrix A!" << std::endl;
180  verbose_level=1;
181  output.print(sstream,verbose_level);
182  return false;
183  }
184  if ( B.get_data() == NULL ) {
185  sstream << "pic::dot:: No preallocated data in matrix B!" << std::endl;
186  verbose_level=1;
187  output.print(sstream,verbose_level);
188  return false;
189  }
190 
191  return true;
192 
193 }
194 
195 
201 void
203 
204 
205  //The stringstream input to store the output messages.
206  std::stringstream sstream;
207 
208  //Integer value to set the verbosity level of the output messages.
209  int verbose_level;
210 
211  //Logging variable.
212  logging output;
213 
214  if ( A.is_conjugated() & A.is_transposed() ) {
215  transpose = CblasConjTrans;
216  }
217  else if ( A.is_conjugated() & !A.is_transposed() ) {
218  sstream << "CblasConjNoTrans NOT IMPLEMENTED in GSL!!!!!!!!!!!!!!!" << std::endl;
219  verbose_level=1;
220  output.print(sstream,verbose_level);
221  exit(-1);
222  //transpose = CblasConjNoTrans; // not present in MKL
223  }
224  else if ( !A.is_conjugated() & A.is_transposed() ) {
225  transpose = CblasTrans;
226  }
227  else {
228  transpose = CblasNoTrans;
229  }
230 
231 }
232 
233 
234 
235 
236 
237 
245 
246  A = A_in;
247  B = B_in;
248  C = C_in;
249 
250 
252 
253  rows.Arows_start = 0;
254  rows.Arows_end = A.rows;
255  rows.Arows = A.rows;
256  rows.Brows_start = 0;
257  rows.Brows_end = B.rows;
258  rows.Brows = B.rows;
259  rows.Crows_start = 0;
260  rows.Crows_end = C.rows;
261  rows.Crows = C.rows;
262 
263  cols.Acols_start = 0;
264  cols.Acols_end = A.cols;
265  cols.Acols = A.cols;
266  cols.Bcols_start = 0;
267  cols.Bcols_end = B.cols;
268  cols.Bcols = B.cols;
269  cols.Ccols_start = 0;
270  cols.Ccols_end = C.cols;
271  cols.Ccols = C.cols;
272 
273 }
274 
275 
285 
286  A = A_in;
287  B = B_in;
288  C = C_in;
289 
290  rows = rows_in;
291  cols = cols_in;
292 
293 }
294 
295 
296 
300 void
302 
303  // setting CBLAS transpose operations
304  CBLAS_TRANSPOSE Atranspose, Btranspose;
305  get_cblas_transpose( A, Atranspose );
306  get_cblas_transpose( B, Btranspose );
307 
311 
312 
313  // zgemm parameters
314  int m,n,k,lda,ldb,ldc;
315 
316  if ( A.is_transposed() ) {
317  m = cols.Acols;
318  k = rows.Arows;
319  lda = A.stride;
320  }
321  else {
322  m = rows.Arows;
323  k = cols.Acols;
324  lda = A.stride;
325  }
326 
327 
328 
329  if (B.is_transposed()) {
330  n = rows.Brows;
331  ldb = B.stride;
332  }
333  else {
334  n = cols.Bcols;
335  ldb = B.stride;
336  }
337 
338  ldc = C.stride;
339 
340  // parameters alpha and beta for the cblas_zgemm3m function (the input matrices are not scaled)
342  alpha.real = 1.0;
343  alpha.imag = 0.0;
344  QGD_Complex16 beta;
345  beta.real = 0.0;
346  beta.imag = 0.0;
347 
348 
349 
350  cblas_zgemm(CblasRowMajor, Atranspose, Btranspose, m, n, k, (double*)&alpha, (double*)A_zgemm_data, lda, (double*)B_zgemm_data, ldb, (double*)&beta, (double*)C_zgemm_data, ldc);
351 
352 
353 
354 }
355 
356 
357 
358 
359 
366 zgemm_Task::zgemm_Task( Matrix &A_in, Matrix &B_in, Matrix &C_in) : zgemm_Task_serial(A_in, B_in, C_in) {
367 
368 }
369 
378 zgemm_Task::zgemm_Task( Matrix &A_in, Matrix &B_in, Matrix &C_in, row_indices& rows_in, col_indices& cols_in) : zgemm_Task_serial(A_in, B_in, C_in, rows_in, cols_in) {
379 
380 }
381 
386 void
387 zgemm_Task::execute(tbb::task_group& g) {
388 
389 
390 
391  if ( !A.is_transposed() && rows.Arows > A.rows/8 && rows.Arows > SERIAL_CUTOFF ) {
392  // *********** divide rows of A into sub-tasks*********
393 
394  int rows_start = rows.Arows_start;
395  int rows_end = rows.Arows_end;
396  int rows_mid = (rows_end+rows_start)/2;
397 
398 
399  // dispatching task to divide the current task into two pieces
400  zgemm_Task* calc_task = new zgemm_Task( A, B, C );
401  calc_task->cols = cols;
402  calc_task->rows = rows;
403 
404  calc_task->rows.Arows_start = rows_mid;
405  calc_task->rows.Arows = rows_end-rows_mid;
406  calc_task->rows.Crows_start = rows_mid;
407  calc_task->rows.Crows = rows_end-rows_mid;
408 
409  g.run([calc_task, &g](){
410  calc_task->execute(g);
411  delete calc_task;
412  });
413 
414  // recycling the present task
415  rows.Arows_end = rows_mid;
416  rows.Arows = rows_mid-rows_start;
417  rows.Crows_end = rows_mid;
418  rows.Crows = rows_mid-rows_start;
419 
420  execute(g);
421  return;
422 
423  }
424 
425 
426  else if ( A.is_transposed() && cols.Acols > A.cols/8 && cols.Acols > SERIAL_CUTOFF ) {
427  // *********** divide cols of B into sub-tasks*********
428 
429  int cols_start = cols.Acols_start;
430  int cols_end = cols.Acols_end;
431  int cols_mid = (cols_end+cols_start)/2;
432 
433 
434  // dispatching task to divide the current task into two pieces
435  zgemm_Task* calc_task = new zgemm_Task( A, B, C );
436  calc_task->cols = cols;
437  calc_task->rows = rows;
438 
439  calc_task->cols.Acols_start = cols_mid;
440  calc_task->cols.Acols = cols_end-cols_mid;
441 
442  calc_task->rows.Crows_start = cols_mid;
443  calc_task->rows.Crows = cols_end-cols_mid;
444 
445 
446  g.run([calc_task, &g](){
447  calc_task->execute(g);
448  delete calc_task;
449  });
450 
451 
452  // recycling the present task
453  cols.Acols_end = cols_mid;
454  cols.Acols = cols_mid-cols_start;
455  rows.Crows_end = cols_mid;
456  rows.Crows = cols_mid-cols_start;
457 
458  execute(g);
459  return;
460  }
461 
462 
463  else if ( !B.is_transposed() && cols.Bcols > B.cols/8 && cols.Bcols > SERIAL_CUTOFF ) {
464  // *********** divide cols of B into sub-tasks*********
465 
466 
467  int cols_start = cols.Bcols_start;
468  int cols_end = cols.Bcols_end;
469  int cols_mid = (cols_end+cols_start)/2;
470 
471 
472  // dispatching task to divide the current task into two pieces
473  zgemm_Task* calc_task = new zgemm_Task( A, B, C );
474  calc_task->cols = cols;
475  calc_task->rows = rows;
476 
477  calc_task->cols.Bcols_start = cols_mid;
478  calc_task->cols.Bcols = cols_end-cols_mid;
479  calc_task->cols.Ccols_start = cols_mid;
480  calc_task->cols.Ccols = cols_end-cols_mid;
481 
482 
483  g.run([calc_task, &g](){
484  calc_task->execute(g);
485  delete calc_task;
486  });
487 
488  // recycling the present task
489  cols.Bcols_end = cols_mid;
490  cols.Bcols = cols_mid-cols_start;
491  cols.Ccols_end = cols_mid;
492  cols.Ccols = cols_mid-cols_start;
493 
494  execute(g);
495  return;
496  }
497 
498 
499  else if ( B.is_transposed() && rows.Brows > B.rows/8 && rows.Brows > SERIAL_CUTOFF ) {
500  // *********** divide rows of B into sub-tasks*********
501 
502  int rows_start = rows.Brows_start;
503  int rows_end = rows.Brows_end;
504  int rows_mid = (rows_end+rows_start)/2;
505 
506  // dispatching task to divide the current task into two pieces
507  zgemm_Task* calc_task = new zgemm_Task( A, B, C );
508  calc_task->cols = cols;
509  calc_task->rows = rows;
510 
511  calc_task->rows.Brows_start = rows_mid;
512  calc_task->rows.Brows = rows_end-rows_mid;
513 
514  calc_task->cols.Ccols_start = rows_mid;
515  calc_task->cols.Ccols = rows_end-rows_mid;
516 
517 
518  g.run([calc_task, &g](){
519  calc_task->execute(g);
520  delete calc_task;
521  });
522 
523  // recycling the present task
524  rows.Brows_end = rows_mid;
525  rows.Brows = rows_mid-rows_start;
526  cols.Ccols_end = rows_mid;
527  cols.Ccols = rows_mid-rows_start;
528 
529  execute(g);
530  return;
531  }
532 
533  else {
534  zgemm_chunk();
535  return;
536  }
537 
538 
539  return;
540 
541 
542 
543 } //execute
544 
545 
Matrix B
The matrix B.
Definition: dot.h:153
Matrix dot(Matrix &A, Matrix &B)
Call to calculate the product of two complex matrices by calling method zgemm3m from the CBLAS librar...
Definition: dot.cpp:38
CBLAS_ORDER order
CBLAS storage order.
Definition: dot.h:161
void print(const std::stringstream &sstream, int verbose_level=1) const
Call to print output messages in the function of the verbosity level.
Definition: logging.cpp:55
CBLAS_TRANSPOSE
Definition: dot.h:34
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
Definition of the zgemm function from CBLAS.
void execute(tbb::task_group &g)
This function is called when a task is spawned.
Definition: dot.cpp:387
Class to calculate a matrix product C=A*B in serial.
Definition: dot.h:147
int Ccols
The number of cols in matrix C participating in the multiplication sub-problem.
Definition: dot.h:138
int stride
The column stride of the array. (The array elements in one row are a_0, a_1, ... a_{cols-1}, 0, 0, 0, 0. The number of zeros is stride-cols)
Definition: matrix_base.hpp:46
row_indices rows
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:157
Matrix A
The matrix A.
Definition: dot.h:151
void get_cblas_transpose(Matrix &A, CBLAS_TRANSPOSE &transpose)
Call to get the transpose properties of the input matrix for CBLAS calculations.
Definition: dot.cpp:202
int Arows_end
The last row in matrix A participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:99
int Arows
The number of rows in matrix A participating in the multiplication sub-problem.
Definition: dot.h:101
int Crows_start
The firs row in matrix C participating in the multiplication sub-problem.
Definition: dot.h:109
Class to calculate a matrix product C=A*B in parallel.
Definition: dot.h:199
int Acols_start
The firs col in matrix A participating in the multiplication sub-problem.
Definition: dot.h:122
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:94
int Brows_start
The firs row in matrix B participating in the multiplication sub-problem.
Definition: dot.h:103
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:120
scalar * get_data() const
Call to get the pointer to the stored data.
bool check_matrices(Matrix &A, Matrix &B)
Call to check the shape of the matrices for method dot.
Definition: dot.cpp:132
bool is_conjugated()
Call to get whether the matrix should be conjugated in CBLAS functions or not.
int Crows
The number of rows in matrix C participating in the multiplication sub-problem.
Definition: dot.h:113
A class containing basic methods for setting up the verbosity level.
Definition: logging.h:43
int rows
The number of rows.
Definition: matrix_base.hpp:42
int cols
The number of columns.
Definition: matrix_base.hpp:44
zgemm_Task(Matrix &A_in, Matrix &B_in, Matrix &C_in)
Constructor of the class.
Definition: dot.cpp:366
Matrix C
The matrix C.
Definition: dot.h:155
int Bcols_start
The firs col in matrix B participating in the multiplication sub-problem.
Definition: dot.h:128
zgemm_Task_serial(Matrix &A_in, Matrix &B_in, Matrix &C_in)
Constructor of the class.
Definition: dot.cpp:244
Structure type representing complex numbers in the SQUANDER package.
Definition: QGDTypes.h:38
int Acols
The number of cols in matrix A participating in the multiplication sub-problem.
Definition: dot.h:126
void zgemm_chunk()
Call to calculate the product of matrix chunks defined by attributes rows, cols.
Definition: dot.cpp:301
int Acols_end
The last col in matrix A participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:124
int Crows_end
The last row in matrix C participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:111
Class to store data of complex arrays and its properties.
Definition: matrix.h:38
void omp_set_num_threads(int num_threads)
Set the number of threads on runtime in MKL.
col_indices cols
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:159
Definition: dot.h:34
int Brows
The number of rows in matrix B participating in the multiplication sub-problem.
Definition: dot.h:107
int Bcols
The number of cols in matrix B participating in the multiplication sub-problem.
Definition: dot.h:132
int Ccols_start
The firs col in matrix C participating in the multiplication sub-problem.
Definition: dot.h:134
Header file for commonly used functions and wrappers to CBLAS functions.
double real
the real part of a complex number
Definition: QGDTypes.h:40
int Arows_start
The firs row in matrix A participating in the multiplication sub-problem.
Definition: dot.h:97
int Ccols_end
The last col in matrix C participating in the multiplication sub-problem. (The col are picked from a ...
Definition: dot.h:136
int Brows_end
The last row in matrix B participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:105
#define SERIAL_CUTOFF
Definition: dot.cpp:27
int Bcols_end
The last col in matrix B participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:130
double imag
the imaginary part of a complex number
Definition: QGDTypes.h:42
bool is_transposed()
Call to get whether the matrix should be conjugated in CBLAS functions or not.
int omp_get_max_threads()
get the number of threads in MKL