rakytap/sequential-quantum-gate-decomposer/dot_8cpp_source.html

 /*
 Copyright 2020 Peter Rakyta, Ph.D.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

 */

 #include "dot.h"
 #include "common.h"
 #include <cstring>
 #include <iostream>
 #include "tbb/tbb.h"
 #include <tbb/scalable_allocator.h>


 // Threshold on the number of rows of matrix A and columns of matrix B: at or below it the product is calculated serially instead of in parallel
 #define SERIAL_CUTOFF 16

 //tbb::spin_mutex my_mutex;

 /**
 @brief Call to calculate the product of two complex matrices with cblas_zgemm.
 The transposition/conjugation flags stored in the matrices determine the operation applied on them.
 @param A The first matrix of the product.
 @param B The second matrix of the product.
 @return Returns with the newly allocated matrix holding the product.
 */
 Matrix
 dot( Matrix &A, Matrix &B ) {

     // remember the thread count of the numerical backend and restrict the backend to one thread for the duration of the call
 #if BLAS==0 // undefined BLAS
     int NumThreads = omp_get_max_threads();
     omp_set_num_threads(1);
 #elif BLAS==1 // MKL
     int NumThreads = mkl_get_max_threads();
     MKL_Set_Num_Threads(1);
 #elif BLAS==2 //OpenBLAS
     int NumThreads = openblas_get_num_threads();
     openblas_set_num_threads(1);
 #endif

     // the shapes and data pointers are verified only when assertions are enabled
     assert( check_matrices( A, B ) );

 #if BLAS==1 // CblasConjNoTrans is not available in MKL: conjugated, but not transposed inputs are conjugated explicitly
     if ( B.is_conjugated() && !B.is_transposed() ) {
         Matrix B_conjugated = Matrix( B.rows, B.cols );
         vzConj( B.cols*B.rows, B.get_data(), B_conjugated.get_data() );
         // NOTE: the reference argument B is reassigned to the conjugated copy
         B = B_conjugated;
     }

     if ( A.is_conjugated() && !A.is_transposed() ) {
         Matrix A_conjugated = Matrix( A.rows, A.cols );
         vzConj( A.cols*A.rows, A.get_data(), A_conjugated.get_data() );
         // NOTE: the reference argument A is reassigned to the conjugated copy
         A = A_conjugated;
     }
 #endif

     // shape of the product: a transposed input contributes its cols (A) or rows (B) instead
     const int C_rows = A.is_transposed() ? A.cols : A.rows;
     const int C_cols = B.is_transposed() ? B.rows : B.cols;

     Matrix C;
     C = Matrix(C_rows, C_cols);

     if ( A.rows > SERIAL_CUTOFF || B.cols > SERIAL_CUTOFF ) {
         // at least one extent exceeds the cutoff: the product is divided into TBB tasks
         tbb::task_group g;
         g.run_and_wait([&A, &B, &C, &g]() {
             zgemm_Task root_task = zgemm_Task( A, B, C );
             root_task.execute(g);
         });
     }
     else {
         // both extents are within the cutoff: a single zgemm call evaluates the whole product
         zgemm_Task_serial serial_task = zgemm_Task_serial( A, B, C );
         serial_task.zgemm_chunk();
     }

     // give back the original thread count to the numerical backend
 #if BLAS==0 // undefined BLAS
     omp_set_num_threads(NumThreads);
 #elif BLAS==1 //MKL
     MKL_Set_Num_Threads(NumThreads);
 #elif BLAS==2 //OpenBLAS
     openblas_set_num_threads(NumThreads);
 #endif

     return C;

 }


 /**
 @brief Call to check the shape and the data pointers of the matrices for method dot.
 The inner dimensions are compared according to the transposition flags of the matrices.
 A message is passed to the logger for the first problem found.
 @param A The first matrix of the product.
 @param B The second matrix of the product.
 @return Returns with true if the inner dimensions match and both matrices hold a non-null data pointer, false otherwise.
 */
 bool
 check_matrices( Matrix &A, Matrix &B ) {

     //The stringstream input to store the output messages.
     std::stringstream sstream;

     //Integer value to set the verbosity level of the output messages.
     const int verbose_level = 1;

     //Logging variable.
     logging output;

     const bool A_transposed = A.is_transposed();
     const bool B_transposed = B.is_transposed();

     if ( !A_transposed && !B_transposed )  {
         if ( A.cols != B.rows ) {
             sstream << "pic::dot:: Cols of matrix A does not match rows of matrix B!" << std::endl;
             output.print(sstream,verbose_level);
             // the mismatch has to be reported to the caller in this case as well
             return false;
         }
     }
     else if ( A_transposed && !B_transposed )  {
         if ( A.rows != B.rows ) {
             sstream << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B!" << std::endl;
             output.print(sstream,verbose_level);
             return false;
         }
     }
     else if ( A_transposed && B_transposed )  {
         if ( A.rows != B.cols ) {
             sstream << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B.transpose!" << std::endl;
             output.print(sstream,verbose_level);
             return false;
         }
     }
     else {
         // remaining case: A is not transposed, B is transposed
         if ( A.cols != B.cols ) {
             sstream << "pic::dot:: Cols of matrix A does not match rows of matrix B.transpose!" << std::endl;
             output.print(sstream,verbose_level);
             return false;
         }
     }

     // check the pointer of the matrices
     if ( A.get_data() == NULL ) {
         sstream << "pic::dot:: No preallocated data in matrix A!" << std::endl;
         output.print(sstream,verbose_level);
         return false;
     }
     if ( B.get_data() == NULL ) {
         sstream << "pic::dot:: No preallocated data in matrix B!" << std::endl;
         output.print(sstream,verbose_level);
         return false;
     }

     return true;

 }


 /**
 @brief Call to translate the transposition/conjugation flags of a matrix into the corresponding CBLAS_TRANSPOSE value.
 The combination "conjugated, but not transposed" is not supported: a message is logged and the program is terminated by exit(-1).
 @param A The matrix providing the flags.
 @param transpose The variable receiving the CBLAS operation (CblasNoTrans, CblasTrans or CblasConjTrans).
 */
 void
 get_cblas_transpose( Matrix &A, CBLAS_TRANSPOSE &transpose ) {

     //The stringstream input to store the output messages.
     std::stringstream sstream;

     //Integer value to set the verbosity level of the output messages.
     int verbose_level;

     //Logging variable.
     logging output;

     const bool conjugated = A.is_conjugated();
     const bool transposed = A.is_transposed();

     // transposed matrices: either plain or conjugate transposition
     if ( transposed ) {
         transpose = conjugated ? CblasConjTrans : CblasTrans;
         return;
     }

     // neither transposed nor conjugated
     if ( !conjugated ) {
         transpose = CblasNoTrans;
         return;
     }

     // conjugation without transposition (CblasConjNoTrans) is not available
     sstream << "CblasConjNoTrans NOT IMPLEMENTED in GSL!!!!!!!!!!!!!!!" << std::endl;
     verbose_level=1;
     output.print(sstream,verbose_level);
     exit(-1);

 }


 /**
 @brief Constructor of the class. The row and column limits are set to cover the whole matrices.
 @param A_in The first matrix of the product.
 @param B_in The second matrix of the product.
 @param C_in The preallocated matrix to store the product.
 */
 zgemm_Task_serial::zgemm_Task_serial( Matrix &A_in, Matrix &B_in, Matrix &C_in ) {

     A = A_in;
     B = B_in;
     C = C_in;

     order = CblasRowMajor;

     // limits of matrix A: all rows and columns participate
     rows.Arows_start = 0;
     rows.Arows_end   = A.rows;
     rows.Arows       = A.rows;
     cols.Acols_start = 0;
     cols.Acols_end   = A.cols;
     cols.Acols       = A.cols;

     // limits of matrix B: all rows and columns participate
     rows.Brows_start = 0;
     rows.Brows_end   = B.rows;
     rows.Brows       = B.rows;
     cols.Bcols_start = 0;
     cols.Bcols_end   = B.cols;
     cols.Bcols       = B.cols;

     // limits of matrix C: all rows and columns participate
     rows.Crows_start = 0;
     rows.Crows_end   = C.rows;
     rows.Crows       = C.rows;
     cols.Ccols_start = 0;
     cols.Ccols_end   = C.cols;
     cols.Ccols       = C.cols;

 }


 /**
 @brief Constructor of the class with explicitly given row and column limits of the sub-problem.
 @param A_in The first matrix of the product.
 @param B_in The second matrix of the product.
 @param C_in The preallocated matrix to store the product.
 @param rows_in Structure containing the row limits of the sub-problem.
 @param cols_in Structure containing the column limits of the sub-problem.
 */
 zgemm_Task_serial::zgemm_Task_serial( Matrix &A_in, Matrix &B_in, Matrix &C_in, row_indices& rows_in, col_indices& cols_in ) {

     A = A_in;
     B = B_in;
     C = C_in;

     // the storage order is set here as well, so the attribute never holds an indeterminate value
     order = CblasRowMajor;

     rows = rows_in;
     cols = cols_in;

 }


 void
 zgemm_Task_serial::zgemm_chunk() {

     // setting CBLAS transpose operations
     CBLAS_TRANSPOSE Atranspose, Btranspose;
     get_cblas_transpose( A, Atranspose );
     get_cblas_transpose( B, Btranspose );

     QGD_Complex16* A_zgemm_data = A.get_data()+rows.Arows_start*A.stride+cols.Acols_start;
     QGD_Complex16* B_zgemm_data = B.get_data()+rows.Brows_start*B.stride+cols.Bcols_start;
     QGD_Complex16* C_zgemm_data = C.get_data()+rows.Crows_start*C.stride+cols.Ccols_start;


     // zgemm parameters
     int m,n,k,lda,ldb,ldc;

     if ( A.is_transposed() ) {
         m = cols.Acols;
         k = rows.Arows;
         lda = A.stride;
     }
     else {
         m = rows.Arows;
         k = cols.Acols;
         lda = A.stride;
     }


     if (B.is_transposed()) {
         n = rows.Brows;
         ldb = B.stride;
     }
     else {
         n = cols.Bcols;
         ldb = B.stride;
     }

     ldc = C.stride;

     // parameters alpha and beta for the cblas_zgemm3m function (the input matrices are not scaled)
     QGD_Complex16 alpha;
     alpha.real = 1.0;
     alpha.imag = 0.0;
     QGD_Complex16 beta;
     beta.real = 0.0;
     beta.imag = 0.0;


     cblas_zgemm(CblasRowMajor, Atranspose, Btranspose, m, n, k, (double*)&alpha, (double*)A_zgemm_data, lda, (double*)B_zgemm_data, ldb, (double*)&beta, (double*)C_zgemm_data, ldc);


 }


 /**
 @brief Constructor of the class. All the work is delegated to the constructor of zgemm_Task_serial.
 @param A_in The first matrix of the product.
 @param B_in The second matrix of the product.
 @param C_in The preallocated matrix to store the product.
 */
 zgemm_Task::zgemm_Task( Matrix &A_in, Matrix &B_in, Matrix &C_in)
     : zgemm_Task_serial(A_in, B_in, C_in)
 {
 }

 /**
 @brief Constructor of the class with explicit limits. All the work is delegated to the constructor of zgemm_Task_serial.
 @param A_in The first matrix of the product.
 @param B_in The second matrix of the product.
 @param C_in The preallocated matrix to store the product.
 @param rows_in Structure containing the row limits of the sub-problem.
 @param cols_in Structure containing the column limits of the sub-problem.
 */
 zgemm_Task::zgemm_Task( Matrix &A_in, Matrix &B_in, Matrix &C_in, row_indices& rows_in, col_indices& cols_in)
     : zgemm_Task_serial(A_in, B_in, C_in, rows_in, cols_in)
 {
 }

 /**
 @brief Call to process the present task: the sub-problem is halved repeatedly and the chunk that remains is multiplied.
 In every step the second half is handed over to the task group as a new task, while the present task keeps the first half.
 When none of the splitting conditions holds, the remaining chunk is calculated by zgemm_chunk.
 @param g The TBB task group receiving the dispatched sub-tasks.
 */
 void
 zgemm_Task::execute(tbb::task_group& g) {

     // creates a heap-allocated task with the same matrices and limits as the present one
     auto clone_task = [this]() {
         zgemm_Task* subtask = new zgemm_Task( A, B, C );
         subtask->cols = cols;
         subtask->rows = rows;
         return subtask;
     };

     // passes a sub-task to the task group; the sub-task is deleted right after it has been executed
     auto dispatch = [&g]( zgemm_Task* subtask ) {
         g.run([subtask, &g](){
             subtask->execute(g);
             delete subtask;
         });
     };

     while ( true ) {

         if ( !A.is_transposed() && rows.Arows > A.rows/8 && rows.Arows > SERIAL_CUTOFF ) {
             // *********** divide rows of A (and of C) into sub-tasks *********

             const int first  = rows.Arows_start;
             const int last   = rows.Arows_end;
             const int middle = (last+first)/2;

             // the second half goes to a new task
             zgemm_Task* subtask = clone_task();
             subtask->rows.Arows_start = middle;
             subtask->rows.Arows = last-middle;
             subtask->rows.Crows_start = middle;
             subtask->rows.Crows = last-middle;
             dispatch( subtask );

             // the present task keeps the first half
             rows.Arows_end = middle;
             rows.Arows = middle-first;
             rows.Crows_end = middle;
             rows.Crows = middle-first;

             continue;
         }

         if ( A.is_transposed() && cols.Acols > A.cols/8 && cols.Acols > SERIAL_CUTOFF  ) {
             // *********** divide cols of A (giving the rows of C) into sub-tasks *********

             const int first  = cols.Acols_start;
             const int last   = cols.Acols_end;
             const int middle = (last+first)/2;

             // the second half goes to a new task
             zgemm_Task* subtask = clone_task();
             subtask->cols.Acols_start = middle;
             subtask->cols.Acols = last-middle;
             subtask->rows.Crows_start = middle;
             subtask->rows.Crows = last-middle;
             dispatch( subtask );

             // the present task keeps the first half
             cols.Acols_end = middle;
             cols.Acols = middle-first;
             rows.Crows_end = middle;
             rows.Crows = middle-first;

             continue;
         }

         if ( !B.is_transposed() && cols.Bcols > B.cols/8 && cols.Bcols > SERIAL_CUTOFF  ) {
             // *********** divide cols of B (and of C) into sub-tasks *********

             const int first  = cols.Bcols_start;
             const int last   = cols.Bcols_end;
             const int middle = (last+first)/2;

             // the second half goes to a new task
             zgemm_Task* subtask = clone_task();
             subtask->cols.Bcols_start = middle;
             subtask->cols.Bcols = last-middle;
             subtask->cols.Ccols_start = middle;
             subtask->cols.Ccols = last-middle;
             dispatch( subtask );

             // the present task keeps the first half
             cols.Bcols_end = middle;
             cols.Bcols = middle-first;
             cols.Ccols_end = middle;
             cols.Ccols = middle-first;

             continue;
         }

         if ( B.is_transposed() && rows.Brows > B.rows/8 && rows.Brows > SERIAL_CUTOFF ) {
             // *********** divide rows of B (giving the cols of C) into sub-tasks *********

             const int first  = rows.Brows_start;
             const int last   = rows.Brows_end;
             const int middle = (last+first)/2;

             // the second half goes to a new task
             zgemm_Task* subtask = clone_task();
             subtask->rows.Brows_start = middle;
             subtask->rows.Brows = last-middle;
             subtask->cols.Ccols_start = middle;
             subtask->cols.Ccols = last-middle;
             dispatch( subtask );

             // the present task keeps the first half
             rows.Brows_end = middle;
             rows.Brows = middle-first;
             cols.Ccols_end = middle;
             cols.Ccols = middle-first;

             continue;
         }

         // no further division applies: multiply the remaining chunk
         zgemm_chunk();
         return;

     }

 } //execute


zgemm_Task_serial::B
Matrix B
The matrix B.
Definition: dot.h:153

dot
Matrix dot(Matrix &A, Matrix &B)
Call to calculate the product of two complex matrices by calling method zgemm3m from the CBLAS librar...
Definition: dot.cpp:38

zgemm_Task_serial::order
CBLAS_ORDER order
CBLAS storage order.
Definition: dot.h:161

logging::print
void print(const std::stringstream &sstream, int verbose_level=1) const
Call to print output messages depending on the verbosity level.
Definition: logging.cpp:55

CBLAS_TRANSPOSE
CBLAS_TRANSPOSE
Definition: dot.h:34

cblas_zgemm
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
Definition of the zgemm function from CBLAS.

zgemm_Task::execute
void execute(tbb::task_group &g)
This function is called when a task is spawned.
Definition: dot.cpp:387

zgemm_Task_serial
Class to calculate a matrix product C=A*B in serial.
Definition: dot.h:147

col_indices::Ccols
int Ccols
The number of cols in matrix C participating in the multiplication sub-problem.
Definition: dot.h:138

matrix_base::stride
int stride
The column stride of the array. (The array elements in one row are a_0, a_1, ... a_{cols-1}, 0, 0, 0, 0. The number of zeros is stride-cols)
Definition: matrix_base.hpp:46

zgemm_Task_serial::rows
row_indices rows
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:157

zgemm_Task_serial::A
Matrix A
The matrix A.
Definition: dot.h:151

CblasNoTrans
Definition: dot.h:34

get_cblas_transpose
void get_cblas_transpose(Matrix &A, CBLAS_TRANSPOSE &transpose)
Call to get the transpose properties of the input matrix for CBLAS calculations.
Definition: dot.cpp:202

CblasRowMajor
Definition: dot.h:33

row_indices::Arows_end
int Arows_end
The last row in matrix A participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:99

row_indices::Arows
int Arows
The number of rows in matrix A participating in the multiplication sub-problem.
Definition: dot.h:101

row_indices::Crows_start
int Crows_start
The first row in matrix C participating in the multiplication sub-problem.
Definition: dot.h:109

zgemm_Task
Class to calculate a matrix product C=A*B in parallel.
Definition: dot.h:199

col_indices::Acols_start
int Acols_start
The first col in matrix A participating in the multiplication sub-problem.
Definition: dot.h:122

row_indices
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:94

row_indices::Brows_start
int Brows_start
The first row in matrix B participating in the multiplication sub-problem.
Definition: dot.h:103

col_indices
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:120

matrix_base::get_data
scalar * get_data() const
Call to get the pointer to the stored data.
Definition: matrix_base.hpp:304

check_matrices
bool check_matrices(Matrix &A, Matrix &B)
Call to check the shape of the matrices for method dot.
Definition: dot.cpp:132

matrix_base::is_conjugated
bool is_conjugated()
Call to get whether the matrix should be conjugated in CBLAS functions or not.
Definition: matrix_base.hpp:264

row_indices::Crows
int Crows
The number of rows in matrix C participating in the multiplication sub-problem.
Definition: dot.h:113

logging
A class containing basic methods for setting up the verbosity level.
Definition: logging.h:43

dot.h

matrix_base::rows
int rows
The number of rows.
Definition: matrix_base.hpp:42

matrix_base::cols
int cols
The number of columns.
Definition: matrix_base.hpp:44

zgemm_Task::zgemm_Task
zgemm_Task(Matrix &A_in, Matrix &B_in, Matrix &C_in)
Constructor of the class.
Definition: dot.cpp:366

zgemm_Task_serial::C
Matrix C
The matrix C.
Definition: dot.h:155

16_qubit_trained_circuit_VQE.k
k
Definition: 16_qubit_trained_circuit_VQE.py:150

col_indices::Bcols_start
int Bcols_start
The first col in matrix B participating in the multiplication sub-problem.
Definition: dot.h:128

CblasConjTrans
Definition: dot.h:34

zgemm_Task_serial::zgemm_Task_serial
zgemm_Task_serial(Matrix &A_in, Matrix &B_in, Matrix &C_in)
Constructor of the class.
Definition: dot.cpp:244

QGD_Complex16
Structure type representing complex numbers in the SQUANDER package.
Definition: QGDTypes.h:38

col_indices::Acols
int Acols
The number of cols in matrix A participating in the multiplication sub-problem.
Definition: dot.h:126

zgemm_Task_serial::zgemm_chunk
void zgemm_chunk()
Call to calculate the product of matrix chunks defined by attributes rows, cols.
Definition: dot.cpp:301

col_indices::Acols_end
int Acols_end
The last col in matrix A participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:124

row_indices::Crows_end
int Crows_end
The last row in matrix C participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:111

Matrix
Class to store data of complex arrays and its properties.
Definition: matrix.h:38

omp_set_num_threads
void omp_set_num_threads(int num_threads)
Set the number of threads on runtime in MKL.

zgemm_Task_serial::cols
col_indices cols
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:159

CblasTrans
Definition: dot.h:34

row_indices::Brows
int Brows
The number of rows in matrix B participating in the multiplication sub-problem.
Definition: dot.h:107

col_indices::Bcols
int Bcols
The number of cols in matrix B participating in the multiplication sub-problem.
Definition: dot.h:132

col_indices::Ccols_start
int Ccols_start
The first col in matrix C participating in the multiplication sub-problem.
Definition: dot.h:134

common.h
Header file for commonly used functions and wrappers to CBLAS functions.

QGD_Complex16::real
double real
the real part of a complex number
Definition: QGDTypes.h:40

get_parametric_circuit.alpha
int alpha
Definition: get_parametric_circuit.py:30

example_CH_general_unitary.output
output
Definition: example_CH_general_unitary.py:120

row_indices::Arows_start
int Arows_start
The first row in matrix A participating in the multiplication sub-problem.
Definition: dot.h:97

col_indices::Ccols_end
int Ccols_end
The last col in matrix C participating in the multiplication sub-problem. (The col are picked from a ...
Definition: dot.h:136

row_indices::Brows_end
int Brows_end
The last row in matrix B participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:105

SERIAL_CUTOFF
#define SERIAL_CUTOFF
Definition: dot.cpp:27

col_indices::Bcols_end
int Bcols_end
The last col in matrix B participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:130

QGD_Complex16::imag
double imag
the imaginary part of a complex number
Definition: QGDTypes.h:42

matrix_base::is_transposed
bool is_transposed()
Call to get whether the matrix should be transposed in CBLAS functions or not.
Definition: matrix_base.hpp:282

omp_get_max_threads
int omp_get_max_threads()
get the number of threads in MKL