45 for (
int current_idx_pair=current_idx + index_step_target; current_idx_pair<
matrix_size; current_idx_pair=current_idx_pair+(index_step_target << 1) ) {
47 for (
int idx = 0; idx < index_step_target; idx++) {
50 int current_idx_loc = current_idx + idx;
51 int current_idx_pair_loc = current_idx_pair + idx;
53 int row_offset = current_idx_loc * input.
stride;
54 int row_offset_pair = current_idx_pair_loc * input.
stride;
56 if (control_qbit < 0 || ((current_idx_loc >> control_qbit) & 1)) {
64 input[row_offset].real = tmp1.
real + tmp2.
real;
65 input[row_offset].imag = tmp1.
imag + tmp2.
imag;
67 tmp1 =
mult(u3_1qbit[2], element);
68 tmp2 =
mult(u3_1qbit[3], element_pair);
70 input[row_offset_pair].real = tmp1.
real + tmp2.
real;
71 input[row_offset_pair].imag = tmp1.
imag + tmp2.
imag;
94 current_idx = current_idx + (index_step_target << 1);
123 int parallel_outer_cycles = matrix_size/(index_step_target << 1);
124 int outer_grain_size;
125 if ( index_step_target <= 2 ) {
126 outer_grain_size = 64;
128 else if ( index_step_target <= 4 ) {
129 outer_grain_size = 32;
131 else if ( index_step_target <= 8 ) {
132 outer_grain_size = 16;
134 else if ( index_step_target <= 16 ) {
135 outer_grain_size = 8;
138 outer_grain_size = 2;
141 int inner_grain_size = 64;
143 tbb::parallel_for( tbb::blocked_range<int>(0, parallel_outer_cycles, outer_grain_size), [&](tbb::blocked_range<int> r) {
145 int current_idx = r.begin()*(index_step_target << 1);
146 int current_idx_pair = index_step_target + r.begin()*(index_step_target << 1);
148 for (
int rdx=r.begin(); rdx<r.end(); rdx++) {
151 tbb::parallel_for( tbb::blocked_range<int>(0,index_step_target,inner_grain_size), [&](tbb::blocked_range<int> r) {
152 for (
int idx=r.begin(); idx<r.end(); ++idx) {
156 int current_idx_loc = current_idx + idx;
157 int current_idx_pair_loc = current_idx_pair + idx;
159 int row_offset = current_idx_loc * input.
stride;
160 int row_offset_pair = current_idx_pair_loc * input.
stride;
162 if (control_qbit < 0 || ((current_idx_loc >> control_qbit) & 1)) {
170 input[row_offset].real = tmp1.
real + tmp2.
real;
171 input[row_offset].imag = tmp1.
imag + tmp2.
imag;
173 tmp1 =
mult(u3_1qbit[2], element);
174 tmp2 =
mult(u3_1qbit[3], element_pair);
176 input[row_offset_pair].real = tmp1.
real + tmp2.
real;
177 input[row_offset_pair].imag = tmp1.
imag + tmp2.
imag;
201 current_idx = current_idx + (index_step_target << 1);
202 current_idx_pair = current_idx_pair + (index_step_target << 1);
int stride
The column stride of the array. (The array elements in one row are a_0, a_1, ..., a_{cols-1}, 0, 0, ..., 0; the number of trailing zeros is stride - cols.)
QGD_Complex16 mult(QGD_Complex16 &a, QGD_Complex16 &b)
Call to calculate the product of two complex scalars.
scalar * get_data() const
Call to get the pointer to the stored data.
int cols
The number of columns.
Structure type representing complex numbers in the SQUANDER package.
Class to store data of complex arrays and its properties.
double real
The real part of a complex number.
double imag
The imaginary part of a complex number.