Sequential Quantum Gate Decomposer  v1.9.3
Powerful decomposition of general unitarias into one- and two-qubit gates gates
BFGS_Powell.cpp
Go to the documentation of this file.
1 /*
2 Copyright 2020 Peter Rakyta, Ph.D.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <math.h>
21 
22 #include <BFGS_Powell.h>
23 
24 
25 
32 BFGS_Powell::BFGS_Powell(void (* f_pointer) (Matrix_real , void *, double *, Matrix_real&), void* meta_data_in) : Grad_Descend(f_pointer, meta_data_in) {
33 
34 }
35 
36 
37 
42 {
43 
44 
45  // initialize array Z
46  memset( Zmtx.get_data(), 0.0, variable_num*variable_num*sizeof(double) );
47 
48 
49  // initialite Z to identiy matrix
50  long row_offset = 0;
51  for (long idx = 0; idx < variable_num; idx++) {
52 
53  Zmtx[row_offset + idx] = 1.0;
54  row_offset = row_offset + variable_num;
55 
56  }
57 
59 
60  return;
61 }
62 
63 
64 
71 {
72 
73 
74  // the initial gradient during the line search
75  Matrix_real g0_search( variable_num, 1 );
76 
77  // the initial point during the line search
78  Matrix_real x0_search( variable_num, 1 );
79 
80  // The calculated graient of the cost function at position x
81  Matrix_real g( variable_num, 1 );
82 
83  // the current search direction
84  Matrix_real search_direction( variable_num, 1 );
85 
86 
87  // Z @ Z^T = B^-1 approximated Hesse matrix (second derivate of the cost function) (B is the inverse of the Hesse matrix) Eq (1.8) in
88  // M. J. D. Powell: A tolerant algorithm for linearly constrained optimization calculations, Mathematical Programming volume 45, pages 547–566 (1989)
89  // B is not stored, Z is updated during the BFGS formula
91 
92 
93  // Initialize Z
95 
96 
97  // Z.T @ g as an intermediate result for search direction update and the BFGS iteration
99 
101 
102 
103  // The norm of the matrix Z (initialized to an imposiible value)
104  double norm_Z = -1.0;
105 
106 
107  // inner product of the search direction and the gradient
108  double d__dot__g;
109 
110  double maximal_step; // The upper bound of the allowed step in the search direction
111 
113  get_f_ang_gradient(x, f, g);
114  }
115 
116  double fprev = fabs(f + f + 1.0);
117 
118  // Calculate the next search direction.
119 
120 
121  while ( true ) {
122 
123  maximal_step = 0.0;
124 
125  double search_direction__grad_overlap = 0.0;
126  get_search_direction(g, search_direction, search_direction__grad_overlap);
127 
128 
129  // Calculate the bound on the step-length
130 
131  if (search_direction__grad_overlap < 0.0) {
132  get_Maximal_Line_Search_Step(search_direction, maximal_step, search_direction__grad_overlap);
133  }
134  d__dot__g = search_direction__grad_overlap;
135 
136 
137 
138 
139  double gradient_norm = 0.0;
140  for (long idx = 0; idx < variable_num; idx++) {
141  gradient_norm += g[idx] * g[idx];
142  }
143 
144  double acc=1e-19;
145  // Test for convergence by measuring the magnitude of the gradient.
146  if (gradient_norm <= acc * acc) {
148  return;
149  }
150 
151  // in case the cost fuction does not show decrement in the direction of the line search
152  if (d__dot__g >= 0.0) {
154  return;
155  }
156 
157 
158  // terminate cycles if the cost function is not decreased any more
159  if (f >= fprev) {
161  return;
162  }
163 
164  fprev = f;
165 
166 
167  // terminate if maximal number of iteration reached
170  return;
171  }
172 
173 
174  // perform line search in the direction search_direction
175  line_search(x, g, search_direction, x0_search, g0_search, maximal_step, d__dot__g, f);
176 
178  return;
179  }
180 
181 
182  // update the second derivative approximation.
183  BFGS_iteration(x, g, x0_search, g0_search, norm_Z);
184 
185 
186 
187  }
188 
189 
190 
191 
192 }
193 
203 {
204 
205  // Test if there is sufficient convexity for the update.
206 
207  double norm_delta = 0.0;
208  double delta_T__dot__gamma = 0.0; //in Eq (2.6) in M.J.D. POWELL: UPDATING CONJUGATE DIRECTIONS BY THE BFGS FORMULA, Mathematical Programming 38 (1987) 29-46
209 
210 
211 
212  double tmp = 0.0;
213 
214  Matrix_real delta( x0.size(), 1 );
215  Matrix_real gamma( g0.size(), 1 );
216 
217  for (long idx = 0; idx < variable_num; idx++) {
218  delta[idx] = x[idx] - x0[idx];
219 
220  norm_delta += delta[idx] * delta[idx];
221  tmp += g0[idx] * delta[idx];
222 
223  gamma[idx] = g[idx] - g0[idx];
224 
225  delta_T__dot__gamma += gamma[idx] * delta[idx];
226  }
227 
228 
229  if (delta_T__dot__gamma < fabs(tmp) * 0.1) {
230  return;
231  }
232 
233  // delta_T__dot__gamma should not be zero or negative. Might happen on the first iteration when delta is zero
234  delta_T__dot__gamma = std::max( num_precision, delta_T__dot__gamma );
235 
236 
237 
238  /*
239  Transform the Z matrix with Givens rotations. Eq (1.7) M.J.D. POWELL: UPDATING CONJUGATE DIRECTIONS BY THE BFGS FORMULA, Mathematical Programming 38 (1987) 29-46
240  this transformation provides a convenient way to satisfy active constraints.
241  also, the freedom to postmultiply Z by an orthogonal matrix is sometimes important to maintain good accuracy in the presence of computer rounding errors.
242  For example if B (Hessian of the cos function) has (n-1) eigenvalues of magnitude one and one tiny eigenvalue whose
243  eigenvector is e = ( 1 1 ... 1), then inv(B) is dominated by a large multiple of the
244  n x n matrix whose elements are all one. Therefore, if just the elements of inv(B) are
245  stored, then rounding errors prevent the accurate determination of B from inv(B).
246  However, using this transformation all the large elements of Z can be confined to a single column, which can make the determination
247  of B from Z well-conditioned.
248  This algorithm allows large differences in the magnitudes of the column norms { ||z_i|| [i= 1, 2 . . . . , n}.
249  */
250  for (long col_idx = variable_num-1; col_idx > 0; col_idx--) {
251 
252  if (Z_T__dot__g[col_idx] == 0.0) {
253  continue;
254  }
255 
256  long col_idx_tmp = col_idx - 1;
257 
258  double abs = sqrt( Z_T__dot__g[col_idx_tmp]*Z_T__dot__g[col_idx_tmp] + Z_T__dot__g[col_idx]*Z_T__dot__g[col_idx] );
259 
260  double cos_val = Z_T__dot__g[col_idx_tmp] / abs;
261  double sin_val = Z_T__dot__g[col_idx] / abs;
262 
263  Z_T__dot__g[col_idx_tmp] = abs;
264 
265  long row_offset = col_idx_tmp;
266 
267  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
268 
269  tmp = cos_val * Zmtx[row_offset + 1] - sin_val * Zmtx[row_offset];
270  Zmtx[row_offset] = cos_val * Zmtx[row_offset] + sin_val * Zmtx[row_offset + 1];
271  Zmtx[row_offset + 1] = tmp;
272 
273  row_offset += variable_num;
274  }
275 
276  }
277 
278 
279 
280  // Update the value of the norm of the Z matrix encoded in norm_Z.
281 
282  if (norm_Z < 0.0) {
283  // the first column of Z is delta/sqrt(delta.T @ gamma)
284  norm_Z = norm_delta / delta_T__dot__gamma;
285  } else {
286  tmp = sqrt(norm_Z * norm_delta / delta_T__dot__gamma);
287 
288  norm_Z = std::min(norm_Z, tmp);
289  norm_Z = std::max(norm_Z, tmp*0.1);
290  }
291 
292  // Complete the updating of Z.
293 
294  tmp = sqrt(delta_T__dot__gamma);
295 
296  long row_offset = 0;
297 
298  // Eq (2.2) in M.J.D. POWELL: UPDATING CONJUGATE DIRECTIONS BY THE BFGS FORMULA, Mathematical Programming 38 (1987) 29-46
299  for (long idx = 0; idx < variable_num; idx++) {
300  if (fabs(delta[idx]) > num_precision ) {
301  Zmtx[row_offset] = delta[idx] / tmp;
302  }
303  else {
304  Zmtx[row_offset] = 0;
305  }
306 
307 
308  row_offset += variable_num;
309 
310  }
311 
312 
313  // calculate the dot product of gamma.T @ Z
314  Matrix_real gamma_T__dot__Z(1, variable_num);
315  memset( gamma_T__dot__Z.get_data(), 0.0, gamma_T__dot__Z.size()*sizeof(double) );
316 
317  row_offset = 0;
318  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
319 
320  double& gamma_element = gamma[row_idx];
321 
322  for (long col_idx = 1; col_idx < variable_num; col_idx++) {
323  gamma_T__dot__Z[col_idx] += gamma_element * Zmtx[row_offset + col_idx];
324  }
325 
326  row_offset = row_offset + variable_num;
327 
328 
329  }
330 
331 
332 
333  for (long col_idx = 1; col_idx < variable_num; col_idx++) {
334  gamma_T__dot__Z[col_idx] = gamma_T__dot__Z[col_idx]/delta_T__dot__gamma;
335  }
336 
337  // Eq (2.2) in M.J.D. POWELL: UPDATING CONJUGATE DIRECTIONS BY THE BFGS FORMULA, Mathematical Programming 38 (1987) 29-46 + column-wise Frobenius norm of Z
338  Matrix_real norm_Z_tmp_mtx( 1, variable_num);
339  memset( norm_Z_tmp_mtx.get_data(), 0.0, norm_Z_tmp_mtx.size()*sizeof(double) );
340 
341  row_offset = 0;
342  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
343 
344  double& delta_element = delta[row_idx];
345 
346  for (long col_idx = 1; col_idx < variable_num; col_idx++) {
347  double& Z_element = Zmtx[row_offset + col_idx];
348 
349  Z_element = Z_element - gamma_T__dot__Z[col_idx] * delta_element;
350 
351  norm_Z_tmp_mtx[col_idx] += Z_element * Z_element;
352 
353  }
354 
355  row_offset = row_offset + variable_num;
356 
357 
358  }
359 
360 
361  // renormalize the columns of the Z matrix
362  // When the column norm of the new Z is small, need to scale it up to preserve numerical accuracy
363  // this scaling will broke the Z^T @ Z = inv(B) relation, but it will preserve the quadratic termination properties of the BFGS
364  // (see M.J.D. POWELL: UPDATING CONJUGATE DIRECTIONS BY THE BFGS FORMULA, Mathematical Programming 38 (1987) 29-46)
365  for (long col_idx = 1; col_idx < variable_num; col_idx++) {
366 
367  if (norm_Z_tmp_mtx[col_idx] < norm_Z && norm_Z_tmp_mtx[col_idx] > num_precision) {
368 
369  tmp = sqrt(norm_Z / norm_Z_tmp_mtx[col_idx]);
370 
371  row_offset = col_idx;
372  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
373 
374  Zmtx[row_offset] = tmp * Zmtx[row_offset];
375  row_offset += variable_num;
376  }
377  }
378 
379  }
380 
381 
382 
383 
384 
385  return;
386 }
387 
388 
389 
396 void BFGS_Powell::get_search_direction(Matrix_real& g, Matrix_real& search_direction, double& search_direction__grad_overlap )
397 {
398 
399 
400  // calculate Z.T @ g
401  memset(Z_T__dot__g.get_data(), 0.0, Z_T__dot__g.size()*sizeof(double) );
402  long row_offset = 0;
403  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
404 
405  double& g_element = g[row_idx];
406 
407  for (long col_idx = 0; col_idx < variable_num; col_idx++) {
408  Z_T__dot__g[col_idx] += Zmtx[row_offset + col_idx] * g_element;
409 
410  }
411  row_offset = row_offset + variable_num;
412  }
413 
414 
415 
416  // calculate the search direction d by -Z @ Z_T__dot__g
417  memset(search_direction.get_data(), 0.0, search_direction.size()*sizeof(double) );
418  row_offset = 0;
419  for (long row_idx = 0; row_idx < variable_num; row_idx++) {
420 
421  for (long col_idx = 0; col_idx < variable_num; col_idx++) {
422  search_direction[row_idx] = search_direction[row_idx] - Zmtx[row_offset + col_idx] * Z_T__dot__g[col_idx];
423  }
424 
425  row_offset = row_offset + variable_num;
426 
427  }
428 
429 
430  // test variable to see downhill or uphill
431  search_direction__grad_overlap = 0.0;
432 
433  for (long idx = 0; idx < variable_num; idx++) {
434  search_direction__grad_overlap += search_direction[idx] * g[idx];
435  }
436 
437  return;
438 
439 }
440 
445 
446 }
A class implementing the BFGS iterations on the.
Definition: grad_descend.h:31
void get_f_ang_gradient(Matrix_real &x, double &f, Matrix_real &g)
Call this method to obtain the cost function and its gradient at a gives position given by x...
void get_Maximal_Line_Search_Step(Matrix_real &search_direction, double &maximal_step, double &search_direction__grad_overlap)
Call this method to obtain the maximal step size during the line search.
void Initialize_Zmtx()
Initialize the matrix Z to identity.
Definition: BFGS_Powell.cpp:41
BFGS_Powell(void(*f_pointer)(Matrix_real, void *, double *, Matrix_real &), void *meta_data_in)
Constructor of the class.
Definition: BFGS_Powell.cpp:32
long maximal_iterations
maximal count of iterations during the optimization
Definition: grad_descend.h:41
~BFGS_Powell()
Destructor of the class.
void Optimize(Matrix_real &x, double &f)
Call this method to start the optimization process.
Definition: BFGS_Powell.cpp:70
void line_search(Matrix_real &x, Matrix_real &g, Matrix_real &search_direction, Matrix_real &x0_search, Matrix_real &g0_search, double &maximal_step, double &d__dot__g, double &f)
Call to perform inexact line search terminated with Wolfe 1st and 2nd conditions. ...
void get_search_direction(Matrix_real &g, Matrix_real &search_direction, double &search_direction__grad_overlap)
Method to get the search direction in the next line search.
scalar * get_data() const
Call to get the pointer to the stored data.
Matrix_real Zmtx
Definition: BFGS_Powell.h:34
enum solver_status status
status of the solver
Definition: grad_descend.h:59
long function_call_count
number of function calls during the optimization process
Definition: grad_descend.h:44
int size() const
Call to get the number of the allocated elements.
int variable_num
number of independent variables in the problem
Definition: grad_descend.h:38
double num_precision
numerical precision used in the calculations
Definition: grad_descend.h:47
Matrix_real Z_T__dot__g
Definition: BFGS_Powell.h:37
void BFGS_iteration(Matrix_real &x, Matrix_real &g, Matrix_real &x0_search, Matrix_real &g0_search, double &norm_Z)
Method implementing the BFGS update of the matrix Z.
Class to store data of complex arrays and its properties.
Definition: matrix_real.h:39