/**
 * Definitions of the static string constants of ModelCSourceGen<Base>.
 *
 * The FUNCTION_* values are name suffixes for the generated C functions:
 * generateInfoSource() and generateAtomicFuncNames() in this file build the
 * full function name as `_name + "_" + FUNCTION_*` and store the source
 * under that name plus ".c".
 *
 * NOTE(review): the identifier FUNCTION_FORWAD_ZERO is spelled without the
 * second 'R' of "FORWARD". It is declared outside this file, so a rename
 * would have to be done at the declaration and at every use.
 */
1 #ifndef CPPAD_CG_MODEL_C_SOURCE_GEN_IMPL_INCLUDED 2 #define CPPAD_CG_MODEL_C_SOURCE_GEN_IMPL_INCLUDED 24 const std::string ModelCSourceGen<Base>::FUNCTION_FORWAD_ZERO =
"forward_zero";
27 const std::string ModelCSourceGen<Base>::FUNCTION_JACOBIAN =
"jacobian";
30 const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN =
"hessian";
33 const std::string ModelCSourceGen<Base>::FUNCTION_FORWARD_ONE =
"forward_one";
36 const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_ONE =
"reverse_one";
39 const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_TWO =
"reverse_two";
42 const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_JACOBIAN =
"sparse_jacobian";
45 const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_HESSIAN =
"sparse_hessian";
48 const std::string ModelCSourceGen<Base>::FUNCTION_JACOBIAN_SPARSITY =
"jacobian_sparsity";
51 const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN_SPARSITY =
"hessian_sparsity";
54 const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN_SPARSITY2 =
"hessian_sparsity2";
57 const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_FORWARD_ONE =
"sparse_forward_one";
60 const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_REVERSE_ONE =
"sparse_reverse_one";
63 const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_REVERSE_TWO =
"sparse_reverse_two";
66 const std::string ModelCSourceGen<Base>::FUNCTION_FORWARD_ONE_SPARSITY =
"forward_one_sparsity";
69 const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_ONE_SPARSITY =
"reverse_one_sparsity";
// NOTE(review): unlike the two sparsity names above, this value carries a
// "sparse_" prefix. Presumably whatever looks the generated symbol up by name
// expects exactly this string -- confirm against the consumer before
// "normalizing" it.
72 const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_TWO_SPARSITY =
"sparse_reverse_two_sparsity";
75 const std::string ModelCSourceGen<Base>::FUNCTION_INFO =
"info";
78 const std::string ModelCSourceGen<Base>::FUNCTION_ATOMIC_FUNC_NAMES =
"atomic_functions";
// The text "const"; its uses are not visible in this part of the file.
81 const std::string ModelCSourceGen<Base>::CONST =
"const";
/**
 * Creates the variable name generator used when producing source code.
 *
 * All four names are forwarded unchanged, in this order, to the constructor
 * of LangCDefaultVariableNameGenerator<Base>.
 *
 * @param depName      first constructor argument (presumably the name of the
 *                     dependent variable array -- confirm at the declaration)
 * @param indepName    second constructor argument (presumably the independent
 *                     variable array name)
 * @param tmpName      third constructor argument (presumably the temporary
 *                     variable name)
 * @param tmpArrayName fourth constructor argument (presumably the temporary
 *                     array name)
 * @return a pointer to an object allocated with `new`; nothing here releases
 *         it, so the caller must take ownership (generateInfoSource() in this
 *         file wraps the result in a std::unique_ptr).
 */
84 VariableNameGenerator<Base>* ModelCSourceGen<Base>::createVariableNameGenerator(
const std::string& depName,
85 const std::string& indepName,
86 const std::string& tmpName,
87 const std::string& tmpArrayName) {
88 return new LangCDefaultVariableNameGenerator<Base> (depName, indepName, tmpName, tmpArrayName);
/**
 * Provides the generated sources as a map from std::string to std::string.
 * Elsewhere in this file entries are stored as `_sources[<name> + ".c"]`
 * with the source text as value.
 *
 * Generation is lazy: generateSources() is only invoked while `_sources` is
 * still empty, so later calls reuse what was produced the first time
 * (including the multiThreadingType used at that time).
 *
 * @param multiThreadingType forwarded to generateSources()
 *
 * NOTE(review): the remaining parameter(s) -- `timer` is used below -- and
 * the return statement are not visible here; presumably `_sources` itself is
 * returned.
 */
92 const std::map<std::string, std::string>& ModelCSourceGen<Base>::getSources(MultiThreadingType multiThreadingType,
94 if (_sources.empty()) {
95 generateSources(multiThreadingType, timer);
/**
 * Generates every requested source for this model by calling the individual
 * generate*Source() members in a fixed order.
 *
 * @param multiThreadingType forwarded to the sparse Jacobian/Hessian
 *                           generators
 *
 * NOTE(review): the conditions guarding most of the calls below are not
 * visible in this listing; only the `_sparseJacobian`, `_sparseHessian` and
 * sparsity-pattern guards can be read here.
 */
101 void ModelCSourceGen<Base>::generateSources(MultiThreadingType multiThreadingType,
// Report the start of the job, labelled with the quoted model name.
107 startingJob(
"'" + _name +
"'", JobTimer::SOURCE_FOR_MODEL);
// Zero-order source; the flag records that it has been generated
// (isAtomicsUsed() checks it).
110 generateZeroSource();
111 _zeroEvaluated =
true;
// Dense Jacobian and Hessian.
115 generateJacobianSource();
119 generateHessianSource();
// Forward-one, reverse-one and reverse-two: sparse variant first, then the
// non-sparse one.
123 generateSparseForwardOneSources();
124 generateForwardOneSources();
128 generateSparseReverseOneSources();
129 generateReverseOneSources();
133 generateSparseReverseTwoSources();
134 generateReverseTwoSources();
// Sparse Jacobian/Hessian, only when requested.
137 if (_sparseJacobian) {
138 generateSparseJacobianSource(multiThreadingType);
141 if (_sparseHessian) {
142 generateSparseHessianSource(multiThreadingType);
// The Jacobian sparsity source is generated when any of these three options
// is enabled.
145 if (_sparseJacobian || _forwardOne || _reverseOne) {
146 generateJacobianSparsitySource();
// Likewise for the Hessian sparsity source.
149 if (_sparseHessian || _reverseTwo) {
150 generateHessianSparsitySource();
// Model information and the list of atomic function names.
153 generateInfoSource();
155 generateAtomicFuncNames();
/**
 * Runs loop detection: evaluates the model on CodeHandler variables and hands
 * the resulting dependents to a DependentPatternMatcher, which fills
 * `_funNoLoops` and `_loopTapes`.
 */
161 void ModelCSourceGen<Base>::generateLoops() {
// Checks whether any related-dependent candidates were supplied.
// NOTE(review): the body of this `if` is not visible; presumably it returns
// early because there is nothing to match.
162 if (_relatedDepCandidates.empty()) {
166 startingJob(
"", JobTimer::LOOP_DETECTION);
168 CodeHandler<Base> handler;
169 handler.setJobTimer(_jobTimer);
// One independent per input of `_fun`, registered with the handler.
171 std::vector<CGBase> xx(_fun.Domain());
172 handler.makeVariables(xx);
// Copy the values from `_x` into the independents.
// NOTE(review): indexes `_x` up to xx.size(); any guard on the size of `_x`
// is not visible here.
174 for (
size_t i = 0; i < xx.size(); i++) {
175 xx[i].setValue(_x[i]);
// Zero-order forward evaluation of `_fun` yields the dependents.
179 std::vector<CGBase> yy = _fun.Forward(0, xx);
181 DependentPatternMatcher<Base> matcher(_relatedDepCandidates, yy, xx);
182 matcher.generateTapes(_funNoLoops, _loopTapes);
// In verbose mode, print how many equation patterns and loops were found.
185 if (_jobTimer !=
nullptr && _jobTimer->isVerbose()) {
186 std::cout <<
" equation patterns: " << matcher.getEquationPatterns().size() <<
187 " loops: " << matcher.getLoops().size() << std::endl;
/**
 * Generates the C source of the "<_name>_info" function and stores it in
 * `_sources` under "<_name>_info.c".
 *
 * The emitted statements assign through the pointers `baseName`, `m`, `n`,
 * `depCount` and `indCount`:
 *  - baseName: `_baseTypeName`, a space, then typeid(Base).name()
 *  - m / n: `_fun.Range()` / `_fun.Domain()`
 *  - depCount / indCount: sizes of the name generator's dependent and
 *    independent lists
 *
 * NOTE(review): the function-declaration call and the start of the stream
 * expression are not visible in this listing.
 */
192 void ModelCSourceGen<Base>::generateInfoSource() {
// typeid(...).name() is implementation-defined (often a mangled name), so the
// emitted string differs between compilers.
193 const char* localBaseName =
typeid (Base).name();
195 std::string funcName = _name +
"_" + FUNCTION_INFO;
// The generator is only used for its counts; unique_ptr releases it on exit.
197 std::unique_ptr<VariableNameGenerator<Base> > nameGen(createVariableNameGenerator());
203 "unsigned int* indCount",
204 "unsigned int* depCount"});
206 " *baseName = \"" << _baseTypeName <<
" " << localBaseName <<
"\";\n" 207 " *m = " << _fun.Range() <<
";\n" 208 " *n = " << _fun.Domain() <<
";\n" 209 " *depCount = " << nameGen->getDependent().size() <<
"; // number of dependent array variables\n" 210 " *indCount = " << nameGen->getIndependent().size() <<
"; // number of independent array variables\n" 213 _sources[funcName +
".c"] = _cache.str();
/**
 * Generates the C source of the "<_name>_atomic_functions" function and
 * stores it in `_sources` under "<_name>_atomic_functions.c".
 *
 * The emitted function defines a static array `atomic` holding the entries
 * of `_atomicFunctions` as quoted, comma-separated strings, then assigns it
 * to `*names` and the element count to `*n`.
 *
 * NOTE(review): when `_atomicFunctions` is empty this emits
 * `atomic[0] = {...}`; a zero-length array is not valid ISO C (it is a
 * compiler extension) -- confirm the target C compilers accept it.
 * NOTE(review): the entries are written verbatim between quotes, with no
 * escaping visible here.
 */
217 void ModelCSourceGen<Base>::generateAtomicFuncNames() {
218 std::string funcName = _name +
"_" + FUNCTION_ATOMIC_FUNC_NAMES;
219 size_t n = _atomicFunctions.size();
222 "unsigned long* n"});
224 " static const char* atomic[" << n <<
"] = {";
// Comma-separated list of quoted names.
225 for (
size_t i = 0; i < n; i++) {
226 if (i > 0) _cache <<
", ";
227 _cache <<
"\"" << _atomicFunctions[i] <<
"\"";
230 " *names = atomic;\n" 231 " *n = " << n <<
";\n" 234 _sources[funcName +
".c"] = _cache.str();
/**
 * Tells whether the model uses atomic functions.
 *
 * When `_zeroEvaluated` is set (generateSources() sets it after
 * generateZeroSource()), the answer comes from `_atomicFunctions`.
 * Otherwise it comes from getAtomicsInfo(), which computes and caches the
 * atomic usage of `_fun` on first use.
 *
 * @return true if at least one atomic function is used
 */
238 bool ModelCSourceGen<Base>::isAtomicsUsed() {
239 if (_zeroEvaluated) {
240 return _atomicFunctions.size() > 0;
242 return !getAtomicsInfo().empty();
/**
 * Provides the atomic-function usage information of `_fun`, keyed by size_t.
 *
 * The map is computed on the first call by
 * AtomicDependencyLocator::findAtomicsUsage() and cached in `_atomicsInfo`;
 * later calls return the cached map.
 *
 * NOTE(review): `_atomicsInfo` is a raw pointer allocated with `new`; its
 * release is not visible in this part of the file -- confirm it is deleted
 * (e.g. in the destructor).
 *
 * @return a reference to the cached map, valid while `_atomicsInfo` is
 *         neither reset nor deleted
 */
247 const std::map<size_t, AtomicUseInfo<Base> >& ModelCSourceGen<Base>::getAtomicsInfo() {
248 if (_atomicsInfo ==
nullptr) {
249 AtomicDependencyLocator<Base> adl(_fun);
250 _atomicsInfo =
new std::map<size_t, AtomicUseInfo<Base> >(adl.findAtomicsUsage());
252 return *_atomicsInfo;
257 const SparsitySetType& sparsity) {
258 std::vector<Color> colors(sparsity.size());
265 for (
size_t i = 0; i < sparsity.size(); i++) {
266 const std::set<size_t>& row = sparsity[i];
267 if (row.size() == 0) {
272 std::set<size_t> rowReduced;
273 if (_custom_hess.defined) {
274 for (
size_t j : row) {
275 if (columns.find(j) != columns.end())
276 rowReduced.insert(j);
282 bool newColor =
true;
284 for (
size_t c = 0; c < c_used; c++) {
285 std::set<size_t>& forbidden_c = colors[c].forbiddenRows;
286 if (!intersects(forbidden_c, rowReduced)) {
290 forbidden_c.insert(rowReduced.begin(), rowReduced.end());
297 colors[c_used].forbiddenRows = rowReduced;
301 colors[colori].rows.insert(i);
303 for (
size_t j : rowReduced) {
304 colors[colori].column2Row[j] = i;
305 colors[colori].row2Columns[i].insert(j);
309 colors.resize(c_used);
315 const std::string& suffix,
316 const std::string& function_sparsity,
317 const std::map<
size_t, std::vector<size_t> >& elements) {
322 std::string argsDcl = langC.generateDefaultFunctionArgumentsDcl();
323 std::vector<std::string> argsDcl2 = langC.generateDefaultFunctionArgumentsDcl2();
324 std::string args = langC.generateDefaultFunctionArguments();
327 _cache << _name <<
"_" <<
function;
328 std::string model_function = _cache.str();
331 _cache << LanguageC<Base>::ATOMICFUN_STRUCT_DEFINITION <<
"\n\n";
332 generateFunctionDeclarationSource(_cache, model_function, suffix, elements, argsDcl);
337 for (
const auto& it : elements) {
339 _cache <<
" case " << it.first <<
":\n" 340 " " << model_function <<
"_" << suffix << it.first <<
"(" << args <<
");\n" 341 " return 0; // done\n";
343 _cache <<
" default:\n" 344 " return 1; // error\n" 348 _sources[model_function +
".c"] = _cache.str();
354 generateSparsity1DSource2(_name +
"_" + function_sparsity, elements);
355 _sources[_name +
"_" + function_sparsity +
".c"] = _cache.str();
362 const std::string& model_function,
363 const std::string& suffix,
364 const std::map<size_t, T>& elements,
365 const std::string& argsDcl) {
366 for (
const auto& it : elements) {
367 size_t pos = it.first;
368 cache <<
"void " << model_function <<
"_" << suffix << pos <<
"(" << argsDcl <<
");\n";
374 const std::vector<size_t>& sparsity) {
376 "unsigned long* nnz"});
383 _cache <<
" *sparsity = nonzeros;\n" 384 " *nnz = " << sparsity.size() <<
";\n" 391 const std::vector<size_t>& rows = sparsity.rows;
392 const std::vector<size_t>& cols = sparsity.cols;
394 CPPADCG_ASSERT_UNKNOWN(rows.size() == cols.size());
397 "unsigned long const** col",
398 "unsigned long* nnz"});
408 _cache <<
" *row = rows;\n" 410 " *nnz = " << rows.size() <<
";\n" 416 const std::vector<LocalSparsityInfo>& sparsities) {
418 "unsigned long const** row",
419 "unsigned long const** col",
420 "unsigned long* nnz"});
423 std::ostringstream os;
425 std::vector<size_t> nnzs(sparsities.size());
427 long long int maxNnzIndex = -1;
429 for (
size_t i = 0; i < sparsities.size(); i++) {
430 const std::vector<size_t>& rows = sparsities[i].rows;
431 const std::vector<size_t>& cols = sparsities[i].cols;
432 CPPADCG_ASSERT_UNKNOWN(rows.size() == cols.size());
434 nnzs[i] = rows.size();
452 nnzs.resize(maxNnzIndex);
454 auto makeArrayOfArrays = [&,
this](
const std::string& name) {
457 for (
size_t i = 0; i < size_t(maxNnzIndex); i++) {
461 if (sparsities[i].rows.empty()) {
470 if (maxNnzIndex > 0) {
471 makeArrayOfArrays(
"rows");
472 makeArrayOfArrays(
"cols");
479 _cache <<
" if(i < " << maxNnzIndex <<
") {\n" 489 _cache <<
" *row = 0;\n" 499 const std::map<
size_t, std::vector<size_t> >& elements) {
501 "unsigned long const** elements",
502 "unsigned long* nnz"});
505 std::vector<size_t> nnzs(elements.empty()? 0: elements.rbegin()->first + 1);
507 long long int maxNnzIndex = -1;
509 for (
const auto& it : elements) {
511 const std::vector<size_t>& els = it.second;
514 std::ostringstream os;
515 os <<
"els" << it.first;
518 maxNnzIndex = it.first;
519 nnzs[it.first] = els.size();
524 nnzs.resize(maxNnzIndex);
526 if (maxNnzIndex > 0) {
528 auto it = elements.begin();
529 for (
size_t i = 0; i < size_t(maxNnzIndex); i++) {
533 if (it == elements.end() || i != it->first) {
536 _cache <<
"els" << i;
547 _cache <<
" if(pos < " << maxNnzIndex <<
") {\n" 548 " *elements = els[pos];\n" 549 " *nnz = nnzs[pos];\n" 555 _cache <<
" *elements = 0;\n" 565 return determineOrderByCol(elements, sparsity.rows, sparsity.cols);
570 const std::vector<size_t>& userRows,
571 const std::vector<size_t>& userCols) {
572 std::map<size_t, std::vector<std::set<size_t> > > userLocation;
574 for (
const auto& it : elements) {
575 size_t col = it.first;
576 const std::vector<size_t>& colElements = it.second;
578 userLocation[col] = determineOrderByCol(col, colElements, userRows, userCols);
586 const std::vector<size_t>& colElements,
587 const std::vector<size_t>& userRows,
588 const std::vector<size_t>& userCols) {
589 std::vector<std::set<size_t> > userLocationCol(colElements.size());
591 for (
size_t er = 0; er < colElements.size(); er++) {
592 size_t row = colElements[er];
593 for (
size_t e = 0; e < userRows.size(); e++) {
594 if (userRows[e] == row && userCols[e] == col) {
595 userLocationCol[er].insert(e);
601 return userLocationCol;
607 return determineOrderByRow(elements, sparsity.rows, sparsity.cols);
612 const std::vector<size_t>& userRows,
613 const std::vector<size_t>& userCols) {
614 std::map<size_t, std::vector<std::set<size_t> > > userLocation;
616 for (
const auto& it : elements) {
617 size_t row = it.first;
618 const std::vector<size_t>& rowsElements = it.second;
619 userLocation[row] = determineOrderByRow(row, rowsElements, userRows, userCols);
627 const std::vector<size_t>& rowElements,
628 const std::vector<size_t>& userRows,
629 const std::vector<size_t>& userCols) {
630 std::vector<std::set<size_t> > userLocationRow(rowElements.size());
632 for (
size_t ec = 0; ec < rowElements.size(); ec++) {
633 size_t col = rowElements[ec];
634 for (
size_t e = 0; e < userRows.size(); e++) {
635 if (userCols[e] == col && userRows[e] == row) {
636 userLocationRow[ec].insert(e);
642 return userLocationRow;
647 const std::string& baseTypeName) {
649 cache << CPPADCG_PTHREAD_POOL_H_FILE <<
"\n";
651 cache <<
"typedef struct ExecArgStruct {\n" 652 " cppadcg_function_type func;\n" 653 " " << baseTypeName +
" const *const * in;\n" 654 " " << baseTypeName +
"* out[1];\n" 655 " struct LangCAtomicFun atomicFun;\n" 658 "static void exec_func(void* arg) {\n" 659 " ExecArgStruct* eArg = (ExecArgStruct*) arg;\n" 660 " (*eArg->func)(eArg->in, eArg->out, eArg->atomicFun);\n" 667 auto repeatFill = [&](
const std::string& txt){
669 for (
size_t i = 0; i < size; ++i) {
670 if (i != 0) cache <<
", ";
676 cache <<
" ExecArgStruct* args[" << size <<
"];\n";
677 cache <<
" static cppadcg_thpool_function_type execute_functions[" << size <<
"] = ";
678 repeatFill(
"exec_func");
680 cache <<
" static float ref_elapsed[" << size <<
"] = ";
683 cache <<
" static float elapsed[" << size <<
"] = ";
686 " static int order[" << size <<
"] = {";
687 for (
size_t i = 0; i < size; ++i) {
688 if (i != 0) cache <<
", ";
692 " static int job2Thread[" << size <<
"] = ";
695 " static int last_elapsed_changed = 1;\n" 696 " unsigned int nBench = cppadcg_thpool_get_n_time_meas();\n" 697 " static unsigned int n_meas = 0;\n" 698 " int do_benchmark = " << (size > 0 ?
"(n_meas < nBench && !cppadcg_thpool_is_disabled())" :
"0") <<
";\n" 699 " float* elapsed_p = do_benchmark ? elapsed : NULL;\n";
705 cache <<
" cppadcg_thpool_add_jobs(execute_functions, (void**) args, ref_elapsed, elapsed_p, order, job2Thread, " << size <<
", last_elapsed_changed" <<
");\n" 707 " cppadcg_thpool_wait();\n" 709 " for(i = 0; i < " << size <<
"; ++i) {\n" 713 " if(do_benchmark) {\n" 714 " cppadcg_thpool_update_order(ref_elapsed, n_meas, elapsed, order, " << size <<
");\n" 717 " last_elapsed_changed = 0;\n" 723 cache << CPPADCG_OPENMP_H_FILE <<
"\n" 725 "#include <stdio.h>\n" 726 "#include <time.h>\n";
733 " enum omp_sched_t old_kind;\n" 734 " int old_modifier;\n" 735 " int enabled = !cppadcg_openmp_is_disabled();\n" 736 " int verbose = cppadcg_openmp_is_verbose();\n" 737 " struct timespec start[" << size <<
"];\n" 738 " struct timespec end[" << size <<
"];\n" 739 " int thread_id[" << size <<
"];\n" 740 " unsigned int n_threads = cppadcg_openmp_get_threads();\n" 741 " if(n_threads > " << size <<
")\n" 742 " n_threads = " << size <<
";\n" 745 " omp_get_schedule(&old_kind, &old_modifier);\n" 746 " cppadcg_openmp_apply_scheduler_strategy();\n" 753 cache <<
"#pragma omp parallel for private(outLocal) schedule(runtime) if(enabled) num_threads(n_threads)\n" 754 " for(i = 0; i < " << size <<
"; ++i) {\n" 757 " thread_id[i] = omp_get_thread_num();\n" 758 " info = clock_gettime(CLOCK_MONOTONIC, &start[i]);\n" 760 " start[i].tv_sec = 0;\n" 761 " start[i].tv_nsec = 0;\n" 762 " end[i].tv_sec = 0;\n" 763 " end[i].tv_nsec = 0;\n" 775 " info = clock_gettime(CLOCK_MONOTONIC, &end[i]);\n" 777 " end[i].tv_sec = 0;\n" 778 " end[i].tv_nsec = 0;\n" 785 " omp_set_schedule(old_kind, old_modifier);\n" 789 " struct timespec diff;\n" 790 " for (i = 0; i < " << size <<
"; ++i) {\n" 791 " if ((end[i].tv_nsec - start[i].tv_nsec) < 0) {\n" 792 " diff.tv_sec = end[i].tv_sec - start[i].tv_sec - 1;\n" 793 " diff.tv_nsec = end[i].tv_nsec - start[i].tv_nsec + 1000000000;\n" 795 " diff.tv_sec = end[i].tv_sec - start[i].tv_sec;\n" 796 " diff.tv_nsec = end[i].tv_nsec - start[i].tv_nsec;\n" 798 " fprintf(stdout, \"## Thread %i, Job %li, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\\n\",\n" 799 " thread_id[i], i, start[i].tv_sec, start[i].tv_nsec, end[i].tv_sec, end[i].tv_nsec, diff.tv_sec, diff.tv_nsec);\n" 808 if (_jobTimer !=
nullptr)
809 _jobTimer->startingJob(jobName, type);
814 if (_jobTimer !=
nullptr)
815 _jobTimer->finishedJob();
std::vector< ModelCSourceGen< Base >::Color > colorByRow(const std::set< size_t > &columns, const SparsitySetType &sparsity)
virtual void generateGlobalDirectionalFunctionSource(const std::string &function, const std::string &function2_suffix, const std::string &function_sparsity, const std::map< size_t, std::vector< size_t > > &elements)
static void printFunctionDeclaration(std::ostringstream &out, const std::string &returnType, const std::string &functionName, const std::vector< std::string > &arguments, const std::vector< std::string > &arguments2={})