LCOV - code coverage report
Current view: top level - ugbase/common/profiler - profile_node.cpp (source / functions) Coverage Total Hit
Test: coverage.info Lines: 0.0 % 42 0
Test Date: 2025-09-21 23:31:46 Functions: 0.0 % 22 0

            Line data    Source code
       1              : /*
       2              :  * Copyright (c) 2012-2015:  G-CSC, Goethe University Frankfurt
       3              :  * Author: Martin Rupp
       4              :  * 
       5              :  * This file is part of UG4.
       6              :  * 
       7              :  * UG4 is free software: you can redistribute it and/or modify it under the
       8              :  * terms of the GNU Lesser General Public License version 3 (as published by the
       9              :  * Free Software Foundation) with the following additional attribution
      10              :  * requirements (according to LGPL/GPL v3 §7):
      11              :  * 
      12              :  * (1) The following notice must be displayed in the Appropriate Legal Notices
      13              :  * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
      14              :  * 
      15              :  * (2) The following notice must be displayed at a prominent place in the
      16              :  * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
      17              :  * 
      18              :  * (3) The following bibliography is recommended for citation and must be
      19              :  * preserved in all covered files:
      20              :  * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
      21              :  *   parallel geometric multigrid solver on hierarchically distributed grids.
      22              :  *   Computing and visualization in science 16, 4 (2013), 151-164"
      23              :  * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
      24              :  *   flexible software system for simulating pde based models on high performance
      25              :  *   computers. Computing and visualization in science 16, 4 (2013), 165-179"
      26              :  * 
      27              :  * This program is distributed in the hope that it will be useful,
      28              :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      29              :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      30              :  * GNU Lesser General Public License for more details.
      31              :  */
      32              : 
      33              : 
      34              : #include "profiler.h"
      35              : #include <iomanip>
      36              : #include <cmath> // for floor
      37              : #include <algorithm>
      38              : #include <string>
      39              : #include <string.h>
      40              : #include "common/log.h"
      41              : #include "common/util/string_util.h"
      42              : #include "profile_node.h"
      43              : #include "common/util/string_util.h"
      44              : #include "common/util/path_provider.h"
      45              : #include <map>
      46              : #include <fstream>
      47              : #include "compile_info/compile_info.h"
      48              : #include "pcl/pcl_base.h"
      49              : #include "common/error.h"
      50              : #include "memtracker.h"
      51              : 
      52              : #ifdef UG_PARALLEL
      53              : #include "pcl/pcl.h"
      54              : #endif
      55              : 
      56              : #include "profile_call.h"
      57              : 
      58              : 
      59              : using namespace std;
      60              : 
      61              : namespace ug
      62              : {
      63              : 
      64              : #if SHINY_PROFILER
      65              : 
      66              : void ProfilerUpdate()
      67              : {
      68              :         Shiny::ProfileManager::instance.update(1.0);
      69              :         UpdateTotalMem();
      70              : }
      71              : 
      72              : 
      73              : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_NAME = 70; // Shiny::OUTPUT_WIDTH_NAME
      74              : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_HIT  = 13; // Shiny::OUTPUT_WIDTH_HIT
      75              : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_TIME =  7; // Shiny::OUTPUT_WIDTH_TIME
      76              : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_PERC =  4; // Shiny::OUTPUT_WIDTH_PERC
      77              : 
      78              : 
      79              : /*static string cut(string &s, size_t L)
      80              : {
      81              :         return s.substr(0, L);
      82              : }*/
      83              : 
      84              : static string cut(const char *p, size_t L)
      85              : {
      86              :         string s(p);
      87              :         return s.substr(0, L);
      88              : }
      89              : 
      90              : 
      91              : #define SHINY_DAMPING_FACTOR 1.0
      92              : 
      93              : double UGProfileNode::get_avg_entry_count() const
      94              : {
      95              :         if(!valid()) return 0.0;
      96              :         return data.entryCount.avg; // * SHINY_DAMPING_FACTOR;
      97              : }
      98              : 
      99              : double UGProfileNode::get_avg_self_time_ms() const
     100              : {
     101              :         return get_avg_self_time() * Shiny::GetTickInvFreq() * 1000.0;
     102              : }
     103              : 
     104              : double UGProfileNode::get_avg_total_time_ms() const
     105              : {
     106              :         return get_avg_total_time() * Shiny::GetTickInvFreq() * 1000.0;
     107              : }
     108              : 
     109              : double UGProfileNode::get_avg_self_time() const
     110              : {
     111              :         if(!valid()) return 0.0;
     112              :         return data.selfTicks.avg * SHINY_DAMPING_FACTOR;
     113              : }
     114              : 
     115              : double UGProfileNode::get_avg_total_time() const
     116              : {
     117              :         if(!valid()) return 0.0;
     118              :         return data.totalTicksAvg() * SHINY_DAMPING_FACTOR;
     119              : }
     120              : 
     121              : double UGProfileNode::get_self_mem() const
     122              : {
     123              :         if(!valid()) return 0.0;
     124              :         return GetSelfMem(this);
     125              : }
     126              : 
     127              : 
     128              : double UGProfileNode::get_total_mem() const
     129              : {
     130              :         if(!valid()) return 0.0;
     131              :         return GetTotalMem(this);
     132              : }
     133              : 
     134              : string UGProfileNode::get_mem_info(double fullMem) const
     135              : {
     136              :         if(HasMemTracking())
     137              :         {
     138              :                 stringstream s;
     139              :                 s << GetBytesSizeString((size_t)get_self_mem(), 10) << setw(5) << floor(get_self_mem() / fullMem * 100) << "%  ";
     140              :                 s << GetBytesSizeString((size_t)get_total_mem(), 10) << setw(5) << floor(get_total_mem() / fullMem * 100) << "%  ";
     141              :                 return s.str();
     142              :         }
     143              :         else
     144              :                 return "";
     145              : }
     146              : 
     147              : string UGProfileNode::call_tree(double dSkipMarginal) const
     148              : {
     149              :         if(!valid()) return "Profile Node not valid!";
     150              : 
     151              :         stringstream s;
     152              :         UGProfileNode::log_header(s, "call tree");
     153              : 
     154              :         rec_print(get_avg_total_time_ms(), get_total_mem(), s, 0, dSkipMarginal);
     155              : 
     156              :         return s.str();
     157              : }
     158              : 
     159              : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
     160              : {
     161              :         return print_child_sorted("self time sorted", UGProfileNode::self_time_sort, dSkipMarginal);
     162              : }
     163              : 
     164              : string UGProfileNode::total_time_sorted(double dSkipMarginal) const
     165              : {
     166              :         return print_child_sorted("total time sorted", UGProfileNode::total_time_sort, dSkipMarginal);
     167              : }
     168              : 
     169              : string UGProfileNode::child_self_memory_sorted(double dSkipMarginal) const
     170              : {
     171              :         return print_child_sorted("self memory sorted", UGProfileNode::self_memory_sort, dSkipMarginal);
     172              : }
     173              : 
     174              : string UGProfileNode::total_memory_sorted(double dSkipMarginal) const
     175              : {
     176              :         return print_child_sorted("total memory sorted", UGProfileNode::total_memory_sort, dSkipMarginal);
     177              : }
     178              : 
     179              : string UGProfileNode::entry_count_sorted(double dSkipMarginal) const
     180              : {
     181              :         return print_child_sorted("entry count sorted", UGProfileNode::entry_count_sort, dSkipMarginal);
     182              : }
     183              : 
     184              : 
     185              : bool UGProfileNode::valid() const
     186              : {
     187              :         return this != PROFILER_NULL_NODE;
     188              : }
     189              : 
     190              : string SimplifyUG4Path(string s)
     191              : {
     192              :         const string &ug4root = PathProvider::get_path(ROOT_PATH);
     193              :         if(StartsWith(s, ug4root))
     194              :                 return string("$") + s.substr(ug4root.length());
     195              :         else
     196              :                 return s;
     197              : }
     198              : 
     199              : // private functions
     200              : 
     201              : void UGProfileNode::PDXML_rec_write(ostream &s) const
     202              : {
     203              :         if(!valid()) return;
     204              :         
     205              :         /*
     206              :          * <node>
     207              :          * name
     208              :          * group
     209              :          * file
     210              :          * line
     211              :          * hits
     212              :          * self
     213              :          * total         * 
     214              :          * </node>         
     215              :          */
     216              :         
     217              :         s << "<node>\n";
     218              :         if(zone->name)
     219              :         {
     220              :                 if(strcmp(zone->name, "<root>") == 0)
     221              :                         s << "<name>root</name>\n";
     222              :                 else
     223              :                         s << "<name>" << XMLStringEscape(zone->name) << "</name>\n";
     224              :         }
     225              :         if(zone->groups)
     226              :                 s << "<groups>" << XMLStringEscape(zone->groups) << "</groups>\n";
     227              :         
     228              :         if(zone->file != NULL)
     229              :         {
     230              :                 s << "<file>" << SimplifyUG4Path(zone->file) << "</file>\n";
     231              :                 s << "<line>" << zone->line << "</line>\n";
     232              :         }
     233              : 
     234              : #ifdef SHINY_CALL_LOGGING
     235              :         s << "<id>" << this << "</id>\n";
     236              : #endif
     237              :          
     238              :         s << "<hits>" << floor(get_avg_entry_count()) << "</hits>\n"
     239              :           << "<self>" << get_avg_self_time_ms() * 1000.0 << "</self>\n"
     240              :           << "<total>" << get_avg_total_time_ms() * 1000.0 << "</total>\n";
     241              : 
     242              :         if(HasMemTracking())
     243              :         {
     244              :                 s << "<totalMemory>" << get_total_mem() << "</totalMemory>\n";
     245              :                 s << "<selfMemory>" << get_self_mem() << "</selfMemory>\n";
     246              :         }
     247              :                         
     248              :         for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
     249              :         {
     250              :                 p->PDXML_rec_write(s);
     251              :                 if(p==get_last_child())
     252              :                         break;
     253              :         }
     254              : 
     255              :         s << "</node>\n";
     256              : }
     257              : 
     258              : string GetProfilingFileLine(const char *name)
     259              : {
     260              :         const char *p = strchr(name, ':'); // search for line number
     261              :         if(!(p == NULL || p[0] == 0x00 || p[1] == 0x00))
     262              :         {
     263              :                 // if we find the corresponding code in the LUA file, print the code as "name"
     264              :                 // and the filename above
     265              : 
     266              :                 int line = strtol(p+1, NULL, 10);
     267              :                 char file[255];
     268              :                 strncpy(file, name, p-name);
     269              :                 file[p-name]=0x00;
     270              :                 string str = GetFileLine(file, line);
     271              :                 for(size_t i=0; i<str.size(); i++) if(str[i] == '\t') str[i] = ' ';
     272              :                 return str;
     273              :         }
     274              :         else return "";
     275              : 
     276              : }
     277              : 
     278              : string UGProfileNode::print_node(double fullMs, double fullMem, size_t offset) const
     279              : {
     280              :         if(!valid()) return "";
     281              :         const Shiny::TimeUnit *selfUnit = Shiny::GetTimeUnit(get_avg_self_time());
     282              :         const Shiny::TimeUnit *totalUnit = Shiny::GetTimeUnit(get_avg_total_time());
     283              :         stringstream s;
     284              :         if(offset)      s << setw(offset) << " ";
     285              : 
     286              :         // name
     287              :         if(zone->name[0] == '@')
     288              :         {
     289              :                 // zone name is a filename
     290              :                 // this can happen when we are using LUA script profiling.
     291              : 
     292              :                 // check if filename is too long
     293              :                 if(strlen(zone->name) > (PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) )
     294              :                         s << "@... " << zone->name+strlen(zone->name)-(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset-5);
     295              :                 else
     296              :                         s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) << cut(zone->name, PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
     297              : 
     298              :                 string str = GetProfilingFileLine(zone->name+1);
     299              :                 if(str.size())
     300              :                 {
     301              :                         s << "\n";
     302              :                         if(offset)      s << setw(offset) << " ";
     303              :                         s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) <<
     304              :                                         cut(str.c_str(), PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
     305              :                 }
     306              :         }
     307              :         else
     308              :                 s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) << cut(zone->name, PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
     309              : 
     310              :         // entry count
     311              :         s <<      right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << floor(get_avg_entry_count()) << " " <<
     312              :                         setprecision(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME-1);
     313              : 
     314              :         // self time
     315              :         s <<      setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << get_avg_self_time() * selfUnit->invTickFreq << " " <<
     316              :                         left << setw(2) << selfUnit->suffix << " " <<
     317              :                         right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(get_avg_self_time_ms() / fullMs * 100) << "%  ";
     318              : 
     319              :         // total time
     320              :         s <<      setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << get_avg_total_time() * totalUnit->invTickFreq << " " <<
     321              :                         left << setw(2) << totalUnit->suffix << " " <<
     322              :                         right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(get_avg_total_time_ms() / fullMs * 100) << "%  ";
     323              :         if(fullMem >= 0.0)
     324              :                 s << get_mem_info(fullMem);
     325              :         if(zone->groups != NULL)
     326              :                 s << zone->groups;
     327              :         return s.str();
     328              : }
     329              : 
     330              : const UGProfileNode *UGProfileNode::get_first_child() const
     331              : {
     332              :         return reinterpret_cast<const UGProfileNode*>(firstChild);
     333              : }
     334              : 
     335              : const UGProfileNode *UGProfileNode::get_last_child() const
     336              : {
     337              :         return reinterpret_cast<const UGProfileNode*>(lastChild);
     338              : }
     339              : 
     340              : const UGProfileNode *UGProfileNode::get_next_sibling() const
     341              : {
     342              :         return reinterpret_cast<const UGProfileNode*>(nextSibling);
     343              : }
     344              : 
     345              : const UGProfileNode *UGProfileNode::find_next_in_tree() const
     346              : {
     347              :         return reinterpret_cast<const UGProfileNode*>(findNextInTree());
     348              : }
     349              : 
     350              : void UGProfileNode::rec_print(double fullMs, double fullMem, stringstream &s, size_t offset, double dSkipMarginal) const
     351              : {
     352              :         if(!valid()) return;
     353              :         if(dSkipMarginal==0.0 ||
     354              :                         (fullMs*dSkipMarginal < get_avg_total_time_ms() || (HasMemTracking() && fullMem*dSkipMarginal< get_total_mem() ) )
     355              :                         )
     356              :         {
     357              :                 s << print_node(fullMs, fullMem, offset) << "\n";
     358              :                 for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
     359              :                 {
     360              :                         p->rec_print(fullMs, fullMem, s, offset+1, dSkipMarginal);
     361              :                         if(p==get_last_child())
     362              :                                 break;
     363              :                 }
     364              :         }
     365              : }
     366              : 
     367              : string UGProfileNode::groups() const
     368              : {
     369              :         if (!valid()) return "Profile Node not valid!";
     370              : 
     371              :         vector<const UGProfileNode*> nodes;
     372              :         rec_add_nodes(nodes);
     373              : 
     374              :         map<string, double> mapGroups;
     375              :         for(size_t i=0; i<nodes.size(); i++)
     376              :         {
     377              :                 if(nodes[i]->zone->groups == NULL) continue;
     378              :                 vector<string> g;
     379              :                 TokenizeString(nodes[i]->zone->groups, g, ' ');
     380              :                 for(size_t j=0; j<g.size(); j++)
     381              :                         mapGroups[g[j]] += nodes[i]->get_avg_self_time();
     382              :         }
     383              : 
     384              :         vector<string> gs;
     385              : #ifdef UG_PARALLEL
     386              :         if(pcl::ProcRank() == 0)
     387              : #endif
     388              :         for(map<string, double>::iterator it = mapGroups.begin(); it != mapGroups.end();++it)
     389              :                 gs.push_back(it->first);
     390              : 
     391              : #ifdef UG_PARALLEL
     392              :         pcl::ProcessCommunicator pc;
     393              :         pc.broadcast(gs);
     394              :         vector<double> t(gs.size(), 0.0), tMax, tMin;
     395              :         for(size_t i=0; i<gs.size(); i++)
     396              :                 t[i] = mapGroups[gs[i]];
     397              :         pc.allreduce(t, tMax, PCL_RO_MAX);
     398              :         pc.allreduce(t, tMin, PCL_RO_MIN);
     399              : #endif
     400              :         stringstream s;
     401              :         for(size_t i=0; i<gs.size(); i++)
     402              :         {
     403              :                 string name = gs[i];
     404              :                 double time = mapGroups[name];
     405              :                 const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(time);
     406              :                 s << left << setw(20) << name
     407              :                   << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << time * unit->invTickFreq << " " <<
     408              :                         left << setw(2) << unit->suffix;
     409              : #ifdef UG_PARALLEL
     410              :                 double maxTime = tMax[i];
     411              :                 double minTime = tMin[i];
     412              :                 double diffTime = maxTime - minTime;
     413              :                 const Shiny::TimeUnit *maxUnit  = Shiny::GetTimeUnit(maxTime);
     414              :                 const Shiny::TimeUnit *minUnit  = Shiny::GetTimeUnit(minTime);
     415              :                 const Shiny::TimeUnit *diffUnit = Shiny::GetTimeUnit(diffTime);
     416              :                 s << left << " max: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
     417              :                                 << maxTime * maxUnit->invTickFreq << " " <<      left << setw(2) << maxUnit->suffix;
     418              :                 s << left << " min: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
     419              :                                                 << minTime * maxUnit->invTickFreq << " " <<      left << setw(2) << minUnit->suffix;
     420              :                 s << left << " diff: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
     421              :                                                                 << diffTime * diffUnit->invTickFreq << " " <<    left << setw(2) << diffUnit->suffix
     422              :                                                                 << " (" << diffTime/maxTime*100 << " %)";
     423              : #endif
     424              :                 s << "\n";
     425              :         }
     426              :         return s.str();
     427              : }
     428              : 
     429              : 
     430              : void UGProfileNode::rec_add_nodes(vector<const UGProfileNode*> &nodes) const
     431              : {
     432              :         nodes.push_back(this);
     433              :         for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
     434              :         {
     435              :                 p->rec_add_nodes(nodes);
     436              :                 if(p==get_last_child())
     437              :                         break;
     438              :         }
     439              : }
     440              : 
     441              : string UGProfileNode::print_child_sorted(const char *name, bool sortFunction(const UGProfileNode *a, const UGProfileNode *b),
     442              :                 double dSkipMarginal) const
     443              : {
     444              :         if(!valid()) return "";
     445              :         stringstream s;
     446              :         vector<const UGProfileNode*> nodes;
     447              :         rec_add_nodes(nodes);
     448              :         sort(nodes.begin(), nodes.end(), sortFunction);
     449              : 
     450              :         UGProfileNode::log_header(s, name);
     451              :         double fullMs = get_avg_total_time_ms();
     452              :         double fullMem = get_total_mem();
     453              :         for(size_t i=0; i<nodes.size(); i++)
     454              :         {
     455              :                 if(dSkipMarginal==0.0 || fullMs*dSkipMarginal < nodes[i]->get_avg_total_time_ms())
     456              :                         s << nodes[i]->print_node(fullMs, fullMem) << "\n";
     457              :         }
     458              :         return s.str();
     459              : }
     460              : 
     461              : void UGProfileNode::log_header(stringstream &s, const char *name)
     462              : {
     463              : 
     464              :         s <<      left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << name << " " <<
     465              :                         right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << "hits" << " " <<
     466              :                         setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4+PROFILER_BRIDGE_OUTPUT_WIDTH_PERC+1 -4) << "self time" << " " <<
     467              :                         setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4+PROFILER_BRIDGE_OUTPUT_WIDTH_PERC+1) << "total time";
     468              :         if(HasMemTracking())
     469              :         {
     470              :                 s << "  " << setw(10+5+3) << "self mem" << "   " <<
     471              :                                 setw(10) << "total mem";
     472              :         }
     473              : 
     474              :         s << "\n";
     475              : }
     476              : 
     477              : bool UGProfileNode::self_time_sort(const UGProfileNode *a, const UGProfileNode *b)
     478              : {
     479              :         return a->get_avg_self_time() < b->get_avg_self_time();
     480              : }
     481              : 
     482              : bool UGProfileNode::total_time_sort(const UGProfileNode *a, const UGProfileNode *b)
     483              : {
     484              :         return a->get_avg_total_time() < b->get_avg_total_time();
     485              : }
     486              : 
     487              : bool UGProfileNode::self_memory_sort(const UGProfileNode *a, const UGProfileNode *b)
     488              : {
     489              :         return a->get_self_mem() < b->get_self_mem();
     490              : }
     491              : 
     492              : bool UGProfileNode::total_memory_sort(const UGProfileNode *a, const UGProfileNode *b)
     493              : {
     494              :         return a->get_total_mem() < b->get_total_mem();
     495              : }
     496              : 
     497              : 
     498              : bool UGProfileNode::entry_count_sort(const UGProfileNode *a, const UGProfileNode *b)
     499              : {
     500              :         return a->get_avg_entry_count() < b->get_avg_entry_count();
     501              : }
     502              : 
     503              : /*
     504              : void WriteCompressedProfileData(const char *filename)
     505              : {
     506              :         // compressed:
     507              :         // filedata: file0, file1, file2, file3
     508              :         // zonedata: zone1 {name, fileid, line}, zone2, zone3   
     509              :     // nodes: node0 {self, total, children}, node1, node2, node3.
     510              :  
     511              :     // combine file and zone data, so that profile data is
     512              :  * 
     513              :  * // <files>...</files>
     514              :  * <zones><zone id="732"><name>bla</name><group>algebra</group><file>8</file><line>344</line> </zone> ... </zones>
     515              :  * <core id=0>
     516              :  *   <node zoneId=732><self>8734.2</self><total>8333.2</total></node>
     517              :     
     518              : }
     519              : */
     520              : 
     521              : #ifndef NDEBUG
     522              : #define PROFILE_NODE_MIN_HITS 500
     523              : #define PROFILE_NODE_MIN_TOTAL_TIME_MS 100
     524              : #define PROFILE_NODE_MAX_TIME_PER_CALL_MS 0.01
     525              : #else
     526              : #define PROFILE_NODE_MIN_HITS 1000
     527              : #define PROFILE_NODE_MIN_TOTAL_TIME_MS 1000
     528              : #define PROFILE_NODE_MAX_TIME_PER_CALL_MS 0.01
     529              : #endif
     530              : 
     531              : void UGProfileNode::check_for_too_small_nodes(double fullMs, map<string, const UGProfileNode *> &list) const
     532              : {
     533              :         // also don't check nodes which require less time than 0.01% of the whole problem
     534              :         if(get_avg_total_time_ms() < 0.01*0.01*fullMs) return;
     535              :         for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
     536              :         {
     537              :                 double t_ms = p->get_avg_total_time_ms();
     538              :                 size_t entry = (size_t)p->get_avg_entry_count();
     539              : 
     540              :                 if(t_ms > PROFILE_NODE_MIN_TOTAL_TIME_MS)
     541              :                 {
     542              :                         if(entry > PROFILE_NODE_MIN_HITS && t_ms/entry < PROFILE_NODE_MAX_TIME_PER_CALL_MS)
     543              :                         {
     544              :                                 string desc = p->zone->name;
     545              :                                 if(p->zone->file != NULL)
     546              :                                         desc.append(string(p->zone->file) + string(":") + ToString(p->zone->line));
     547              :                                 const UGProfileNode *p2 = list[desc];
     548              : 
     549              :                                 if(p2 == NULL)
     550              :                                         list[desc] = p;
     551              :                                 else
     552              :                                 {
     553              :                                         double t_ms2 = p->get_avg_total_time_ms();
     554              :                                         size_t entry2 = (size_t)p->get_avg_entry_count();
     555              :                                         if(entry2/t_ms2 < entry/t_ms)
     556              :                                                 list[desc] = p;
     557              :                                 }
     558              :                         }
     559              : 
     560              :                         p->check_for_too_small_nodes(fullMs, list);
     561              :                 }
     562              :                 if(p==get_last_child())
     563              :                         break;
     564              :         }
     565              : 
     566              : }
     567              : 
     568              : void ProfileTestFunction1()
     569              : {
     570              :         PROFILE_FUNC();
     571              : }
     572              : 
     573              : void ProfileTestFunction2()
     574              : {
     575              :         PROFILE_FUNC();
     576              : }
     577              : 
     578              : 
     579              : void UGProfileNode::CheckForTooSmallNodes()
     580              : {
     581              :         Shiny::ProfileManager::instance.update(1.0); // WE call with damping = 1.0
     582              :         const UGProfileNode *pnRoot = UGProfileNode::get_root();
     583              : 
     584              :         const size_t numProfileCall = 100;
     585              :         for(size_t i=0; i<numProfileCall; i++)
     586              :         {
     587              :                 ProfileTestFunction1();
     588              :                 ProfileTestFunction2();
     589              :         }
     590              : 
     591              :         const UGProfileNode *ptf1 = GetProfileNode("ProfileTestFunction1");
     592              : 
     593              :         double fullMs = pnRoot->get_avg_total_time_ms();
     594              : 
     595              :         if(fullMs > 1)
     596              :         {
     597              :                 map<string, const UGProfileNode *> list;
     598              :                 pnRoot->check_for_too_small_nodes(fullMs, list);
     599              : 
     600              :                 if(list.size() != 0)
     601              :                 {
     602              :                         UG_LOG(resetiosflags( ::ios::scientific ));
     603              :                         UG_LOG("WARNING: Some profile nodes might be too small\n");
     604              :                         UG_LOG("----------------------------------------------------------------------\n");
     605              : 
     606              :                         double tProfileCall= ptf1->get_avg_total_time()/numProfileCall;
     607              :                         const Shiny::TimeUnit *unit2 = Shiny::GetTimeUnit(tProfileCall);
     608              : 
     609              :                         UG_LOG("Profile Call overhead estimated at " << tProfileCall * unit2->invTickFreq << " " << unit2->suffix << ".\n");
     610              : 
     611              :                         UG_LOG("Displaying nodes with hits > " << PROFILE_NODE_MIN_HITS << " and totalTime > " << PROFILE_NODE_MIN_TOTAL_TIME_MS
     612              :                                         << " and totalTime/hits < " << PROFILE_NODE_MAX_TIME_PER_CALL_MS << " ms :\n");
     613              : 
     614              : 
     615              :                         stringstream s;
     616              :                         s <<      left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << "name" << " " <<
     617              :                                         right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << "hits" << " " <<
     618              :                                         setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4) << "total  " << " " <<
     619              :                                         setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+5) << "total/hits  " <<
     620              :                                         "  potentially lost (prediction)\n";
     621              :                         for(map<string, const UGProfileNode *>::iterator it = list.begin();
     622              :                                         it != list.end(); ++it)
     623              :                         {
     624              :                                 const UGProfileNode *p = it->second;
     625              :                                 if(p->zone->file != NULL)
     626              :                                         s << SimplifyUG4Path(p->zone->file) << ":" << p->zone->line << " : \n";
     627              : 
     628              :                                 s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << p->zone->name << " ";
     629              :                                 // entry count
     630              :                                 s <<      right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << floor(p->get_avg_entry_count()) << " " <<
     631              :                                                 setprecision(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME-1);
     632              : 
     633              :                                 // total time
     634              :                                 const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(p->get_avg_total_time());
     635              :                                 s <<      right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << p->get_avg_total_time() * unit->invTickFreq << " " <<
     636              :                                                 left << setw(2) << unit->suffix << "   ";
     637              : 
     638              :                                 // fraction
     639              :                                 double t = p->get_avg_total_time()/p->get_avg_entry_count();
     640              :                                 unit = Shiny::GetTimeUnit(t);
     641              :                                 s <<      right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) <<  t * unit->invTickFreq << " " <<
     642              :                                                 left << setw(2) << unit->suffix << "   ";
     643              : 
     644              :                                 // lost
     645              : 
     646              :                                 t = tProfileCall*p->get_avg_entry_count();
     647              :                                 unit = Shiny::GetTimeUnit(t);
     648              :                                 s <<      right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) <<  t * unit->invTickFreq << " " <<
     649              :                                                 left << setw(2) << unit->suffix << "   ";
     650              : 
     651              :                                 // %
     652              : 
     653              : //                              t = tProfileCall*p->get_avg_entry_count();
     654              : //                              unit = Shiny::GetTimeUnit(t);
     655              : //                              s << (tProfileCall*p->get_avg_entry_count()*100)/ p->get_avg_total_time() << "% ";
     656              : 
     657              :                                 s << "\n";
     658              :                         }
     659              :                         UG_LOG(s.str());
     660              : 
     661              :                 }
     662              :         }
     663              : }
     664              : 
     665              : const UGProfileNode *UGProfileNode::get_root()
     666              : {
     667              :         const Shiny::ProfileNode *node = &Shiny::ProfileManager::instance.rootNode;
     668              :         return reinterpret_cast<const UGProfileNode*> (node);
     669              : }
     670              : 
     671              : 
     672              : void WriteProfileDataXML(const char *filename)
     673              : {
     674              :         WriteProfileDataXML(filename, ug::GetLogAssistant().get_output_process());
     675              : }
     676              : 
     677              : 
     678              : void WriteCallLog(const char *filename, int procId)
     679              : {
     680              : #ifdef SHINY_CALL_LOGGING
     681              :         if(pcl::ProcRank() != procId) return;
     682              :         fstream f(filename, ios::out);
     683              : 
     684              :         Shiny::tick_t curTime;
     685              :         Shiny::GetTicks(&curTime);
     686              : 
     687              :         FinishShinyCallLogging();
     688              :         UG_LOG("Writing Call log to " << filename << ". Max Frequency is " << g_ShinyCallLoggingMaxFreq << " Hz.\n")
     689              :         // print call log
     690              :         int depth=0;
     691              :         for(size_t i=0; i<profileCalls.size(); i++)
     692              :         {
     693              :                 f << profileCalls[i].c << " ";
     694              :                 if(profileCalls[i].p == NULL)
     695              :                 {
     696              :                         depth--;
     697              :                         f << repeat(' ', depth) << "}\n";
     698              :                 }
     699              :                 else
     700              :                 {
     701              :                         f << repeat(' ', depth);
     702              : 
     703              :                         if(profileCalls[i].p->zone->name[0] == '@')
     704              :                         {
     705              :                                 string str = GetProfilingFileLine(profileCalls[i].p->zone->name +1);
     706              :                                 if(str.size())
     707              :                                 {       f << str << " "; }
     708              : 
     709              :                                 f << "(" << SimplifyUG4Path(profileCalls[i].p->zone->name+1) << ")\n";
     710              :                         }
     711              :                         else
     712              :                                 f << profileCalls[i].p->zone->name << " (" << SimplifyUG4Path(profileCalls[i].p->zone->file) << ":" << profileCalls[i].p->zone->line << ")\n";
     713              : 
     714              :                         if(i+1 < profileCalls.size() && profileCalls[i+1].p == NULL)
     715              :                                 i++;
     716              :                         else
     717              :                         {
     718              :                                 f << profileCalls[i].c << " ";
     719              :                                 f << repeat(' ', depth) << "{\n";
     720              :                                 depth++;
     721              :                         }
     722              :                 }
     723              :         }
     724              :         for(int i=0; i<depth; i++)
     725              :                 f << curTime << " " << repeat(' ', depth-i-1) << "}\n";
     726              : #else
     727              :         UG_LOG("Did NOT write Call Log since Call Logging is disabled (enable with cmake -DSHINY_CALL_LOGGING=ON ..)\n");
     728              : #endif
     729              : }
     730              : 
     731              : void WriteCallLog(const char *filename)
     732              : {
     733              :         WriteCallLog(filename, ug::GetLogAssistant().get_output_process());
     734              : }
     735              : 
     736              : void WriteProfileDataXML(const char *filename, int procId)
     737              : {
     738              :         #ifdef UG_PARALLEL
     739              :                 UG_COND_THROW(procId >= pcl::NumProcs(),
     740              :                                           "Bad process id: " << procId << ". Maximum allowed is "
     741              :                                           << pcl::NumProcs() - 1);
     742              :         #else
     743              :                 UG_COND_THROW(procId >0,
     744              :                                           "Bad process id: " << procId
     745              :                                           << ". Only one process available (with id 0)");
     746              :         #endif
     747              : 
     748              : #ifdef SHINY_CALL_LOGGING
     749              :         Shiny::tick_t curTime;
     750              :         Shiny::GetTicks(&curTime);
     751              : #endif
     752              :         ProfilerUpdate();
     753              :         const UGProfileNode *pnRoot = UGProfileNode::get_root();
     754              : 
     755              : //      procId represents the id of the process which will write the file.
     756              : //      if all processes are considered (procId < 0), then process 0 will write the file.
     757              :         bool gatherFromAllProcs = false;
     758              :         if(procId < 0){
     759              :                 gatherFromAllProcs = true;
     760              :                 procId = 0;
     761              :         }
     762              : 
     763              : #ifdef UG_PARALLEL
     764              :         typedef pcl::SingleLevelLayout<pcl::OrderedInterface<size_t, vector> >
     765              :                 IndexLayout;
     766              : 
     767              :         pcl::InterfaceCommunicator<IndexLayout> ic;
     768              :         if(pcl::ProcRank() == procId)
     769              :         {
     770              : #endif
     771              :                 fstream f(filename, ios::out);
     772              :                 f << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
     773              :                 f << "<!-- ug4 created profile data -->\n";
     774              :                 f << "<ProfileData>\n";
     775              :                 f << "<AdditionalInfo>\n";
     776              :                 f << "<SVNRevision>" << UGSvnRevision() << "</SVNRevision>\n";
     777              :                 f << "<BuildHostname>" << UGBuildHost() << "</BuildHostname>\n";
     778              :                 f << "<CompileDate>" << UGCompileDate() << "</CompileDate>\n";
     779              :                 f << "</AdditionalInfo>\n";
     780              :                 f << "<basepath>" << PathProvider::get_path(ROOT_PATH) << "</basepath>\n";
     781              : 
     782              :                 f << "<core id=\""<<procId<<"\">\n";
     783              :                 
     784              :                 pnRoot->PDXML_rec_write(f);
     785              :                 f << "</core>\n";
     786              : 
     787              : #ifdef SHINY_CALL_LOGGING
     788              :                 FinishShinyCallLogging();
     789              :                 f << "<log>\n";
     790              :                 int depth=0;
     791              :                 for(size_t i=0; i<profileCalls.size(); i++)
     792              :                 {
     793              :                         if(profileCalls[i].p == NULL)
     794              :                         {
     795              :                                 f << "<stop>" << profileCalls[i].c << "</stop>\n";
     796              :                                 f << "</call>\n";
     797              :                                 depth--;
     798              :                         }
     799              :                         else
     800              :                         {
     801              :                                 f << "<call>\n<start>" << profileCalls[i].c << "</start>\n<id>" << profileCalls[i].p << "</id>\n";
     802              :                                 depth++;
     803              :                         }
     804              :                 }
     805              :                 for(int i=0; i<depth; i++)
     806              :                 {
     807              :                         f << "<stop>" << curTime << "</stop>\n";
     808              :                         f << "</call>\n";
     809              :                 }
     810              :                 f << "</log>\n";
     811              : 
     812              : #endif
     813              :                 
     814              : #ifdef UG_PARALLEL
     815              :                 if(gatherFromAllProcs)
     816              :                 {
     817              :                         vector<ug::BinaryBuffer> buffers(pcl::NumProcs()-1);
     818              :                         for(int i=1; i<pcl::NumProcs(); i++)
     819              :                                 ic.receive_raw(i, buffers[i-1]);
     820              :                         ic.communicate();
     821              : 
     822              :                         for(int i=1; i<pcl::NumProcs(); i++)
     823              :                         {
     824              :                                 f << "\n<core id=\"" << i << "\">";
     825              :                                 string s;
     826              :                                 Deserialize(buffers[i-1], s);
     827              :                                 f << s;                   
     828              :                                 f << "\n</core>";
     829              :                         }               
     830              :                 }
     831              : #endif
     832              :                 f << "</ProfileData>\n";
     833              :                 
     834              : #ifdef UG_PARALLEL              
     835              :         }       
     836              :         else            
     837              :         {
     838              :                 if(gatherFromAllProcs)
     839              :                 {
     840              :                         stringstream ss;
     841              :                         pnRoot->PDXML_rec_write(ss);
     842              :                         BinaryBuffer buf;
     843              :                         Serialize(buf, ss.str());
     844              :                         ic.send_raw(procId, buf.buffer(), buf.write_pos(), false);
     845              :                         ic.communicate();
     846              :                 }
     847              :         }       
     848              : #endif  
     849              :         
     850              : }
     851              : 
     852              : const UGProfileNode *GetProfileNode(const char *name, const UGProfileNode *node)
     853              : {
     854              :         ProfilerUpdate();
     855              :         if(node == NULL) node = UGProfileNode::get_root();
     856              :         if(name == NULL)
     857              :                 return node;
     858              :         do
     859              :         {
     860              :                 if(strcmp(node->zone->name, name) == 0)
     861              :                         return node;
     862              :                 node = node->find_next_in_tree();
     863              :         } while (node);
     864              : 
     865              : //      UG_LOG("Profiler Node \"" << name << "\" not found\n");
     866              :         return PROFILER_NULL_NODE;
     867              : }
     868              : 
     869              : const UGProfileNode *GetProfileNode(const char *name)
     870              : {
     871              :         return GetProfileNode(name, NULL);
     872              : }
     873              : 
     874              : bool GetProfilerAvailable()
     875              : {
     876              :         return true;
     877              : }
     878              : 
     879              : void PrintLUA()
     880              : {
     881              :         const UGProfileNode *rootNode = GetProfileNode(NULL);
     882              :         vector<const UGProfileNode*> nodes;
     883              :         rootNode->rec_add_nodes(nodes);
     884              :         double full = rootNode->get_avg_total_time_ms();
     885              : 
     886              :         map<string, vector<double> > files;
     887              :         for(size_t i=0; i<nodes.size(); i++)
     888              :         {
     889              :                 if(nodes[i]->zone->groups == NULL ||
     890              :                                 strcmp(nodes[i]->zone->groups, "lua") != 0)
     891              :                         continue;
     892              :                 const char *name = nodes[i]->zone->name;
     893              :                 cout << name << "\n";
     894              :                 if(name[0]==0x00 || name[1]==0x00) continue;
     895              :                 name++; // skip @
     896              :                 const char *p = strchr(name, ':'); // search for line number
     897              :                 if(p == NULL || p[0] == 0x00 || p[1] == 0x00) continue;
     898              :                 int line = strtol(p+1, NULL, 10);
     899              :                 if(line > 10000 || line < 0) continue;
     900              :                 char file[255];
     901              :                 strncpy(file, name, p-name);
     902              :                 file[p-name]=0x00;
     903              :                 vector<double> &v = files[file];
     904              :                 if(v.size() < (size_t)line+1)
     905              :                 {
     906              :                         size_t s=v.size();
     907              :                         v.resize(line+1);
     908              :                         for(; s<(size_t)line+1; s++)
     909              :                                 v[s]=0.0;
     910              :                 }
     911              :                 v[line] = nodes[i]->get_avg_total_time_ms();
     912              :         }
     913              : 
     914              : 
     915              :         for(map<string, vector<double> >::iterator it = files.begin(); it != files.end();
     916              :                 ++it)
     917              :         {
     918              :                 string name = it->first;
     919              :                 vector<double> &v = it->second;
     920              :                 cout << "\n" << name << ":\n\n";
     921              : 
     922              :                 char buf[512];
     923              :                 fstream file(name.c_str(), ios::in);
     924              :                 if(file.is_open() == false) continue;
     925              :                 size_t lineNr=0;
     926              :                 while(!file.eof())
     927              :                 {
     928              :                         file.getline(buf, 512);
     929              :                         double time = lineNr >= v.size() ? 0.0 : v[lineNr];
     930              :                         const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(time);
     931              :                         cout << resetiosflags( ::ios::scientific ) <<
     932              :                                         left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
     933              :                                         << time * unit->invTickFreq << " " <<
     934              :                                 left << setw(2) << unit->suffix << " " <<
     935              :                                 right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(time / full * 100) << "%  "
     936              :                                 << setw(3) << lineNr << "| " << buf << "\n";
     937              :                         lineNr ++;
     938              :                 }
     939              :         }
     940              : }
     941              : 
     942              : 
     943              : 
     944              : #else
     945              : 
     946              : 
     947            0 : double UGProfileNode::get_avg_entry_count() const
     948              : {
     949            0 :         return 0;
     950              : }
     951              : 
     952              : /// \return time in milliseconds spend in this node excluding subnodes
     953            0 : double UGProfileNode::get_avg_self_time_ms() const
     954              : {
     955            0 :         return 0.0;
     956              : }
     957              : 
     958              : /// \return time in milliseconds spend in this node including subnodes
     959            0 : double UGProfileNode::get_avg_total_time_ms() const
     960              : {
     961            0 :         return 0.0;
     962              : }
     963              : 
     964            0 : double UGProfileNode::get_total_mem() const
     965              : {
     966            0 :         return 0.0;
     967              : }
     968              : 
     969            0 : double UGProfileNode::get_self_mem() const
     970              : {
     971            0 :         return 0.0;
     972              : }
     973              : ///////////////////////////////////////////////////////////////////
     974            0 : string UGProfileNode::call_tree(double dSkipMarginal) const
     975              : {
     976            0 :         return "Profiler not available!";
     977              : }
     978              : 
     979            0 : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
     980              : {
     981            0 :         return "Profiler not available!";
     982              : }
     983              : 
     984            0 : string UGProfileNode::total_time_sorted(double dSkipMarginal) const
     985              : {
     986            0 :         return "Profiler not available!";
     987              : }
     988              : 
     989              : 
     990            0 : string UGProfileNode::child_self_memory_sorted(double dSkipMarginal) const
     991              : {
     992            0 :         return "Profiler not available!";
     993              : }
     994              : 
     995            0 : string UGProfileNode::total_memory_sorted(double dSkipMarginal) const
     996              : {
     997            0 :         return "Profiler not available!";
     998              : }
     999              : 
    1000            0 : string UGProfileNode::entry_count_sorted(double dSkipMarginal) const
    1001              : {
    1002            0 :         return "Profiler not available!";
    1003              : }
    1004              : 
    1005              : /*
    1006              : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
    1007              : {
    1008              :         return "Profiler not available!";
    1009              : }
    1010              : 
    1011              : string UGProfileNode::child_self_time_sorted() const
    1012              : {
    1013              :         return child_self_time_sorted(0.0);
    1014              : }
    1015              : 
    1016              : string UGProfileNode::total_time_sorted() const
    1017              : {
    1018              :         return total_time_sorted(0.0);
    1019              : }
    1020              : */
    1021              : 
    1022              : /// \return true if node has been found
    1023            0 : bool UGProfileNode::valid() const
    1024              : {
    1025            0 :         return false;
    1026              : }
    1027              : 
    1028            0 : string UGProfileNode::groups() const
    1029              : {
    1030            0 :         return "Profiler not available!";
    1031              : }
    1032              : 
    1033            0 : const UGProfileNode *GetProfileNode(const char *name)
    1034              : {
    1035            0 :         return PROFILER_NULL_NODE;
    1036              : }
    1037              : 
    1038            0 : const UGProfileNode *GetProfileNode(const char *name, const UGProfileNode *node)
    1039              : {
    1040            0 :         return PROFILER_NULL_NODE;
    1041              : }
    1042              : 
    1043            0 : void WriteProfileDataXML(const char *filename)
    1044              : {
    1045            0 :         return;
    1046              : }
    1047              : 
    1048            0 : void WriteProfileDataXML(const char *filename, int procId)
    1049              : {
    1050            0 :         return;
    1051              : }
    1052              : 
    1053            0 : bool GetProfilerAvailable()
    1054              : {
    1055            0 :         return false;
    1056              : }
    1057              : 
    1058            0 : void PrintLUA()
    1059              : {
    1060              :         UG_LOG("LUA Profiler not available.");
    1061            0 : }
    1062              : 
    1063            0 : void UGProfileNode::CheckForTooSmallNodes()
    1064              : {
    1065              : 
    1066            0 : }
    1067              : 
    1068            0 : void WriteCallLog(const char *filename) {}
    1069            0 : void WriteCallLog(const char *filename, int procId) {}
    1070              : #endif // SHINY
    1071              : 
    1072              : } // namespace ug
    1073              : 
    1074              : 
    1075              : //////////////////////////////////////////////////////////////////////////////////////////
        

Generated by: LCOV version 2.0-1