Line data Source code
1 : /*
2 : * Copyright (c) 2012-2015: G-CSC, Goethe University Frankfurt
3 : * Author: Martin Rupp
4 : *
5 : * This file is part of UG4.
6 : *
7 : * UG4 is free software: you can redistribute it and/or modify it under the
8 : * terms of the GNU Lesser General Public License version 3 (as published by the
9 : * Free Software Foundation) with the following additional attribution
10 : * requirements (according to LGPL/GPL v3 §7):
11 : *
12 : * (1) The following notice must be displayed in the Appropriate Legal Notices
13 : * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14 : *
15 : * (2) The following notice must be displayed at a prominent place in the
16 : * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17 : *
18 : * (3) The following bibliography is recommended for citation and must be
19 : * preserved in all covered files:
20 : * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21 : * parallel geometric multigrid solver on hierarchically distributed grids.
22 : * Computing and visualization in science 16, 4 (2013), 151-164"
23 : * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24 : * flexible software system for simulating pde based models on high performance
25 : * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26 : *
27 : * This program is distributed in the hope that it will be useful,
28 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 : * GNU Lesser General Public License for more details.
31 : */
32 :
33 :
34 : #include "profiler.h"
35 : #include <iomanip>
36 : #include <cmath> // for floor
37 : #include <algorithm>
38 : #include <string>
39 : #include <string.h>
40 : #include "common/log.h"
41 : #include "common/util/string_util.h"
42 : #include "profile_node.h"
43 : #include "common/util/string_util.h"
44 : #include "common/util/path_provider.h"
45 : #include <map>
46 : #include <fstream>
47 : #include "compile_info/compile_info.h"
48 : #include "pcl/pcl_base.h"
49 : #include "common/error.h"
50 : #include "memtracker.h"
51 :
52 : #ifdef UG_PARALLEL
53 : #include "pcl/pcl.h"
54 : #endif
55 :
56 : #include "profile_call.h"
57 :
58 :
59 : using namespace std;
60 :
61 : namespace ug
62 : {
63 :
64 : #if SHINY_PROFILER
65 :
66 : void ProfilerUpdate()
67 : {
68 : Shiny::ProfileManager::instance.update(1.0);
69 : UpdateTotalMem();
70 : }
71 :
72 :
73 : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_NAME = 70; // Shiny::OUTPUT_WIDTH_NAME
74 : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_HIT = 13; // Shiny::OUTPUT_WIDTH_HIT
75 : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_TIME = 7; // Shiny::OUTPUT_WIDTH_TIME
76 : static const int PROFILER_BRIDGE_OUTPUT_WIDTH_PERC = 4; // Shiny::OUTPUT_WIDTH_PERC
77 :
78 :
79 : /*static string cut(string &s, size_t L)
80 : {
81 : return s.substr(0, L);
82 : }*/
83 :
84 : static string cut(const char *p, size_t L)
85 : {
86 : string s(p);
87 : return s.substr(0, L);
88 : }
89 :
90 :
91 : #define SHINY_DAMPING_FACTOR 1.0
92 :
93 : double UGProfileNode::get_avg_entry_count() const
94 : {
95 : if(!valid()) return 0.0;
96 : return data.entryCount.avg; // * SHINY_DAMPING_FACTOR;
97 : }
98 :
99 : double UGProfileNode::get_avg_self_time_ms() const
100 : {
101 : return get_avg_self_time() * Shiny::GetTickInvFreq() * 1000.0;
102 : }
103 :
104 : double UGProfileNode::get_avg_total_time_ms() const
105 : {
106 : return get_avg_total_time() * Shiny::GetTickInvFreq() * 1000.0;
107 : }
108 :
109 : double UGProfileNode::get_avg_self_time() const
110 : {
111 : if(!valid()) return 0.0;
112 : return data.selfTicks.avg * SHINY_DAMPING_FACTOR;
113 : }
114 :
115 : double UGProfileNode::get_avg_total_time() const
116 : {
117 : if(!valid()) return 0.0;
118 : return data.totalTicksAvg() * SHINY_DAMPING_FACTOR;
119 : }
120 :
121 : double UGProfileNode::get_self_mem() const
122 : {
123 : if(!valid()) return 0.0;
124 : return GetSelfMem(this);
125 : }
126 :
127 :
128 : double UGProfileNode::get_total_mem() const
129 : {
130 : if(!valid()) return 0.0;
131 : return GetTotalMem(this);
132 : }
133 :
134 : string UGProfileNode::get_mem_info(double fullMem) const
135 : {
136 : if(HasMemTracking())
137 : {
138 : stringstream s;
139 : s << GetBytesSizeString((size_t)get_self_mem(), 10) << setw(5) << floor(get_self_mem() / fullMem * 100) << "% ";
140 : s << GetBytesSizeString((size_t)get_total_mem(), 10) << setw(5) << floor(get_total_mem() / fullMem * 100) << "% ";
141 : return s.str();
142 : }
143 : else
144 : return "";
145 : }
146 :
147 : string UGProfileNode::call_tree(double dSkipMarginal) const
148 : {
149 : if(!valid()) return "Profile Node not valid!";
150 :
151 : stringstream s;
152 : UGProfileNode::log_header(s, "call tree");
153 :
154 : rec_print(get_avg_total_time_ms(), get_total_mem(), s, 0, dSkipMarginal);
155 :
156 : return s.str();
157 : }
158 :
159 : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
160 : {
161 : return print_child_sorted("self time sorted", UGProfileNode::self_time_sort, dSkipMarginal);
162 : }
163 :
164 : string UGProfileNode::total_time_sorted(double dSkipMarginal) const
165 : {
166 : return print_child_sorted("total time sorted", UGProfileNode::total_time_sort, dSkipMarginal);
167 : }
168 :
169 : string UGProfileNode::child_self_memory_sorted(double dSkipMarginal) const
170 : {
171 : return print_child_sorted("self memory sorted", UGProfileNode::self_memory_sort, dSkipMarginal);
172 : }
173 :
174 : string UGProfileNode::total_memory_sorted(double dSkipMarginal) const
175 : {
176 : return print_child_sorted("total memory sorted", UGProfileNode::total_memory_sort, dSkipMarginal);
177 : }
178 :
179 : string UGProfileNode::entry_count_sorted(double dSkipMarginal) const
180 : {
181 : return print_child_sorted("entry count sorted", UGProfileNode::entry_count_sort, dSkipMarginal);
182 : }
183 :
184 :
185 : bool UGProfileNode::valid() const
186 : {
187 : return this != PROFILER_NULL_NODE;
188 : }
189 :
190 : string SimplifyUG4Path(string s)
191 : {
192 : const string &ug4root = PathProvider::get_path(ROOT_PATH);
193 : if(StartsWith(s, ug4root))
194 : return string("$") + s.substr(ug4root.length());
195 : else
196 : return s;
197 : }
198 :
199 : // private functions
200 :
201 : void UGProfileNode::PDXML_rec_write(ostream &s) const
202 : {
203 : if(!valid()) return;
204 :
205 : /*
206 : * <node>
207 : * name
208 : * group
209 : * file
210 : * line
211 : * hits
212 : * self
213 : * total *
214 : * </node>
215 : */
216 :
217 : s << "<node>\n";
218 : if(zone->name)
219 : {
220 : if(strcmp(zone->name, "<root>") == 0)
221 : s << "<name>root</name>\n";
222 : else
223 : s << "<name>" << XMLStringEscape(zone->name) << "</name>\n";
224 : }
225 : if(zone->groups)
226 : s << "<groups>" << XMLStringEscape(zone->groups) << "</groups>\n";
227 :
228 : if(zone->file != NULL)
229 : {
230 : s << "<file>" << SimplifyUG4Path(zone->file) << "</file>\n";
231 : s << "<line>" << zone->line << "</line>\n";
232 : }
233 :
234 : #ifdef SHINY_CALL_LOGGING
235 : s << "<id>" << this << "</id>\n";
236 : #endif
237 :
238 : s << "<hits>" << floor(get_avg_entry_count()) << "</hits>\n"
239 : << "<self>" << get_avg_self_time_ms() * 1000.0 << "</self>\n"
240 : << "<total>" << get_avg_total_time_ms() * 1000.0 << "</total>\n";
241 :
242 : if(HasMemTracking())
243 : {
244 : s << "<totalMemory>" << get_total_mem() << "</totalMemory>\n";
245 : s << "<selfMemory>" << get_self_mem() << "</selfMemory>\n";
246 : }
247 :
248 : for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
249 : {
250 : p->PDXML_rec_write(s);
251 : if(p==get_last_child())
252 : break;
253 : }
254 :
255 : s << "</node>\n";
256 : }
257 :
258 : string GetProfilingFileLine(const char *name)
259 : {
260 : const char *p = strchr(name, ':'); // search for line number
261 : if(!(p == NULL || p[0] == 0x00 || p[1] == 0x00))
262 : {
263 : // if we find the corresponding code in the LUA file, print the code as "name"
264 : // and the filename above
265 :
266 : int line = strtol(p+1, NULL, 10);
267 : char file[255];
268 : strncpy(file, name, p-name);
269 : file[p-name]=0x00;
270 : string str = GetFileLine(file, line);
271 : for(size_t i=0; i<str.size(); i++) if(str[i] == '\t') str[i] = ' ';
272 : return str;
273 : }
274 : else return "";
275 :
276 : }
277 :
278 : string UGProfileNode::print_node(double fullMs, double fullMem, size_t offset) const
279 : {
280 : if(!valid()) return "";
281 : const Shiny::TimeUnit *selfUnit = Shiny::GetTimeUnit(get_avg_self_time());
282 : const Shiny::TimeUnit *totalUnit = Shiny::GetTimeUnit(get_avg_total_time());
283 : stringstream s;
284 : if(offset) s << setw(offset) << " ";
285 :
286 : // name
287 : if(zone->name[0] == '@')
288 : {
289 : // zone name is a filename
290 : // this can happen when we are using LUA script profiling.
291 :
292 : // check if filename is too long
293 : if(strlen(zone->name) > (PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) )
294 : s << "@... " << zone->name+strlen(zone->name)-(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset-5);
295 : else
296 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) << cut(zone->name, PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
297 :
298 : string str = GetProfilingFileLine(zone->name+1);
299 : if(str.size())
300 : {
301 : s << "\n";
302 : if(offset) s << setw(offset) << " ";
303 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) <<
304 : cut(str.c_str(), PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
305 : }
306 : }
307 : else
308 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset) << cut(zone->name, PROFILER_BRIDGE_OUTPUT_WIDTH_NAME-offset);
309 :
310 : // entry count
311 : s << right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << floor(get_avg_entry_count()) << " " <<
312 : setprecision(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME-1);
313 :
314 : // self time
315 : s << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << get_avg_self_time() * selfUnit->invTickFreq << " " <<
316 : left << setw(2) << selfUnit->suffix << " " <<
317 : right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(get_avg_self_time_ms() / fullMs * 100) << "% ";
318 :
319 : // total time
320 : s << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << get_avg_total_time() * totalUnit->invTickFreq << " " <<
321 : left << setw(2) << totalUnit->suffix << " " <<
322 : right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(get_avg_total_time_ms() / fullMs * 100) << "% ";
323 : if(fullMem >= 0.0)
324 : s << get_mem_info(fullMem);
325 : if(zone->groups != NULL)
326 : s << zone->groups;
327 : return s.str();
328 : }
329 :
330 : const UGProfileNode *UGProfileNode::get_first_child() const
331 : {
332 : return reinterpret_cast<const UGProfileNode*>(firstChild);
333 : }
334 :
335 : const UGProfileNode *UGProfileNode::get_last_child() const
336 : {
337 : return reinterpret_cast<const UGProfileNode*>(lastChild);
338 : }
339 :
340 : const UGProfileNode *UGProfileNode::get_next_sibling() const
341 : {
342 : return reinterpret_cast<const UGProfileNode*>(nextSibling);
343 : }
344 :
345 : const UGProfileNode *UGProfileNode::find_next_in_tree() const
346 : {
347 : return reinterpret_cast<const UGProfileNode*>(findNextInTree());
348 : }
349 :
350 : void UGProfileNode::rec_print(double fullMs, double fullMem, stringstream &s, size_t offset, double dSkipMarginal) const
351 : {
352 : if(!valid()) return;
353 : if(dSkipMarginal==0.0 ||
354 : (fullMs*dSkipMarginal < get_avg_total_time_ms() || (HasMemTracking() && fullMem*dSkipMarginal< get_total_mem() ) )
355 : )
356 : {
357 : s << print_node(fullMs, fullMem, offset) << "\n";
358 : for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
359 : {
360 : p->rec_print(fullMs, fullMem, s, offset+1, dSkipMarginal);
361 : if(p==get_last_child())
362 : break;
363 : }
364 : }
365 : }
366 :
367 : string UGProfileNode::groups() const
368 : {
369 : if (!valid()) return "Profile Node not valid!";
370 :
371 : vector<const UGProfileNode*> nodes;
372 : rec_add_nodes(nodes);
373 :
374 : map<string, double> mapGroups;
375 : for(size_t i=0; i<nodes.size(); i++)
376 : {
377 : if(nodes[i]->zone->groups == NULL) continue;
378 : vector<string> g;
379 : TokenizeString(nodes[i]->zone->groups, g, ' ');
380 : for(size_t j=0; j<g.size(); j++)
381 : mapGroups[g[j]] += nodes[i]->get_avg_self_time();
382 : }
383 :
384 : vector<string> gs;
385 : #ifdef UG_PARALLEL
386 : if(pcl::ProcRank() == 0)
387 : #endif
388 : for(map<string, double>::iterator it = mapGroups.begin(); it != mapGroups.end();++it)
389 : gs.push_back(it->first);
390 :
391 : #ifdef UG_PARALLEL
392 : pcl::ProcessCommunicator pc;
393 : pc.broadcast(gs);
394 : vector<double> t(gs.size(), 0.0), tMax, tMin;
395 : for(size_t i=0; i<gs.size(); i++)
396 : t[i] = mapGroups[gs[i]];
397 : pc.allreduce(t, tMax, PCL_RO_MAX);
398 : pc.allreduce(t, tMin, PCL_RO_MIN);
399 : #endif
400 : stringstream s;
401 : for(size_t i=0; i<gs.size(); i++)
402 : {
403 : string name = gs[i];
404 : double time = mapGroups[name];
405 : const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(time);
406 : s << left << setw(20) << name
407 : << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << time * unit->invTickFreq << " " <<
408 : left << setw(2) << unit->suffix;
409 : #ifdef UG_PARALLEL
410 : double maxTime = tMax[i];
411 : double minTime = tMin[i];
412 : double diffTime = maxTime - minTime;
413 : const Shiny::TimeUnit *maxUnit = Shiny::GetTimeUnit(maxTime);
414 : const Shiny::TimeUnit *minUnit = Shiny::GetTimeUnit(minTime);
415 : const Shiny::TimeUnit *diffUnit = Shiny::GetTimeUnit(diffTime);
416 : s << left << " max: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
417 : << maxTime * maxUnit->invTickFreq << " " << left << setw(2) << maxUnit->suffix;
418 : s << left << " min: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
419 : << minTime * maxUnit->invTickFreq << " " << left << setw(2) << minUnit->suffix;
420 : s << left << " diff: " << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
421 : << diffTime * diffUnit->invTickFreq << " " << left << setw(2) << diffUnit->suffix
422 : << " (" << diffTime/maxTime*100 << " %)";
423 : #endif
424 : s << "\n";
425 : }
426 : return s.str();
427 : }
428 :
429 :
430 : void UGProfileNode::rec_add_nodes(vector<const UGProfileNode*> &nodes) const
431 : {
432 : nodes.push_back(this);
433 : for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
434 : {
435 : p->rec_add_nodes(nodes);
436 : if(p==get_last_child())
437 : break;
438 : }
439 : }
440 :
441 : string UGProfileNode::print_child_sorted(const char *name, bool sortFunction(const UGProfileNode *a, const UGProfileNode *b),
442 : double dSkipMarginal) const
443 : {
444 : if(!valid()) return "";
445 : stringstream s;
446 : vector<const UGProfileNode*> nodes;
447 : rec_add_nodes(nodes);
448 : sort(nodes.begin(), nodes.end(), sortFunction);
449 :
450 : UGProfileNode::log_header(s, name);
451 : double fullMs = get_avg_total_time_ms();
452 : double fullMem = get_total_mem();
453 : for(size_t i=0; i<nodes.size(); i++)
454 : {
455 : if(dSkipMarginal==0.0 || fullMs*dSkipMarginal < nodes[i]->get_avg_total_time_ms())
456 : s << nodes[i]->print_node(fullMs, fullMem) << "\n";
457 : }
458 : return s.str();
459 : }
460 :
461 : void UGProfileNode::log_header(stringstream &s, const char *name)
462 : {
463 :
464 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << name << " " <<
465 : right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << "hits" << " " <<
466 : setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4+PROFILER_BRIDGE_OUTPUT_WIDTH_PERC+1 -4) << "self time" << " " <<
467 : setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4+PROFILER_BRIDGE_OUTPUT_WIDTH_PERC+1) << "total time";
468 : if(HasMemTracking())
469 : {
470 : s << " " << setw(10+5+3) << "self mem" << " " <<
471 : setw(10) << "total mem";
472 : }
473 :
474 : s << "\n";
475 : }
476 :
477 : bool UGProfileNode::self_time_sort(const UGProfileNode *a, const UGProfileNode *b)
478 : {
479 : return a->get_avg_self_time() < b->get_avg_self_time();
480 : }
481 :
482 : bool UGProfileNode::total_time_sort(const UGProfileNode *a, const UGProfileNode *b)
483 : {
484 : return a->get_avg_total_time() < b->get_avg_total_time();
485 : }
486 :
487 : bool UGProfileNode::self_memory_sort(const UGProfileNode *a, const UGProfileNode *b)
488 : {
489 : return a->get_self_mem() < b->get_self_mem();
490 : }
491 :
492 : bool UGProfileNode::total_memory_sort(const UGProfileNode *a, const UGProfileNode *b)
493 : {
494 : return a->get_total_mem() < b->get_total_mem();
495 : }
496 :
497 :
498 : bool UGProfileNode::entry_count_sort(const UGProfileNode *a, const UGProfileNode *b)
499 : {
500 : return a->get_avg_entry_count() < b->get_avg_entry_count();
501 : }
502 :
503 : /*
504 : void WriteCompressedProfileData(const char *filename)
505 : {
506 : // compressed:
507 : // filedata: file0, file1, file2, file3
508 : // zonedata: zone1 {name, fileid, line}, zone2, zone3
509 : // nodes: node0 {self, total, children}, node1, node2, node3.
510 :
511 : // combine file and zone data, so that profile data is
512 : *
513 : * // <files>...</files>
514 : * <zones><zone id="732"><name>bla</name><group>algebra</group><file>8</file><line>344</line> </zone> ... </zones>
515 : * <core id=0>
516 : * <node zoneId=732><self>8734.2</self><total>8333.2</total></node>
517 :
518 : }
519 : */
520 :
521 : #ifndef NDEBUG
522 : #define PROFILE_NODE_MIN_HITS 500
523 : #define PROFILE_NODE_MIN_TOTAL_TIME_MS 100
524 : #define PROFILE_NODE_MAX_TIME_PER_CALL_MS 0.01
525 : #else
526 : #define PROFILE_NODE_MIN_HITS 1000
527 : #define PROFILE_NODE_MIN_TOTAL_TIME_MS 1000
528 : #define PROFILE_NODE_MAX_TIME_PER_CALL_MS 0.01
529 : #endif
530 :
531 : void UGProfileNode::check_for_too_small_nodes(double fullMs, map<string, const UGProfileNode *> &list) const
532 : {
533 : // also don't check nodes which require less time than 0.01% of the whole problem
534 : if(get_avg_total_time_ms() < 0.01*0.01*fullMs) return;
535 : for(const UGProfileNode *p=get_first_child(); p != NULL; p=p->get_next_sibling())
536 : {
537 : double t_ms = p->get_avg_total_time_ms();
538 : size_t entry = (size_t)p->get_avg_entry_count();
539 :
540 : if(t_ms > PROFILE_NODE_MIN_TOTAL_TIME_MS)
541 : {
542 : if(entry > PROFILE_NODE_MIN_HITS && t_ms/entry < PROFILE_NODE_MAX_TIME_PER_CALL_MS)
543 : {
544 : string desc = p->zone->name;
545 : if(p->zone->file != NULL)
546 : desc.append(string(p->zone->file) + string(":") + ToString(p->zone->line));
547 : const UGProfileNode *p2 = list[desc];
548 :
549 : if(p2 == NULL)
550 : list[desc] = p;
551 : else
552 : {
553 : double t_ms2 = p->get_avg_total_time_ms();
554 : size_t entry2 = (size_t)p->get_avg_entry_count();
555 : if(entry2/t_ms2 < entry/t_ms)
556 : list[desc] = p;
557 : }
558 : }
559 :
560 : p->check_for_too_small_nodes(fullMs, list);
561 : }
562 : if(p==get_last_child())
563 : break;
564 : }
565 :
566 : }
567 :
568 : void ProfileTestFunction1()
569 : {
570 : PROFILE_FUNC();
571 : }
572 :
573 : void ProfileTestFunction2()
574 : {
575 : PROFILE_FUNC();
576 : }
577 :
578 :
579 : void UGProfileNode::CheckForTooSmallNodes()
580 : {
581 : Shiny::ProfileManager::instance.update(1.0); // WE call with damping = 1.0
582 : const UGProfileNode *pnRoot = UGProfileNode::get_root();
583 :
584 : const size_t numProfileCall = 100;
585 : for(size_t i=0; i<numProfileCall; i++)
586 : {
587 : ProfileTestFunction1();
588 : ProfileTestFunction2();
589 : }
590 :
591 : const UGProfileNode *ptf1 = GetProfileNode("ProfileTestFunction1");
592 :
593 : double fullMs = pnRoot->get_avg_total_time_ms();
594 :
595 : if(fullMs > 1)
596 : {
597 : map<string, const UGProfileNode *> list;
598 : pnRoot->check_for_too_small_nodes(fullMs, list);
599 :
600 : if(list.size() != 0)
601 : {
602 : UG_LOG(resetiosflags( ::ios::scientific ));
603 : UG_LOG("WARNING: Some profile nodes might be too small\n");
604 : UG_LOG("----------------------------------------------------------------------\n");
605 :
606 : double tProfileCall= ptf1->get_avg_total_time()/numProfileCall;
607 : const Shiny::TimeUnit *unit2 = Shiny::GetTimeUnit(tProfileCall);
608 :
609 : UG_LOG("Profile Call overhead estimated at " << tProfileCall * unit2->invTickFreq << " " << unit2->suffix << ".\n");
610 :
611 : UG_LOG("Displaying nodes with hits > " << PROFILE_NODE_MIN_HITS << " and totalTime > " << PROFILE_NODE_MIN_TOTAL_TIME_MS
612 : << " and totalTime/hits < " << PROFILE_NODE_MAX_TIME_PER_CALL_MS << " ms :\n");
613 :
614 :
615 : stringstream s;
616 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << "name" << " " <<
617 : right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << "hits" << " " <<
618 : setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+4) << "total " << " " <<
619 : setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME+5) << "total/hits " <<
620 : " potentially lost (prediction)\n";
621 : for(map<string, const UGProfileNode *>::iterator it = list.begin();
622 : it != list.end(); ++it)
623 : {
624 : const UGProfileNode *p = it->second;
625 : if(p->zone->file != NULL)
626 : s << SimplifyUG4Path(p->zone->file) << ":" << p->zone->line << " : \n";
627 :
628 : s << left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_NAME) << p->zone->name << " ";
629 : // entry count
630 : s << right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_HIT) << floor(p->get_avg_entry_count()) << " " <<
631 : setprecision(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME-1);
632 :
633 : // total time
634 : const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(p->get_avg_total_time());
635 : s << right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << p->get_avg_total_time() * unit->invTickFreq << " " <<
636 : left << setw(2) << unit->suffix << " ";
637 :
638 : // fraction
639 : double t = p->get_avg_total_time()/p->get_avg_entry_count();
640 : unit = Shiny::GetTimeUnit(t);
641 : s << right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << t * unit->invTickFreq << " " <<
642 : left << setw(2) << unit->suffix << " ";
643 :
644 : // lost
645 :
646 : t = tProfileCall*p->get_avg_entry_count();
647 : unit = Shiny::GetTimeUnit(t);
648 : s << right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME) << t * unit->invTickFreq << " " <<
649 : left << setw(2) << unit->suffix << " ";
650 :
651 : // %
652 :
653 : // t = tProfileCall*p->get_avg_entry_count();
654 : // unit = Shiny::GetTimeUnit(t);
655 : // s << (tProfileCall*p->get_avg_entry_count()*100)/ p->get_avg_total_time() << "% ";
656 :
657 : s << "\n";
658 : }
659 : UG_LOG(s.str());
660 :
661 : }
662 : }
663 : }
664 :
665 : const UGProfileNode *UGProfileNode::get_root()
666 : {
667 : const Shiny::ProfileNode *node = &Shiny::ProfileManager::instance.rootNode;
668 : return reinterpret_cast<const UGProfileNode*> (node);
669 : }
670 :
671 :
672 : void WriteProfileDataXML(const char *filename)
673 : {
674 : WriteProfileDataXML(filename, ug::GetLogAssistant().get_output_process());
675 : }
676 :
677 :
678 : void WriteCallLog(const char *filename, int procId)
679 : {
680 : #ifdef SHINY_CALL_LOGGING
681 : if(pcl::ProcRank() != procId) return;
682 : fstream f(filename, ios::out);
683 :
684 : Shiny::tick_t curTime;
685 : Shiny::GetTicks(&curTime);
686 :
687 : FinishShinyCallLogging();
688 : UG_LOG("Writing Call log to " << filename << ". Max Frequency is " << g_ShinyCallLoggingMaxFreq << " Hz.\n")
689 : // print call log
690 : int depth=0;
691 : for(size_t i=0; i<profileCalls.size(); i++)
692 : {
693 : f << profileCalls[i].c << " ";
694 : if(profileCalls[i].p == NULL)
695 : {
696 : depth--;
697 : f << repeat(' ', depth) << "}\n";
698 : }
699 : else
700 : {
701 : f << repeat(' ', depth);
702 :
703 : if(profileCalls[i].p->zone->name[0] == '@')
704 : {
705 : string str = GetProfilingFileLine(profileCalls[i].p->zone->name +1);
706 : if(str.size())
707 : { f << str << " "; }
708 :
709 : f << "(" << SimplifyUG4Path(profileCalls[i].p->zone->name+1) << ")\n";
710 : }
711 : else
712 : f << profileCalls[i].p->zone->name << " (" << SimplifyUG4Path(profileCalls[i].p->zone->file) << ":" << profileCalls[i].p->zone->line << ")\n";
713 :
714 : if(i+1 < profileCalls.size() && profileCalls[i+1].p == NULL)
715 : i++;
716 : else
717 : {
718 : f << profileCalls[i].c << " ";
719 : f << repeat(' ', depth) << "{\n";
720 : depth++;
721 : }
722 : }
723 : }
724 : for(int i=0; i<depth; i++)
725 : f << curTime << " " << repeat(' ', depth-i-1) << "}\n";
726 : #else
727 : UG_LOG("Did NOT write Call Log since Call Logging is disabled (enable with cmake -DSHINY_CALL_LOGGING=ON ..)\n");
728 : #endif
729 : }
730 :
731 : void WriteCallLog(const char *filename)
732 : {
733 : WriteCallLog(filename, ug::GetLogAssistant().get_output_process());
734 : }
735 :
736 : void WriteProfileDataXML(const char *filename, int procId)
737 : {
738 : #ifdef UG_PARALLEL
739 : UG_COND_THROW(procId >= pcl::NumProcs(),
740 : "Bad process id: " << procId << ". Maximum allowed is "
741 : << pcl::NumProcs() - 1);
742 : #else
743 : UG_COND_THROW(procId >0,
744 : "Bad process id: " << procId
745 : << ". Only one process available (with id 0)");
746 : #endif
747 :
748 : #ifdef SHINY_CALL_LOGGING
749 : Shiny::tick_t curTime;
750 : Shiny::GetTicks(&curTime);
751 : #endif
752 : ProfilerUpdate();
753 : const UGProfileNode *pnRoot = UGProfileNode::get_root();
754 :
755 : // procId represents the id of the process which will write the file.
756 : // if all processes are considered (procId < 0), then process 0 will write the file.
757 : bool gatherFromAllProcs = false;
758 : if(procId < 0){
759 : gatherFromAllProcs = true;
760 : procId = 0;
761 : }
762 :
763 : #ifdef UG_PARALLEL
764 : typedef pcl::SingleLevelLayout<pcl::OrderedInterface<size_t, vector> >
765 : IndexLayout;
766 :
767 : pcl::InterfaceCommunicator<IndexLayout> ic;
768 : if(pcl::ProcRank() == procId)
769 : {
770 : #endif
771 : fstream f(filename, ios::out);
772 : f << "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
773 : f << "<!-- ug4 created profile data -->\n";
774 : f << "<ProfileData>\n";
775 : f << "<AdditionalInfo>\n";
776 : f << "<SVNRevision>" << UGSvnRevision() << "</SVNRevision>\n";
777 : f << "<BuildHostname>" << UGBuildHost() << "</BuildHostname>\n";
778 : f << "<CompileDate>" << UGCompileDate() << "</CompileDate>\n";
779 : f << "</AdditionalInfo>\n";
780 : f << "<basepath>" << PathProvider::get_path(ROOT_PATH) << "</basepath>\n";
781 :
782 : f << "<core id=\""<<procId<<"\">\n";
783 :
784 : pnRoot->PDXML_rec_write(f);
785 : f << "</core>\n";
786 :
787 : #ifdef SHINY_CALL_LOGGING
788 : FinishShinyCallLogging();
789 : f << "<log>\n";
790 : int depth=0;
791 : for(size_t i=0; i<profileCalls.size(); i++)
792 : {
793 : if(profileCalls[i].p == NULL)
794 : {
795 : f << "<stop>" << profileCalls[i].c << "</stop>\n";
796 : f << "</call>\n";
797 : depth--;
798 : }
799 : else
800 : {
801 : f << "<call>\n<start>" << profileCalls[i].c << "</start>\n<id>" << profileCalls[i].p << "</id>\n";
802 : depth++;
803 : }
804 : }
805 : for(int i=0; i<depth; i++)
806 : {
807 : f << "<stop>" << curTime << "</stop>\n";
808 : f << "</call>\n";
809 : }
810 : f << "</log>\n";
811 :
812 : #endif
813 :
814 : #ifdef UG_PARALLEL
815 : if(gatherFromAllProcs)
816 : {
817 : vector<ug::BinaryBuffer> buffers(pcl::NumProcs()-1);
818 : for(int i=1; i<pcl::NumProcs(); i++)
819 : ic.receive_raw(i, buffers[i-1]);
820 : ic.communicate();
821 :
822 : for(int i=1; i<pcl::NumProcs(); i++)
823 : {
824 : f << "\n<core id=\"" << i << "\">";
825 : string s;
826 : Deserialize(buffers[i-1], s);
827 : f << s;
828 : f << "\n</core>";
829 : }
830 : }
831 : #endif
832 : f << "</ProfileData>\n";
833 :
834 : #ifdef UG_PARALLEL
835 : }
836 : else
837 : {
838 : if(gatherFromAllProcs)
839 : {
840 : stringstream ss;
841 : pnRoot->PDXML_rec_write(ss);
842 : BinaryBuffer buf;
843 : Serialize(buf, ss.str());
844 : ic.send_raw(procId, buf.buffer(), buf.write_pos(), false);
845 : ic.communicate();
846 : }
847 : }
848 : #endif
849 :
850 : }
851 :
852 : const UGProfileNode *GetProfileNode(const char *name, const UGProfileNode *node)
853 : {
854 : ProfilerUpdate();
855 : if(node == NULL) node = UGProfileNode::get_root();
856 : if(name == NULL)
857 : return node;
858 : do
859 : {
860 : if(strcmp(node->zone->name, name) == 0)
861 : return node;
862 : node = node->find_next_in_tree();
863 : } while (node);
864 :
865 : // UG_LOG("Profiler Node \"" << name << "\" not found\n");
866 : return PROFILER_NULL_NODE;
867 : }
868 :
869 : const UGProfileNode *GetProfileNode(const char *name)
870 : {
871 : return GetProfileNode(name, NULL);
872 : }
873 :
874 : bool GetProfilerAvailable()
875 : {
876 : return true;
877 : }
878 :
879 : void PrintLUA()
880 : {
881 : const UGProfileNode *rootNode = GetProfileNode(NULL);
882 : vector<const UGProfileNode*> nodes;
883 : rootNode->rec_add_nodes(nodes);
884 : double full = rootNode->get_avg_total_time_ms();
885 :
886 : map<string, vector<double> > files;
887 : for(size_t i=0; i<nodes.size(); i++)
888 : {
889 : if(nodes[i]->zone->groups == NULL ||
890 : strcmp(nodes[i]->zone->groups, "lua") != 0)
891 : continue;
892 : const char *name = nodes[i]->zone->name;
893 : cout << name << "\n";
894 : if(name[0]==0x00 || name[1]==0x00) continue;
895 : name++; // skip @
896 : const char *p = strchr(name, ':'); // search for line number
897 : if(p == NULL || p[0] == 0x00 || p[1] == 0x00) continue;
898 : int line = strtol(p+1, NULL, 10);
899 : if(line > 10000 || line < 0) continue;
900 : char file[255];
901 : strncpy(file, name, p-name);
902 : file[p-name]=0x00;
903 : vector<double> &v = files[file];
904 : if(v.size() < (size_t)line+1)
905 : {
906 : size_t s=v.size();
907 : v.resize(line+1);
908 : for(; s<(size_t)line+1; s++)
909 : v[s]=0.0;
910 : }
911 : v[line] = nodes[i]->get_avg_total_time_ms();
912 : }
913 :
914 :
915 : for(map<string, vector<double> >::iterator it = files.begin(); it != files.end();
916 : ++it)
917 : {
918 : string name = it->first;
919 : vector<double> &v = it->second;
920 : cout << "\n" << name << ":\n\n";
921 :
922 : char buf[512];
923 : fstream file(name.c_str(), ios::in);
924 : if(file.is_open() == false) continue;
925 : size_t lineNr=0;
926 : while(!file.eof())
927 : {
928 : file.getline(buf, 512);
929 : double time = lineNr >= v.size() ? 0.0 : v[lineNr];
930 : const Shiny::TimeUnit *unit = Shiny::GetTimeUnit(time);
931 : cout << resetiosflags( ::ios::scientific ) <<
932 : left << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_TIME)
933 : << time * unit->invTickFreq << " " <<
934 : left << setw(2) << unit->suffix << " " <<
935 : right << setw(PROFILER_BRIDGE_OUTPUT_WIDTH_PERC) << floor(time / full * 100) << "% "
936 : << setw(3) << lineNr << "| " << buf << "\n";
937 : lineNr ++;
938 : }
939 : }
940 : }
941 :
942 :
943 :
944 : #else
945 :
946 :
947 0 : double UGProfileNode::get_avg_entry_count() const
948 : {
949 0 : return 0;
950 : }
951 :
952 : /// \return time in milliseconds spend in this node excluding subnodes
953 0 : double UGProfileNode::get_avg_self_time_ms() const
954 : {
955 0 : return 0.0;
956 : }
957 :
958 : /// \return time in milliseconds spend in this node including subnodes
959 0 : double UGProfileNode::get_avg_total_time_ms() const
960 : {
961 0 : return 0.0;
962 : }
963 :
964 0 : double UGProfileNode::get_total_mem() const
965 : {
966 0 : return 0.0;
967 : }
968 :
969 0 : double UGProfileNode::get_self_mem() const
970 : {
971 0 : return 0.0;
972 : }
973 : ///////////////////////////////////////////////////////////////////
974 0 : string UGProfileNode::call_tree(double dSkipMarginal) const
975 : {
976 0 : return "Profiler not available!";
977 : }
978 :
979 0 : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
980 : {
981 0 : return "Profiler not available!";
982 : }
983 :
984 0 : string UGProfileNode::total_time_sorted(double dSkipMarginal) const
985 : {
986 0 : return "Profiler not available!";
987 : }
988 :
989 :
990 0 : string UGProfileNode::child_self_memory_sorted(double dSkipMarginal) const
991 : {
992 0 : return "Profiler not available!";
993 : }
994 :
995 0 : string UGProfileNode::total_memory_sorted(double dSkipMarginal) const
996 : {
997 0 : return "Profiler not available!";
998 : }
999 :
1000 0 : string UGProfileNode::entry_count_sorted(double dSkipMarginal) const
1001 : {
1002 0 : return "Profiler not available!";
1003 : }
1004 :
1005 : /*
1006 : string UGProfileNode::child_self_time_sorted(double dSkipMarginal) const
1007 : {
1008 : return "Profiler not available!";
1009 : }
1010 :
1011 : string UGProfileNode::child_self_time_sorted() const
1012 : {
1013 : return child_self_time_sorted(0.0);
1014 : }
1015 :
1016 : string UGProfileNode::total_time_sorted() const
1017 : {
1018 : return total_time_sorted(0.0);
1019 : }
1020 : */
1021 :
1022 : /// \return true if node has been found
1023 0 : bool UGProfileNode::valid() const
1024 : {
1025 0 : return false;
1026 : }
1027 :
1028 0 : string UGProfileNode::groups() const
1029 : {
1030 0 : return "Profiler not available!";
1031 : }
1032 :
1033 0 : const UGProfileNode *GetProfileNode(const char *name)
1034 : {
1035 0 : return PROFILER_NULL_NODE;
1036 : }
1037 :
1038 0 : const UGProfileNode *GetProfileNode(const char *name, const UGProfileNode *node)
1039 : {
1040 0 : return PROFILER_NULL_NODE;
1041 : }
1042 :
1043 0 : void WriteProfileDataXML(const char *filename)
1044 : {
1045 0 : return;
1046 : }
1047 :
1048 0 : void WriteProfileDataXML(const char *filename, int procId)
1049 : {
1050 0 : return;
1051 : }
1052 :
1053 0 : bool GetProfilerAvailable()
1054 : {
1055 0 : return false;
1056 : }
1057 :
1058 0 : void PrintLUA()
1059 : {
1060 : UG_LOG("LUA Profiler not available.");
1061 0 : }
1062 :
1063 0 : void UGProfileNode::CheckForTooSmallNodes()
1064 : {
1065 :
1066 0 : }
1067 :
1068 0 : void WriteCallLog(const char *filename) {}
1069 0 : void WriteCallLog(const char *filename, int procId) {}
1070 : #endif // SHINY
1071 :
1072 : } // namespace ug
1073 :
1074 :
1075 : //////////////////////////////////////////////////////////////////////////////////////////
|