Skip to content

Commit

Permalink
trace: add command for printing wait profile
Browse files Browse the repository at this point in the history
See

  scripts/trace.py prof-wait -h

The command is using sched_wait and sched_wait_ret tracepoints to
calculate the amount of time a thread was waiting. Samples are
collected and presented in a form of call graph tree.

By default callees are closer to the root. To inverse the order pass
-r|--caller-oriented.

If there is too much output, it can be narrowed down using
--max-levels and --min-duration options.

The presented time spectrum can be narrowed down using --since and --until
options which accept timestamps.

Example:

  scripts/trace.py prof-wait --max-levels 3 trace-file

=== Thread 0xffffc0003eaeb010 ===

12.43 s (100.00%, #7696) All
 |-- 12.43 s (99.99%, #7658) sched::thread::do_wait_until
 |    |-- 10.47 s (84.22%, #6417) condvar::wait(lockfree::mutex*, unsigned long)
 |    |    condvar_wait
 |    |    |-- 6.47 s (52.08%, #6250) cv_timedwait
 |    |    |    txg_delay
 |    |    |    dsl_pool_tempreserve_space
 |    |    |    dsl_dir_tempreserve_space
 |    |    |    dmu_tx_try_assign
 |    |    |    dmu_tx_assign
 |    |    |
 |    |    |-- 2.37 s (19.06%, #24) arc_read_nolock
 |    |    |    arc_read
 |    |    |    dsl_read
 |    |    |    traverse_visitbp
 |    |    |
 |    |    |-- 911.75 ms (7.33%, #3) txg_wait_open
 |    |    |    dmu_tx_wait
 |    |    |    zfs_write
 |    |    |    vfs_file::write(uio*, int)
 |    |    |    sys_write
 |    |    |    pwritev
 |    |    |    writev
 |    |    |    __stdio_write
 |    |    |    __fwritex
 |    |    |    fwrite
 |    |    |    0x100000005a5f
 |    |    |    osv::run(std::string, int, char**, int*)

By default every thread has a separate tree, because duration is best
interpreted in the context of particular thread. There is however an
option to merge samples from all threads into one tree:
-m|--merge-threads. It may be useful if you want to inspect all paths
going in/out to/from particular function. The direction can be changed
with -r|--caller-oriented option. Function names is passed to --function
parameter.

Example: check where zfs_write() blocks:

  scripts/trace.py prof-wait -rm --function=zfs_write trace-file

7.46 s (100.00%, #7314) All
 zfs_write
 |-- 6.48 s (86.85%, #6371) dmu_tx_assign
 |    |-- 6.47 s (86.75%, #6273) dmu_tx_try_assign
 |    |    dsl_dir_tempreserve_space
 |    |    |-- 6.47 s (86.75%, #6248) dsl_pool_tempreserve_space
 |    |    |    txg_delay
 |    |    |    cv_timedwait
 |    |    |    condvar_wait
 |    |    |    condvar::wait(lockfree::mutex*, unsigned long)
 |    |    |    sched::thread::do_wait_until
 |    |    |
 |    |    |-- 87.87 us (0.00%, #24) mutex_lock
 |    |    |    sched::thread::do_wait_until
 |    |    |
 |    |    \-- 6.40 us (0.00%, #1) dsl_dir_tempreserve_impl
 |    |         mutex_lock
 |    |         sched::thread::do_wait_until
 |    |
 |    \-- 7.32 ms (0.10%, #98) mutex_lock
 |         sched::thread::do_wait_until
 |
 |-- 911.75 ms (12.22%, #3) dmu_tx_wait
 |    txg_wait_open
 |    condvar_wait
 |    condvar::wait(lockfree::mutex*, unsigned long)
 |    sched::thread::do_wait_until

Signed-off-by: Tomasz Grabiec <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
  • Loading branch information
tgrabiec authored and Pekka Enberg committed Jan 13, 2014
1 parent 60f39ae commit ca3be96
Show file tree
Hide file tree
Showing 3 changed files with 298 additions and 6 deletions.
247 changes: 247 additions & 0 deletions scripts/osv/prof.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
import sys
from operator import attrgetter
from osv import trace, tree, debug

class ProfNode(tree.TreeNode):
def __init__(self, key):
super(ProfNode, self).__init__(key)
self.hit_count = 0
self.resident_time = 0
self.tail = []

def hit(self, resident_time=None):
self.hit_count += 1
self.resident_time += resident_time

@property
def attributes(self):
return {
'hit_count': self.hit_count,
'resident_time': self.resident_time
}

class TimeRange(object):
"""
Represents time from @begin inclusive to @end exclusive.
None in any of these means open range from that side.
"""
def __init__(self, begin, end):
self.begin = begin
self.end = end

def __contains__(self, timestamp):
if self.begin and timestamp < self.begin:
return False
if self.end and timestamp >= self.end:
return False
return True

def intersection(self, other):
begin = max(self.begin, other.begin)

if self.end is None:
end = other.end
elif other.end is None:
end = self.end
else:
end = min(self.end, other.end)

if begin and end and begin > end:
return None

return TimeRange(begin, end)

class ProfSample:
def __init__(self, timestamp, thread, backtrace, resident_time=None):
self.thread = thread
self.backtrace = backtrace
self.timestamp = timestamp
self.resident_time = resident_time or 0

@property
def time_range(self):
return TimeRange(self.timestamp, self.timestamp + self.resident_time)

def intersection(self, time_range):
intersection = self.time_range.intersection(time_range)
if not intersection:
return None
return ProfSample(intersection.begin, self.thread, self.backtrace,
resident_time=intersection.end - intersection.begin)

time_units = [
(1e9 * 3600, "h"),
(1e9 * 60, "m"),
(1e9, "s"),
(1e6, "ms"),
(1e3, "us"),
(1, "ns")
]

def parse_time_as_nanos(text):
for level, name in sorted(time_units, key=lambda (level, name): -len(name)):
if text.endswith(name):
return float(text.rstrip(name)) * level
return float(text)

def format_time(time, format="%.2f %s"):
for level, name in sorted(time_units, key=lambda (level, name): -level):
if time >= level:
return format % (float(time) / level, name)
return str(time)

unimportant_functions = set([
'trace_slow_path',
'operator()',
'std::function<void ()>::operator()() const',
'tracepoint_base::do_log_backtrace',
'tracepoint_base::log_backtrace(trace_record*, unsigned char*&)',
'tracepoint_base::do_log_backtrace(trace_record*, unsigned char*&)'
])

bottom_of_stack = set(['thread_main', 'thread_main_c'])

def strip_garbage(backtrace):
def is_good(src_addr):
if not src_addr.name:
return True
return not src_addr.name in unimportant_functions

backtrace = list(filter(is_good, backtrace))

for i, src_addr in enumerate(backtrace):
if src_addr.name in bottom_of_stack:
return backtrace[:i]

return backtrace

def get_duration_profile(traces, entry_trace_name, exit_trace_name):
entry_traces_per_thread = {}
last_time = None

for trace in traces:
last_time = max(last_time, trace.time)

if not trace.backtrace:
continue

if trace.name == entry_trace_name:
if trace.thread in entry_traces_per_thread:
old = entry_traces_per_thread[trace.thread]
raise Exception('Double entry:\n%s\n%s\n' % (str(old), str(trace)))
entry_traces_per_thread[trace.thread] = trace

elif trace.name == exit_trace_name:
entry_trace = entry_traces_per_thread.pop(trace.thread, None)
if not entry_trace:
continue

duration = trace.time - entry_trace.time
yield ProfSample(entry_trace.time, trace.thread, entry_trace.backtrace, resident_time=duration)

for thread, trace in entry_traces_per_thread.iteritems():
duration = last_time - trace.time
yield ProfSample(trace.time, thread, trace.backtrace, resident_time=duration)

def collapse_similar(node):
while node.has_only_one_child():
child = next(node.children)
if node.attributes == child.attributes:
node.squash_child()
node.tail.append(child.key)
else:
break

for child in node.children:
collapse_similar(child)

def strip_level(node, level):
if level <= 0:
node.remove_all()
else:
for child in node.children:
strip_level(child, level - 1)

def find_frame_index(frames, name):
for i, src_addr in enumerate(frames):
if src_addr.name and src_addr.name == name:
return i
return None

def print_profile(samples, symbol_resolver, caller_oriented=False, merge_threads=True,
printer=sys.stdout.write, time_range=None, src_addr_formatter=debug.SourceAddress.__str__,
node_filter=None, order=None, root_function=None, max_levels=None):
thread_profiles = {}

if merge_threads:
root = ProfNode('All')
thread_profiles['All'] = root

for sample in samples:
if time_range:
sample = sample.intersection(time_range)
if not sample:
continue

frames = [symbol_resolver(addr - 1) for addr in sample.backtrace]
frames = strip_garbage(frames)
if caller_oriented:
frames.reverse()

if root_function:
i = find_frame_index(frames, root_function)
if i:
frames = frames[i:]
else:
frames = None

if frames:
if merge_threads:
node = root
else:
node = thread_profiles.get(sample.thread, None)
if not node:
node = ProfNode('All')
thread_profiles[sample.thread] = node

node.hit(sample.resident_time)
for src_addr in frames:
node = node.get_or_add(src_addr_formatter(src_addr))
node.hit(sample.resident_time)

def format_node(node, root):
attributes = ''
percentage_subject_getter = attrgetter('hit_count')

if root.resident_time:
attributes += format_time(node.resident_time) + ' '
percentage_subject_getter = attrgetter('resident_time')

bracket_attributes = []
if percentage_subject_getter(root):
percentage = float(percentage_subject_getter(node)) * 100 / percentage_subject_getter(root)
bracket_attributes.append('%.2f%%' % percentage)

bracket_attributes.append('#%d' % node.hit_count)

label = '\n '.join([node.key] + node.tail)
return "%s(%s) %s" % (attributes, ', '.join(bracket_attributes), label)

if not order:
order = lambda node: (-node.resident_time, -node.hit_count)

for thread, tree_root in sorted(thread_profiles.iteritems(), key=lambda (thread, node): order(node)):
collapse_similar(tree_root)

if max_levels:
strip_level(tree_root, max_levels)

if not merge_threads:
printer("\n=== Thread 0x%x ===\n\n" % thread)

tree.print_tree(tree_root,
formatter=lambda node: format_node(node, tree_root),
order_by=order,
printer=printer,
node_filter=node_filter)
7 changes: 3 additions & 4 deletions scripts/osv/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def print_node(node, is_last_history):
branches = (" |-- ", " \-- ")

label_lines = formatter(node).split('\n')

prefix_without_branch = ''.join(map(stems.__getitem__, is_last_history[:-1]))

if is_last_history:
Expand All @@ -58,10 +57,10 @@ def print_node(node, is_last_history):
printer("%s\n" % line)

children = sorted(filter(node_filter, node.children), key=order_by)
for child in children[:-1]:
print_node(child, is_last_history + [False])
if children:
print_node(children[-1], is_last_history + [True])
for child in children[:-1]:
print_node(child, is_last_history + [False])
print_node(children[-1], is_last_history + [True])

is_last = not is_last_history or is_last_history[-1]
if not is_last:
Expand Down
50 changes: 48 additions & 2 deletions scripts/trace.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import sys
import argparse
from osv import trace, debug
from osv import trace, debug, prof

class symbol_printer:
def __init__(self, resolver, formatter):
Expand Down Expand Up @@ -85,9 +85,49 @@ def list_trace(args):
for t in reader.get_traces():
print t.format(backtrace_formatter)

def add_profile_options(parser):
parser.add_argument("-r", "--caller-oriented", action='store_true', help="change orientation to caller-based; reverses order of frames")
parser.add_argument("-m", "--merge-threads", action='store_true', help="show one merged tree for all threads")
parser.add_argument("--function", action='store', help="use given function as tree root")
parser.add_argument("--since", action='store', help="show profile since this timestamp [ns]")
parser.add_argument("--until", action='store', help="show profile until this timestamp [ns]")
parser.add_argument("--min-duration", action='store', help="show only nodes with resident time not shorter than this, eg: 200ms")
parser.add_argument("--max-levels", action='store', help="maximum number of tree levels to show")

def get_wait_profile(traces):
return prof.get_duration_profile(traces, "sched_wait", "sched_wait_ret")

def int_or_none(value):
if value:
return int(value)
return None

def show_profile(args, sample_producer):
resolver = symbol_resolver(args)
time_range = prof.TimeRange(int_or_none(args.since), int_or_none(args.until))

if args.min_duration:
min_duration = prof.parse_time_as_nanos(args.min_duration)
node_filter = lambda node: node.resident_time >= min_duration
else:
node_filter = None

with get_trace_reader(args) as reader:
prof.print_profile(sample_producer(reader.get_traces()),
symbol_resolver=resolver,
caller_oriented=args.caller_oriented,
merge_threads=args.merge_threads,
src_addr_formatter=src_addr_formatter(args),
root_function=args.function,
node_filter=node_filter,
time_range=time_range,
max_levels=int_or_none(args.max_levels))

def prof_wait(args):
show_profile(args, get_wait_profile)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="trace file processing")

subparsers = parser.add_subparsers(help="Command")

cmd_list = subparsers.add_parser("list", help="List trace")
Expand All @@ -96,5 +136,11 @@ def list_trace(args):
add_trace_source_options(cmd_list)
cmd_list.set_defaults(func=list_trace)

cmd_prof_wait = subparsers.add_parser("prof-wait", help="Show wait profile")
add_symbol_resolution_options(cmd_prof_wait)
add_trace_source_options(cmd_prof_wait)
add_profile_options(cmd_prof_wait)
cmd_prof_wait.set_defaults(func=prof_wait)

args = parser.parse_args()
args.func(args)

0 comments on commit ca3be96

Please sign in to comment.