forked from microsoft/logparse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clography.py
executable file
·129 lines (115 loc) · 4.17 KB
/
clography.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python
import sys
import fileinput
from collections import defaultdict
from getopt import getopt, GetoptError
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import systemlog
# Parse the command line into (switch, value) pairs plus positional arguments.
# Switches followed by ':' (short form) or '=' (long form) take a value; the
# long forms of -i, -s and -m need the '=' or getopt refuses to give them one.
try:
    # I am bad at picking single-letter switches, and feel bad
    (options, arguments) = getopt(
        sys.argv[1:],
        'i:s:eturm:',
        ['interval=', 'scale=', 'events', 'events-only', 'show-unknown',
         'sort', 'min-size='])
except GetoptError as error:
    # Unknown switch or missing value: report it and stop.
    sys.stderr.write('%s\n' % str(error))
    sys.exit(1)
# fileinput.input() called without a file list reads the files named in
# sys.argv[1:], so strip the already-parsed switches from argv first
# (dirty, but it beats rewriting fileinput.input).
sys.argv = [sys.argv[0]]

# Defaults for every command-line switch.
interval = 3600     # -i: width of one time bucket, in seconds
scale = 10          # -s: divisor for byte-based weights, multiplier for marker sizes
allevents = False   # -e: include event category and type in the row labels
eventsonly = False  # -t: label rows as events rather than stages
unknowns = False    # -u: keep events whose type is 'unknown'
sort = False        # -r: order rows by label instead of first appearance
minsize = 0         # -m: lower bound for marker sizes

# Apply the switches that were actually given.  A value that is not a number
# is reported the same way as a getopt error instead of dumping a traceback.
try:
    for opt, arg in options:
        if opt in ('-i', '--interval'):
            interval = int(arg)
        elif opt in ('-s', '--scale'):
            scale = float(arg)
        elif opt in ('-e', '--events'):
            allevents = True
        elif opt in ('-t', '--events-only'):
            eventsonly = True
        elif opt in ('-u', '--show-unknown'):
            unknowns = True
        elif opt in ('-r', '--sort'):
            sort = True
        elif opt in ('-m', '--min-size'):
            minsize = int(arg)
except ValueError:
    sys.stderr.write('option %s requires a numeric value, got %r\n' % (opt, arg))
    sys.exit(1)

# Both values are used as divisors later on, so zero (or less) cannot work.
if interval <= 0 or scale <= 0:
    sys.stderr.write('interval and scale must be greater than zero\n')
    sys.exit(1)
# Read the log from the first positional argument when there is one; with no
# file list fileinput falls back to argv (emptied above) and then to stdin.
log = fileinput.input(arguments[0]) if arguments else fileinput.input()
# Bookkeeping for the plot:
#   stages: row label -> row number on the Y axis
#   enum:   next unused row number
#   data:   time bucket -> row label -> accumulated weight
stages = defaultdict(int)
enum = 0
data = defaultdict(lambda: defaultdict(int))

for event in systemlog.parse_log(log):
    stage = event['thread_name'] + ' ' + event['source_file']
    kind = event['event_type']
    if stage[0:3].isupper() or event['thread_name'] == 'main':  # skip rmi, handshaking, streams, etc
        continue
    if kind == 'unknown' and not unknowns:
        continue

    # Choose the row label this event is accumulated under.
    if kind == 'messages_dropped':
        stage = event['message_type'] + ' dropped'
    elif kind == 'begin_flush':
        stage = 'flushed bytes (serialized)'
    elif allevents:
        if not eventsonly:
            stage = ' '.join((event['event_category'], kind, stage))
        else:
            stage = event['event_category'] + ' ' + kind

    # Rows are numbered in order of first appearance.  With --sort the
    # numbering is left to the sorting step that follows this loop.
    if stage not in stages and not sort:
        stages[stage] = enum
        enum += 1

    # Bucket index: epoch seconds floor-divided by the bucket width.  '//'
    # keeps the key an integer regardless of the interpreter's '/' semantics.
    # NOTE(review): '%s' is a platform-specific strftime extension (not
    # available everywhere) -- confirm the target platforms support it.
    ts = int(event['date'].strftime('%s')) // interval

    # Weight added to the bucket depends on the kind of event.
    if kind == 'pause':
        # presumably milliseconds -> seconds; verify against systemlog
        data[ts][stage] += event['duration'] / 1000
    elif kind == 'messages_dropped':
        if 'internal_timeout' in event and event['internal_timeout'] is not None:
            data[ts][stage] += event['internal_timeout'] + event['cross_node_timeout']
        else:
            data[ts][stage] += event['messages_dropped']
    elif kind == 'begin_flush' and event['serialized_bytes'] is not None:
        # size divided by 1024**2 and then by the scale factor
        data[ts][stage] += (event['serialized_bytes'] / 1024**2 / scale)
    elif kind in ('incremental_compaction', 'large_partition'):
        data[ts][stage] += event['partition_size'] / 1024**2 / scale
    elif kind == 'begin_compaction':
        data[ts][stage] += len(event['input_sstables'])
    elif kind == 'end_compaction':
        data[ts][stage] += event['output_bytes'] / 1024**2 / scale
    else:
        # Everything else (including a flush without a byte count) counts once.
        data[ts][stage] += 1
if sort:
    # Number the rows in reverse-sorted label order.  The scan above does not
    # register labels in `stages` when sorting was requested, so collect them
    # from the inner keys of `data`, which hold every label that was counted.
    labels = set(stages)
    for info in data.values():
        labels.update(info)
    for row, label in enumerate(sorted(labels, reverse=True)):
        stages[label] = row
# Draw one translucent marker per (time bucket, row); marker area grows with
# the weight accumulated for that bucket.
fig, ax = plt.subplots()
# One colour per row, spread evenly over the rainbow colormap.
colors = cm.rainbow(np.linspace(0, 1, len(stages)))
for ts, info in data.items():
    for stage, weight in info.items():
        row = stages[stage]
        # Honour --min-size so that small buckets do not shrink to nothing.
        size = max(weight * scale, minsize)
        ax.scatter(ts, row, s=size, c=colors[row], alpha=0.5)
        if size > 700:
            # Put a small black dash at the centre of very large markers.
            ax.scatter(ts, row, s=size*0.01, c='black', alpha=1, marker='_', lw=1)

# Label each row with its name; lists (not dict views) are passed so the call
# works the same on every Python version.
plt.yticks(list(stages.values()), list(stages.keys()))
ax.set_xlabel('Time (%s second buckets)' % interval, fontsize=20)
if not eventsonly:
    ax.set_ylabel('Stage', fontsize=20)
else:
    ax.set_ylabel('Event', fontsize=20)
ax.grid(True)
plt.plot()
try:
    fig.tight_layout()
except ValueError:  # too many things on the Y axis
    print("Warning, tight layout not possible, too many items on the Y axis")
    # Fall back to fixed margins when the automatic layout gives up.
    fig.subplots_adjust(bottom=0.2)
    fig.subplots_adjust(top=1)
    fig.subplots_adjust(right=1)
    fig.subplots_adjust(left=0)
# Leave room on the left for the row labels.
plt.subplots_adjust(left=0.21)
plt.show()