#!/usr/bin/python
"""
Postprocessing module for IOzone. It is capable of picking results from an
IOzone run, calculating the geometric mean of all throughput results for
a given file size or record size, and then generating a series of 2D and 3D
graphs. The graph generation functionality depends on gnuplot; if it
is not present, the module degrades gracefully.
@copyright: Red Hat 2010
"""
import os, sys, optparse, logging, math, time
import common
from autotest.client.shared import logging_config, logging_manager
from autotest.client.shared import error
from autotest.client import utils, os_dep
import utils_misc
_LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread',
'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread',
'fwrite', 'frewrite', 'fread', 'freread']
def geometric_mean(values):
"""
Evaluates the geometric mean for a list of numeric values.
@param values: List with values.
    @return: Single value representing the geometric mean of the values, or
            None if the list is empty or contains non-numeric data.
@see: http://en.wikipedia.org/wiki/Geometric_mean
"""
try:
values = [int(value) for value in values]
except ValueError:
return None
n = len(values)
if n == 0:
return None
return math.exp(sum([math.log(x) for x in values])/n)
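# Worked example (values are illustrative, not from an actual run): the
# geometric mean of three throughput samples is the cube root of their
# product, e.g.
#
#     geometric_mean([100, 200, 400])  ->  (100 * 200 * 400) ** (1 / 3.0) ~= 200.0
#
# while a list with a non-numeric entry (e.g. ['100', 'abc']) returns None.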
def compare_matrices(matrix1, matrix2, threshold=0.05):
    """
    Compare 2 matrices nxm and return a matrix nxm with comparison data.

    @param matrix1: Reference matrix with numeric data.
    @param matrix2: Matrix that will be compared against the reference.
    @param threshold: Any relative difference bigger than this fraction
            (default 5%) will be reported as a regression or improvement.
    @return: Tuple (new_matrix, improvements, regressions, total).
    """
    improvements = 0
    regressions = 0
    same = 0
    new_matrix = []
    for line1, line2 in zip(matrix1, matrix2):
        new_line = []
        for col, (element1, element2) in enumerate(zip(line1, line2)):
            ratio = float(element2) / float(element1)
            # Percent difference relative to the reference value
            difference = 100 * (ratio - 1)
            if ratio < (1 - threshold):
                regressions += 1
                new_line.append(difference)
            elif ratio > (1 + threshold):
                improvements += 1
                new_line.append("+" + str(difference))
            else:
                same += 1
                if col == 0:
                    # First column holds the size label, keep it as-is
                    new_line.append(element1)
                else:
                    new_line.append(".")
        new_matrix.append(new_line)
total = improvements + regressions + same
return (new_matrix, improvements, regressions, total)
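# Illustrative comparison (hypothetical data): with the default 5% threshold,
#
#     compare_matrices([[64, 1000, 2000]], [[64, 1200, 1980]])
#
# keeps the leading size column (64), reports the second column as an
# improvement of roughly "+20.0" percent, marks the third column as "." since
# it is within the threshold, and returns counts of 1 improvement,
# 0 regressions and 3 total cells.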
class IOzoneAnalyzer(object):
"""
Analyze an unprocessed IOzone file, and generate the following types of
report:
* Summary of throughput for all file and record sizes combined
* Summary of throughput for all file sizes
* Summary of throughput for all record sizes
    If two files are provided to the analyzer object, a comparison
    between the two runs is made, searching for performance regressions.
"""
def __init__(self, list_files, output_dir):
self.list_files = list_files
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
self.output_dir = output_dir
logging.info("Results will be stored in %s", output_dir)
def average_performance(self, results, size=None):
"""
Flattens a list containing performance results.
@param results: List of n lists containing data from performance runs.
@param size: Numerical value of a size (say, file_size) that was used
to filter the original results list.
        @return: Single list with the averaged data (converted to MB/s) from
                the performance runs.
"""
average_line = []
if size is not None:
average_line.append(size)
for i in range(2, 15):
average = geometric_mean([line[i] for line in results]) / 1024.0
average = int(average)
average_line.append(average)
return average_line
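    # Illustrative arithmetic (numbers are made up): if two result lines hold
    # 98304 and 196608 KB/s in their 'write' column (index 2), the averaged
    # value reported for that column is
    #
    #     int(geometric_mean([98304, 196608]) / 1024.0) == 135  (MB/s)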
def process_results(self, results, label=None):
"""
        Process a list of IOzone results according to label.

        @param results: A list of n x m columns with original IOzone results.
        @param label: IOzone column label used to filter and compute geometric
                mean results, in practical terms either 'file_size' or
                'record_size'.
        @return: A list of rows with geometric averages for each value of the
                given label (e.g. the average for every file size), or a
                single averaged row when no label is given.
"""
performance = []
if label is not None:
index = _LABELS.index(label)
sizes = utils_misc.unique([line[index] for line in results])
sizes.sort()
for size in sizes:
r_results = [line for line in results if line[index] == size]
performance.append(self.average_performance(r_results, size))
else:
performance.append(self.average_performance(results))
return performance
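    # Sketch of the grouping step (sizes are hypothetical): with
    # label='file_size', result lines are grouped by their first column, so
    # all lines with file_size == 64 are averaged into one row
    # [64, <13 averaged throughput columns>], then 128, 256, and so on.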
def parse_file(self, fileobj):
"""
Parse an IOzone results file.
        @param fileobj: File object that will be parsed.
@return: Matrix containing IOzone results extracted from the file.
"""
lines = []
for line in fileobj.readlines():
fields = line.split()
if len(fields) != 15:
continue
try:
lines.append([int(i) for i in fields])
except ValueError:
continue
return lines
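    # Example of a line that parse_file keeps (the values are made up): a raw
    # IOzone data row with exactly 15 integer fields, such as
    #
    #     64 4 99028 194722 285873 298063 265560 267113 247618 265207 253117 199521 296743 290493 293756
    #
    # Header and separator lines have a different field count (or non-integer
    # fields) and are silently skipped.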
def report(self, overall_results, record_size_results, file_size_results):
"""
        Generates analysis data for an IOzone run.

        Writes a report both to the logs (with nice headers) and to output
        files for further processing (graph generation).

        @param overall_results: 1x13 matrix containing IOzone results for all
                file and record sizes combined.
        @param record_size_results: nx14 matrix containing IOzone results for
                each record size tested.
        @param file_size_results: nx14 matrix containing IOzone results for
                each file size tested.
"""
# Here we'll use the logging system to put the output of our analysis
# to files
logger = logging.getLogger()
formatter = logging.Formatter("")
logging.info("")
logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec")
logging.info("")
logging.info("FILE & RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE")
logging.info("SIZES (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
logging.info("-------------------------------------------------------------------------------------------------------------------")
for result_line in overall_results:
logging.info("ALL %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
logging.info("")
logging.info("DRILLED DATA:")
logging.info("")
logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec")
logging.info("")
logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ")
logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
logging.info("--------------------------------------------------------------------------------------------------------------")
foutput_path = os.path.join(self.output_dir, '2d-datasource-file')
if os.path.isfile(foutput_path):
os.unlink(foutput_path)
foutput = logging.FileHandler(foutput_path)
foutput.setFormatter(formatter)
logger.addHandler(foutput)
for result_line in record_size_results:
logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
logger.removeHandler(foutput)
logging.info("")
logging.info("")
logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec")
logging.info("")
logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ")
logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
logging.info("--------------------------------------------------------------------------------------------------------------")
routput_path = os.path.join(self.output_dir, '2d-datasource-record')
if os.path.isfile(routput_path):
os.unlink(routput_path)
routput = logging.FileHandler(routput_path)
routput.setFormatter(formatter)
logger.addHandler(routput)
for result_line in file_size_results:
logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line))
logger.removeHandler(routput)
logging.info("")
def report_comparison(self, record, file_size_results):
"""
Generates comparison data for 2 IOZone runs.
It compares 2 sets of nxm results and outputs a table with differences.
If a difference higher or smaller than 5% is found, a warning is
triggered.
@param record: Tuple with 4 elements containing results for record size.
@param file_size_results: Tuple with 4 elements containing results for file size.
"""
(record_size, record_improvements, record_regressions,
record_total) = record
(file_size, file_improvements, file_regressions,
file_total) = file_size_results
logging.info("ANALYSIS of DRILLED DATA:")
logging.info("")
logging.info("TABLE: RECsize Difference between runs Results are % DIFF")
logging.info("")
logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ")
logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
logging.info("--------------------------------------------------------------------------------------------------------------")
for result_line in record_size:
logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)",
record_regressions,
(100 * record_regressions/float(record_total)),
record_improvements,
(100 * record_improvements/float(record_total)))
logging.info("")
logging.info("")
logging.info("TABLE: FILEsize Difference between runs Results are % DIFF")
logging.info("")
logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ")
logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ")
logging.info("--------------------------------------------------------------------------------------------------------------")
for result_line in file_size:
logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line))
logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)",
file_regressions,
(100 * file_regressions/float(file_total)),
file_improvements,
(100 * file_improvements/float(file_total)))
logging.info("")
def analyze(self):
"""
Analyzes and eventually compares sets of IOzone data.
"""
overall = []
record_size = []
file_size = []
for path in self.list_files:
fileobj = open(path, 'r')
logging.info('FILE: %s', path)
            results = self.parse_file(fileobj)
            fileobj.close()
overall_results = self.process_results(results)
record_size_results = self.process_results(results, 'record_size')
file_size_results = self.process_results(results, 'file_size')
self.report(overall_results, record_size_results, file_size_results)
if len(self.list_files) == 2:
overall.append(overall_results)
record_size.append(record_size_results)
file_size.append(file_size_results)
if len(self.list_files) == 2:
record_comparison = compare_matrices(*record_size)
file_comparison = compare_matrices(*file_size)
self.report_comparison(record_comparison, file_comparison)
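# Programmatic usage sketch (paths are hypothetical, given only to illustrate
# the class interface defined above):
#
#     analyzer = IOzoneAnalyzer(list_files=['iozone-1.txt', 'iozone-2.txt'],
#                               output_dir='/tmp/iozone-analysis')
#     analyzer.analyze()
#
# With two input files, the second run is compared against the first and
# differences beyond 5% are reported as regressions or improvements.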
class IOzonePlotter(object):
"""
    Plots graphs based on the results of an IOzone run, using gnuplot to
    generate the graphs.
"""
def __init__(self, results_file, output_dir):
self.active = True
try:
self.gnuplot = os_dep.command("gnuplot")
except Exception:
logging.error("Command gnuplot not found, disabling graph "
"generation")
self.active = False
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
self.output_dir = output_dir
if not os.path.isfile(results_file):
logging.error("Invalid file %s provided, disabling graph "
"generation", results_file)
self.active = False
self.results_file = None
else:
self.results_file = results_file
self.generate_data_source()
def generate_data_source(self):
"""
        Creates a data file without headers, for gnuplot consumption.
"""
results_file = open(self.results_file, 'r')
self.datasource = os.path.join(self.output_dir, '3d-datasource')
datasource = open(self.datasource, 'w')
for line in results_file.readlines():
fields = line.split()
if len(fields) != 15:
continue
try:
for i in fields:
int(i)
datasource.write(line)
except ValueError:
continue
        datasource.close()
        results_file.close()
def plot_2d_graphs(self):
"""
        For each one of the throughput parameters, generate a set of gnuplot
        commands that will plot a 2D graph of throughput versus file size.
"""
datasource_2d = os.path.join(self.output_dir, '2d-datasource-file')
for index, label in zip(range(2, 15), _LABELS[2:]):
commands_path = os.path.join(self.output_dir, '2d-%s.do' % label)
commands = ""
commands += "set title 'Iozone performance: %s'\n" % label
commands += "set logscale x\n"
commands += "set xlabel 'File size (KB)'\n"
commands += "set ylabel 'Througput (MB/s)'\n"
commands += "set terminal png small size 450 350\n"
commands += "set output '%s'\n" % os.path.join(self.output_dir,
'2d-%s.png' % label)
commands += ("plot '%s' using 1:%s title '%s' with lines \n" %
(datasource_2d, index, label))
commands_file = open(commands_path, 'w')
commands_file.write(commands)
commands_file.close()
try:
utils.system("%s %s" % (self.gnuplot, commands_path))
except error.CmdError:
logging.error("Problem plotting from commands file %s",
commands_path)
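    # For reference, the generated '2d-write.do' command file produced by the
    # loop above looks roughly like this ('<output_dir>' stands for the real
    # output directory):
    #
    #     set title 'Iozone performance: write'
    #     set logscale x
    #     set xlabel 'File size (KB)'
    #     set ylabel 'Throughput (MB/s)'
    #     set terminal png small size 450 350
    #     set output '<output_dir>/2d-write.png'
    #     plot '<output_dir>/2d-datasource-file' using 1:2 title 'write' with lines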
def plot_3d_graphs(self):
"""
For each one of the throughput parameters, generate a set of gnuplot
commands that will create a parametric surface with file size vs.
record size vs. throughput.
"""
        # Throughput columns start at the 3rd field of the raw data source
        # (gnuplot columns are 1-based), hence the 3..15 range.
        for index, label in zip(range(3, 16), _LABELS[2:]):
commands_path = os.path.join(self.output_dir, '%s.do' % label)
commands = ""
commands += "set title 'Iozone performance: %s'\n" % label
commands += "set grid lt 2 lw 1\n"
commands += "set surface\n"
commands += "set parametric\n"
commands += "set xtics\n"
commands += "set ytics\n"
commands += "set logscale x 2\n"
commands += "set logscale y 2\n"
commands += "set logscale z\n"
commands += "set xrange [2.**5:2.**24]\n"
commands += "set xlabel 'File size (KB)'\n"
commands += "set ylabel 'Record size (KB)'\n"
commands += "set zlabel 'Througput (KB/s)'\n"
commands += "set style data lines\n"
commands += "set dgrid3d 80,80, 3\n"
commands += "set terminal png small size 900 700\n"
commands += "set output '%s'\n" % os.path.join(self.output_dir,
'%s.png' % label)
commands += ("splot '%s' using 1:2:%s title '%s'\n" %
(self.datasource, index, label))
commands_file = open(commands_path, 'w')
commands_file.write(commands)
commands_file.close()
try:
utils.system("%s %s" % (self.gnuplot, commands_path))
except error.CmdError:
logging.error("Problem plotting from commands file %s",
commands_path)
def plot_all(self):
"""
Plot all graphs that are to be plotted, provided that we have gnuplot.
"""
if self.active:
self.plot_2d_graphs()
self.plot_3d_graphs()
class AnalyzerLoggingConfig(logging_config.LoggingConfig):
def configure_logging(self, results_dir=None, verbose=False):
super(AnalyzerLoggingConfig, self).configure_logging(use_console=True,
verbose=verbose)
if __name__ == "__main__":
parser = optparse.OptionParser("usage: %prog [options] [filenames]")
options, args = parser.parse_args()
logging_manager.configure_logging(AnalyzerLoggingConfig())
if args:
filenames = args
else:
parser.print_help()
sys.exit(1)
if len(args) > 2:
parser.print_help()
sys.exit(1)
o = os.path.join(os.getcwd(),
"iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S'))
if not os.path.isdir(o):
os.makedirs(o)
a = IOzoneAnalyzer(list_files=filenames, output_dir=o)
a.analyze()
p = IOzonePlotter(results_file=filenames[0], output_dir=o)
p.plot_all()