forked from travisdowns/uarch-bench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtimers.cpp
165 lines (131 loc) · 4.79 KB
/
timers.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
* timers.cpp
*
* Implementation for some generic timers defined mostly in timers.hpp.
*/
#include <array>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include "timers.hpp"
#include "stats.hpp"
using namespace std;
/*
 * CAL_FN names the calibration routine that CalcCpuFreq times: it is called as
 * CAL_FN(iters, nullptr) and is expected to execute a 1-cycle loop 'iters' times.
 *
 * Only the names are introduced here; the routines themselves are not defined in this block
 * (presumably bench2_f is a function type, making these plain declarations - confirm in the
 * header that declares it).
 */
#if UARCH_BENCH_PORTABLE
/* UARCH_BENCH_PORTABLE is non-zero: calibrate with portable_add_chain */
bench2_f portable_add_chain;
#define CAL_FN portable_add_chain
#else
/* otherwise: calibrate with add_calibration_x86, which has C linkage */
extern "C" bench2_f add_calibration_x86;
#define CAL_FN add_calibration_x86
#endif
using namespace Stats;
using namespace std::chrono;
/*
 * Determine the CPU frequency in GHz.
 *
 * If the environment variable UARCH_BENCH_CLOCK_MHZ starts with a positive, finite number
 * (interpreted as MHz, fractional values allowed), that value is converted to GHz and returned
 * without running any calibration. A value that does not parse to such a number is reported on
 * stderr and ignored, and calibration runs as if the variable were unset.
 *
 * Otherwise the frequency is calculated by timing a tight loop that we expect to take one
 * iteration per cycle.
 *
 * Template parameters:
 *   ITERS  - the base number of iterations to use: the calibration routine is actually run
 *            twice, once with ITERS iterations and once with 2*ITERS, and a delta is used to
 *            remove measurement overhead. Must be divisible by 4.
 *   CLOCK  - type providing a static nanos() used to timestamp the calibration runs.
 *   TRIES  - number of samples taken per pass; the median of the final pass is used.
 *   WARMUP - number of additional passes run before the pass whose samples are kept.
 *
 * Returns the frequency in GHz (calibration iterations per nanosecond).
 */
template <size_t ITERS, typename CLOCK, size_t TRIES = 10, size_t WARMUP = 100>
double CalcCpuFreq() {
    static_assert((ITERS & 3) == 0, "iters must be divisible by 4 because we unroll some loops by 4");

    if (const char* mhz = std::getenv("UARCH_BENCH_CLOCK_MHZ")) {
        // strtod never throws and keeps fractional MHz; 'end == mhz' means no number was parsed.
        char* end = nullptr;
        const double parsedMhz = std::strtod(mhz, &end);
        if (end != mhz && std::isfinite(parsedMhz) && parsedMhz > 0.0) {
            double ghz = parsedMhz / 1000.0;
            fprintf(stderr, "Frequency set to %6.3f GHz using UARCH_BENCH_CLOCK_MHZ\n", ghz);
            return ghz;
        }
        // A zero, negative or unparseable frequency would poison every later cycle calculation,
        // so fall back to measuring instead.
        fprintf(stderr, "Ignoring invalid UARCH_BENCH_CLOCK_MHZ value '%s', running calibration\n", mhz);
    } else {
        fprintf(stderr, "UARCH_BENCH_CLOCK_MHZ not set, running calibration\n");
    }

    std::array<nanoseconds::rep, TRIES> results;

    // Every pass overwrites the same slots, so the first WARMUP passes only serve as warmup and
    // the samples of the final pass are the ones that survive.
    for (size_t w = 0; w < WARMUP + 1; w++) {
        for (size_t r = 0; r < TRIES; r++) {
            auto t0 = CLOCK::nanos();
            CAL_FN(ITERS, nullptr);
            auto t1 = CLOCK::nanos();
            CAL_FN(ITERS * 2, nullptr);
            auto t2 = CLOCK::nanos();
            // time(2 * ITERS) - time(ITERS): the fixed overhead cancels out, leaving the time
            // taken by ITERS iterations
            results[r] = (t2 - t1) - (t1 - t0);
        }
    }

    // fprintf(stderr, "All runs:\n");
    // for (auto &r : results) {
    //     fprintf(stderr, "%lu\n", (unsigned long)r);
    // }

    DescriptiveStats stats = get_stats(results.begin(), results.end());

    // iterations per nanosecond == GHz, given one iteration per cycle
    double ghz = static_cast<double>(ITERS) / stats.getMedian();
    return ghz;
}
/*
 * Return the CPU frequency in GHz as reported by CalcCpuFreq.
 *
 * The value is obtained on the first call (10000 base iterations, 1000 tries) and the same
 * cached number is handed back on every later call.
 */
template <typename CLOCK>
double ClockTimerT<CLOCK>::getGHz() {
    static const double cachedGhz = CalcCpuFreq<10000, CLOCK, 1000>();
    return cachedGhz;
}
/*
 * Print the CPU speed returned by getGHz() to the context's output stream, in fixed notation
 * with three digits after the decimal point.
 */
template <typename CLOCK>
void ClockTimerT<CLOCK>::init(Context &c) {
    auto&& os = c.out();
    // the label goes out before getGHz() is evaluated, as in a single chained expression
    os << "Median CPU speed: ";
    os << std::fixed << std::setw(4) << std::setprecision(3);
    os << getGHz() << " GHz" << std::endl;
}
// explicit instantiation for the default clock
// getGHz() and init() are member templates defined in this file, so the DefaultClockTimer
// versions are instantiated here explicitly (presumably so that other translation units, which
// only see the declarations, can link against them).
template double DefaultClockTimer::getGHz();
template void DefaultClockTimer::init(Context& c);
// stuff for calculating clock overhead
/*
 * Collect statistics on the difference between two back-to-back CLOCK::nanos() readings.
 *
 * ITERS samples are taken per sweep and the sweep is repeated three times over the same buffer,
 * so the returned statistics describe the samples of the third sweep only.
 */
template <size_t ITERS, typename CLOCK>
DescriptiveStats CalcClockRes() {
    std::array<nanoseconds::rep, ITERS> deltas;
    int sweepsLeft = 3;
    while (sweepsLeft-- > 0) {
        for (auto& delta : deltas) {
            const auto first = CLOCK::nanos();
            const auto second = CLOCK::nanos();
            delta = second - first;
        }
    }
    return get_stats(deltas.begin(), deltas.end());
}
/* Receives the accumulated clock readings of each sample so the timed calls have a visible use. */
volatile int64_t sink;

/*
 * Collect statistics on the cost of a single CLOCK::nanos() call.
 *
 * Each of the ITERS samples times a run of 1000 consecutive CLOCK::nanos() calls with
 * DefaultClockTimer::now() and stores the elapsed amount divided by 1000. The whole sampling
 * sweep is repeated three times over the same buffer, so the returned statistics describe the
 * third sweep only.
 */
template <size_t ITERS, typename CLOCK>
DescriptiveStats CalcClockCost() {
    using timer = DefaultClockTimer;
    constexpr int kCallsPerSample = 1000;

    std::array<double, ITERS> results;
    for (int r = 0; r < 3; r++) {
        for (size_t i = 0; i < ITERS; i++) {
            // Accumulate in an unsigned type: if nanos() returns an absolute nanosecond
            // timestamp (around 1.7e18 for a realtime clock), the sum of 1000 readings does not
            // fit in int64_t, and signed overflow is undefined behaviour. Unsigned arithmetic
            // simply wraps, which is fine because the sum only exists to keep the calls alive.
            uint64_t sum = 0;
            int64_t before = timer::now();
            for (int j = 0; j < kCallsPerSample; j++) {
                sum += static_cast<uint64_t>(CLOCK::nanos());
            }
            results[i] = (timer::now() - before) / static_cast<double>(kCallsPerSample);
            sink = static_cast<int64_t>(sum);
        }
    }
    return get_stats(results.begin(), results.end());
}
/*
 * Write one row of the clock table to 'out': the given name (width 48), the CalcClockRes
 * summary (width 28) and the CalcClockCost summary (width 30), followed by a newline and flush.
 * Both measurements use 100 samples of CLOCK.
 */
template <typename CLOCK>
void printOneClock(std::ostream& out, const char* name) {
    out << std::setw(48) << name;

    auto resolution = CalcClockRes<100, CLOCK>();
    out << std::setw(28) << resolution.getString4(5, 1);

    auto runtime = CalcClockCost<100, CLOCK>();
    out << std::setw(30) << runtime.getString4(5, 1) << std::endl;
}
/*
 * A stand-in clock whose nanos() reading is always zero. It is the last entry measured by
 * printClockOverheads (presumably as a baseline showing the overhead of the measurement
 * loops themselves).
 */
struct DumbClock {
    static int64_t nanos() {
        const int64_t fixedReading = 0;
        return fixedReading;
    }
};
/*
 * Print a table to 'out' with one row per clock, giving the statistics produced by
 * printOneClock (resolution and per-call runtime) for the std::chrono clocks, the
 * clock_gettime-style clock ids and finally DumbClock.
 *
 * Clock ids that are not available everywhere (the COARSE variants, CLOCK_MONOTONIC_RAW and
 * CLOCK_BOOTTIME) are only measured when the corresponding macro is defined, so the function
 * still compiles on platforms that lack them.
 */
void printClockOverheads(std::ostream& out) {
    out << "----- Clock Stats --------\n";
    out << " Resolution (ns) Runtime (ns)" << endl;
    out << " Name min/ med/ avg/ max min/ med/ avg/ max" << endl;

    // The macro stringifies its argument, so the text passed here is exactly the name printed
    // in the first column.
#define PRINT_CLOCK(clock) printOneClock< clock >(out, #clock);

    PRINT_CLOCK(StdClockAdapt<system_clock>);
    PRINT_CLOCK(StdClockAdapt<steady_clock>);
    PRINT_CLOCK(StdClockAdapt<high_resolution_clock>);

    PRINT_CLOCK(GettimeAdapter<CLOCK_REALTIME>);
#ifdef CLOCK_REALTIME_COARSE
    PRINT_CLOCK(GettimeAdapter<CLOCK_REALTIME_COARSE>);
#endif
    PRINT_CLOCK(GettimeAdapter<CLOCK_MONOTONIC>);
#ifdef CLOCK_MONOTONIC_COARSE
    PRINT_CLOCK(GettimeAdapter<CLOCK_MONOTONIC_COARSE>);
#endif
#ifdef CLOCK_MONOTONIC_RAW
    PRINT_CLOCK(GettimeAdapter<CLOCK_MONOTONIC_RAW>);
#endif
    PRINT_CLOCK(GettimeAdapter<CLOCK_PROCESS_CPUTIME_ID>);
    PRINT_CLOCK(GettimeAdapter<CLOCK_THREAD_CPUTIME_ID>);
#ifdef CLOCK_BOOTTIME
    PRINT_CLOCK(GettimeAdapter<CLOCK_BOOTTIME>);
#endif

    PRINT_CLOCK(DumbClock);

    out << endl;
}