forked from sysprog21/clz-tests
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.c
123 lines (112 loc) · 3.88 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <time.h>
#include <assert.h>
#include <omp.h>
#include "clz.h"
static inline __attribute__((always_inline))
void get_cycles(unsigned *high, unsigned *low)
{
asm volatile ("CPUID\n\t"
"RDTSC\n\t"
"mov %%edx, %0\n\t"
"movl %%eax, %1\n\t": "=r" (*high), "=r" (*low)::"%rax","%rbx","%rcx","%rdx"
);
}
static inline __attribute__((always_inline))
void get_cycles_end(unsigned *high, unsigned *low)
{
asm volatile("RDTSCP\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
"CPUID\n\t": "=r" (*high), "=r" (*low)::"%rax","%rbx","%rcx","%rdx"
);
}
static inline __attribute__((always_inline))
uint64_t diff_in_cycles(unsigned high1, unsigned low1,
unsigned high2, unsigned low2)
{
uint64_t start,end;
start = (((uint64_t) high1 << 32) | low1);
end = (((uint64_t) high2 << 32) | low2);
return end - start;
}
static inline __attribute__((unused))
double diff_in_second(struct timespec t1, struct timespec t2)
{
struct timespec diff;
if (t2.tv_nsec - t1.tv_nsec < 0) {
diff.tv_sec = t2.tv_sec - t1.tv_sec - 1;
diff.tv_nsec = t2.tv_nsec - t1.tv_nsec + 1000000000;
} else {
diff.tv_sec = t2.tv_sec - t1.tv_sec;
diff.tv_nsec = t2.tv_nsec - t1.tv_nsec;
}
return (diff.tv_sec + diff.tv_nsec / 1000000000.0);
}
int main(int argc, char *argv[])
{
FILE *output = NULL;
uint64_t timec, time_all = 0;
unsigned timec_high1, timec_low1, timec_high2, timec_low2;
#if defined(correct)
// *INDENT-OFF*
for (int try = 0; try < 20; try++) {
timec = 0;
get_cycles(&timec_high1, &timec_low1);
printf("%u:%d \n", 0, clz(0));
assert((sizeof(uint32_t) * 8) == clz(0));
for (uint32_t i = 0; i < 31; i++) {
printf("%u:%d \n", 1 << i, clz(1 << i));
for (uint32_t j = (1 << i); j < (1 << (i + 1)); j++) {
assert( __builtin_clz (j) == clz(j));
}
}
printf("%u:%d \n", 1u << 31, clz(1u << 31));
for (uint32_t j = (1u << 31); j < UINT32_MAX; j++)
assert(__builtin_clz(j) == clz(j));
assert(__builtin_clz(UINT32_MAX) == clz(UINT32_MAX));
get_cycles_end(&timec_high2, &timec_low2);
timec = diff_in_cycles(timec_high1, timec_low1, timec_high2, timec_low2);
printf("executiom time : %lu cycles\n", timec);
}
// *INDENT-ON*
#else
assert(argv[1] && argv[2] && "insert argument");
unsigned int min = atoi(argv[1]);
unsigned int max = atoi(argv[2]);
#if defined(recursive)
output = fopen("recursive.txt","a");
#elif defined(iteration)
output = fopen("iteration.txt","a");
#elif defined(byte)
output = fopen("byte.txt","a");
#elif defined(bitmask)
output = fopen("bitmask.txt","a");
#elif defined(harley)
output = fopen("harley.txt","a");
#endif
uint64_t timecall;
for (uint32_t i = min; i < max; i++) {
timecall = 0;
#ifdef MP
#pragma omp parallel for
#endif
for (uint32_t j = 0; j < 100; j++) {
get_cycles(&timec_high1, &timec_low1);
clz(i);
get_cycles_end(&timec_high2, &timec_low2);
timec = diff_in_cycles(timec_high1, timec_low1, timec_high2, timec_low2);
timecall += timec;
}
time_all += (timecall / 100);
fprintf(output, "%d %lu cycles\n", i, timecall / 100);
//printf("%d %lu cycles\n", i, timecall / 100);
}
fclose(output);
printf("took %lf million cycles\n", time_all / 1000000.0);
#endif
return 0;
}