-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.cpp
117 lines (100 loc) · 3.02 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// Virtual object array that uses all graphics cards in the system as storage
// to achieve seamless concurrent access to all elements.
// Example: a 4.5GB "Particle" array distributed across 3 graphics cards, each serving 1.5GB of its VRAM.
// RAM is used only for the paging system; the amount can be tuned with the "number of active pages" parameter.
// v1.0:
//   - only get() and set() methods; no defragmentation, no prefetching
//   - the array size needs to be an integer multiple of the page size
//   - the object size can be anything (a 4kB object size makes random-access performance much better than an average SSD;
//     a 44-byte object needs sequential access to be fast)
// For Windows with a 64-bit CPU + MSVC++: make sure _AMD64_ is defined before the headers below are included.
#ifndef _AMD64_
#define _AMD64_
#endif
#include <climits>
#include <cstddef>
#include <iostream>

#include "GraphicsCardSupplyDepot.h"
#include "VirtualMultiArray.h"
#include "CpuBenchmarker.h"
// Plain particle record used as the element type of the virtual array in main().
// Holds ten float fields plus an integer id; every float field starts at zero.
// Member order is kept exactly as declared so the object layout does not change.
class Particle
{
public:
    // Creates a particle with all fields zero and id 0.
    Particle() : Particle(0) {}

    // Creates a particle with all float fields zero and the given id.
    // Left implicit (not `explicit`) so existing call sites relying on
    // int -> Particle conversion keep compiling.
    Particle(int idP)
        : x(0), y(0), z(0),
          vx(0), vy(0), vz(0),
          vx_old(0), vy_old(0), vz_old(0),
          m(0), id(idP)
    {
    }

    // Returns the id given at construction.
    // const-qualified so it can be called on const objects and const references.
    int getId() const { return id; }

private:
    float x, y, z;                 // position
    float vx, vy, vz;              // velocity
    float vx_old, vy_old, vz_old;  // presumably the previous-step velocity - TODO confirm
    float m;                       // presumably mass - TODO confirm
    int id;                        // identifier returned by getId()
};
// Benchmark / self-check driver for VirtualMultiArray<Particle>.
//
// Sequence:
//   1. build the virtual array on the GPUs returned by depot.requestGpus()
//   2. single-threaded set/get with a stride of (pageSize + 1), labelled "uncached"
//   3. single-threaded set/get over the first sampleCount elements, labelled "cached"
//   4. OpenMP-parallel sequential set and get over all n elements
// Every get is checked against the id that was written; mismatches are printed to std::cout.
// Each measured phase lives in its own scope together with a CpuBenchmarker object
// (presumably it reports the elapsed time when it goes out of scope - confirm in CpuBenchmarker.h).
//
// Always returns 0.
int main(int argC, char** argV)
{
    // Command-line arguments are not used by this benchmark.
    (void)argC;
    (void)argV;

    GraphicsCardSupplyDepot depot;

    const size_t n = 1024 * 30000;
    const size_t pageSize = 1024;
    const int maxActivePagesPerGpu = 16;

    // n needs to be an integer multiple of pageSize - enforced at compile time instead of by comment only.
    static_assert(n % pageSize == 0, "n must be an integer multiple of pageSize");
    // Particle ids and the OpenMP loop counters below are int, so every index must be representable as int.
    static_assert(n <= static_cast<size_t>(INT_MAX), "n must fit in int");

    const int repeats = 5;          // how many times each single-threaded phase is measured
    const int sampleCount = 10000;  // elements touched per single-threaded phase
    const size_t sampleBytes = sampleCount * sizeof(Particle);
    // A stride of one page plus one element makes consecutive accesses land in different pages.
    const size_t stride = pageSize + 1;
    static_assert(static_cast<size_t>(sampleCount - 1) * stride < n, "strided sample indices must stay inside the array");
    static_assert(static_cast<size_t>(sampleCount) <= n, "sequential sample indices must stay inside the array");

    VirtualMultiArray<Particle> test;
    {
        CpuBenchmarker bench(0, "init");
        // NOTE(review): the meaning of the trailing {15,...} list is defined by VirtualMultiArray's
        // constructor (not visible here) - presumably one entry per GPU; confirm in VirtualMultiArray.h.
        test = VirtualMultiArray<Particle>(n, depot.requestGpus(), pageSize, maxActivePagesPerGpu, {15,15,15,15,15,15,15,15,15,15,15});
    }

    for (int pass = 0; pass < repeats; pass++)
    {
        CpuBenchmarker bench(sampleBytes, "single threaded set, uncached", sampleCount);
        for (int i = 0; i < sampleCount; i++)
        {
            const size_t index = static_cast<size_t>(i) * stride;
            test.set(index, Particle(static_cast<int>(index)));
        }
    }

    for (int pass = 0; pass < repeats; pass++)
    {
        CpuBenchmarker bench(sampleBytes, "single threaded ---get---, uncached", sampleCount);
        for (int i = 0; i < sampleCount; i++)
        {
            const size_t index = static_cast<size_t>(i) * stride;
            auto var = test.get(index);
            if (var.getId() != static_cast<int>(index))
            {
                std::cout << "Error!" << std::endl;
            }
        }
    }

    for (int pass = 0; pass < repeats; pass++)
    {
        CpuBenchmarker bench(sampleBytes, "single threaded set, cached", sampleCount);
        for (int i = 0; i < sampleCount; i++)
        {
            test.set(i, Particle(i));
        }
    }

    for (int pass = 0; pass < repeats; pass++)
    {
        CpuBenchmarker bench(sampleBytes, "single threaded ---get---, cached", sampleCount);
        for (int i = 0; i < sampleCount; i++)
        {
            auto var = test.get(i);
            if (var.getId() != i)
            {
                std::cout << "Error!" << std::endl;
            }
        }
    }

    // The loop counters stay int on purpose: OpenMP 2.0 (the version MSVC implements) requires a
    // signed integer loop variable. The static_assert above guarantees this conversion is lossless.
    const int elementCount = static_cast<int>(n);

    // NOTE(review): the parallel phases assume VirtualMultiArray::set/get may be called
    // concurrently from several threads, as the file header comment states.
    {
        CpuBenchmarker bench(n * sizeof(Particle), "multithreaded sequential set", n);
        #pragma omp parallel for schedule(guided)
        for (int i = 0; i < elementCount; i++)
        {
            test.set(i, Particle(i));
        }
    }

    {
        CpuBenchmarker bench(n * sizeof(Particle), "multithreaded sequential get", n);
        #pragma omp parallel for schedule(guided)
        for (int i = 0; i < elementCount; i++)
        {
            if (test.get(i).getId() != i)
            {
                std::cout << "!!! error at " << i << std::endl;
            }
        }
    }

    return 0;
}