-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweather-analysis.cpp
216 lines (168 loc) · 9.07 KB
/
weather-analysis.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#include <locale>
#include <cmath>
#include "Parser.hpp"
#include "Kernel.hpp"
/*
The application runs as a console app, where commands are entered by choosing from pre-set options. Both the small and large 'temp_lincolnshire' datasets are used within the application. The application allows switching between computing devices (platform and device), if required, before calculating the temperature data's statistics. The data is loaded traditionally using a standard C++ approach before being passed through multiple reduce kernels to calculate the statistics. Additionally, Selection Sort is used to sort the data into ascending order, providing the ability to calculate more advanced statistics, such as the median, 1st quartile, and 3rd quartile. This sorting algorithm is based on an implementation written by Bainville (2011).
The kernels used within the implementation are inspired by the 'reduce_add_3' kernel presented in Tutorial 3 (Millard, 2020). The approach used loads the temperature data (floating-point numbers) from the selected file into an integer vector. These values are multiplied by 100 so that two decimal places of each floating-point value are retained when it is stored in the integer vector. This method provides the ability to use atomic operations, which only accept integer values as input. Additionally, barrier functions are used throughout each kernel. Both barrier functions and atomic operations assist with work-item synchronisation, preventing data conflicts and ensuring that every work item has reached the same point in its processing, which is crucial for calculating the statistics correctly. Once the kernels' calculations have completed, the output values are divided by 100 to convert them back to the original scale.
References:
- Bainville, E. (2011) OpenCL Sorting. Parallel Selection Sort. Bealto. Available from: http://www.bealto.com/gpu-sorting_parallel-selection.html [accessed 13 April 2021].
- Millard, A. (2020) OpenCL Tutorials. GitHub. Available from: https://github.com/alanmillard/OpenCL-Tutorials [accessed 28 March 2021].
- Scarpino, M. (2012) OpenCL in Action. New York: Manning. Available from: https://www.manning.com/books/opencl-in-action [accessed 28 March 2021].
*/
int main(int argc, char** argv) {
//Part 1 - handle command line options such as device selection, verbosity, etc.
// Instantiate classes
Helper helper;
Parser parser;
helper.handleInput();
//detect any potential exceptions
try {
//Part 2 - host operations
//2.1 Select computing devices
cl::Context context = GetContext(helper.platform_id, helper.device_id);
//create a queue to which we will push commands for the device
cl::CommandQueue queue(context, CL_QUEUE_PROFILING_ENABLE);
//2.2 Load & build the device code
cl::Program::Sources sources;
AddSources(sources, "kernels/my_kernels.cl");
cl::Program program(context, sources);
//build and debug the kernel code
try {
program.build();
}
catch (const cl::Error& err) {
cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << endl;
cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << endl;
cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(context.getInfo<CL_CONTEXT_DEVICES>()[0]) << endl;
throw err;
}
// Part 3 - memory allocation
// Instantiate kernel
Kernel kernel(context, queue, program);
// Display console file info
helper.displayFileOptions();
// Start data handling
string file_url;
file_url = helper.selectFile(file_url); // Select data file
bool sortFlag = helper.enableSorting(); // Calculates all stats
// Read in data
vector<mytype> temperatures = parser.readFile(file_url);
// Set local size variables
size_t local_size = 1024;
size_t padding_size = temperatures.size() % local_size;
size_t pad_difference = local_size - padding_size;
size_t scratch_size = local_size * sizeof(mytype);
// Pad the data
int pad_value = 3;
temperatures = parser.padData(temperatures, local_size, padding_size, pad_value);
// Set size variables
size_t data_size = temperatures.size(); //number of elements
size_t vec_size = data_size * sizeof(mytype); // size in bytes
size_t initial_data_size = data_size - pad_difference;
size_t wg_size = data_size / local_size;
size_t n_stats = 7; //number of statistics
size_t stats_size = n_stats * sizeof(mytype); //size in bytes
// Host - output
vector<mytype> out_temps(data_size);
vector<mytype> core_data(initial_data_size);
vector<float> statistics(n_stats);
// Set kernel related vectors
vector<string> kernelNames = { "minReduce", "maxReduce", "sumReduce", "varianceReduce" };
vector<cl::Event> events;
cout << " Local size set to: " << local_size << endl;
cout << " Number of work-groups: " << wg_size << endl;
cout << " Padding increase: " << pad_difference << endl;
cout << " Number of records + padding: " << data_size << endl;
cout << "\nCalculating statistics..." << endl;
//---------------------------------------------------------------------------------
//---------------------------------------------------------------------------------
// Calculate first three stats - min, max, mean
// Create input buffer and copy to device memory
cl::Buffer buffer_input(context, CL_MEM_READ_ONLY, vec_size);
queue.enqueueWriteBuffer(buffer_input, CL_TRUE, 0, vec_size, &temperatures[0]);
// Iterate over first three kernels
for (int i = 0; i < 3; ++i)
{
// Create output buffer and fill it with zeros
cl::Buffer buffer_output = kernel.createBuffer(vec_size);
//4.2 Setup the kernel
cl::Event kernelEvent;
cl::Kernel activeKernel = kernel.setupKernel(kernelNames[i], buffer_input, buffer_output, scratch_size);
// Execute kernel
kernel.executeKernel(kernelNames[i], activeKernel, data_size, local_size, kernelEvent);
// Copy the data from device to host
out_temps = kernel.readKernelBuffer(buffer_output, vec_size, out_temps);
// Remove padded values
core_data = parser.removePad(out_temps, initial_data_size);
// Add kernel event to events vector
events.push_back(kernelEvent);
// Set statistic value
if (i == 2) // mean
statistics[i] = ((float)core_data[0] / initial_data_size) / 100.f;
else
statistics[i] = core_data[0] / 100.f;
}
//---------------------------------------------------------------------------------
//---------------------------------------------------------------------------------
// Calculate standard deviation
// Set kernel variables
int mean = statistics[2] * 100;
cl::Event stdEvent;
// Create output buffer and fill it with zeros
cl::Buffer buffer_output = kernel.createBuffer(vec_size);
// Setup kernel
cl::Kernel calcStd = kernel.setupKernel(kernelNames[3], buffer_input, buffer_output, scratch_size, mean, initial_data_size);
// Execute kernel
kernel.executeKernel(kernelNames[3], calcStd, data_size, local_size, stdEvent);
// Copy the result from device to host
out_temps = kernel.readKernelBuffer(buffer_output, vec_size, out_temps);
// Add kernel event to events list
events.push_back(stdEvent);
// Set standard deviation
statistics[3] = sqrt((float)out_temps[0] / initial_data_size);
//---------------------------------------------------------------------------------
//---------------------------------------------------------------------------------
// Calculate remaining statistics - median, Q1, Q3 (requires sorted vector)
if (sortFlag)
{
// Set kernel variables
cl::Event sortEvent;
// Create output buffer and fill it with zeros
cl::Buffer buffer_sorted = kernel.createBuffer(vec_size);
// Setup the kernel
kernelNames.push_back("selectionSort");
cl::Kernel sortData = kernel.setupKernel(kernelNames[4], buffer_input, buffer_sorted);
// Execute kernel
kernel.executeKernel(kernelNames[4], sortData, data_size, NULL, sortEvent);
// Copy the result from device to host
out_temps = kernel.readKernelBuffer(buffer_sorted, vec_size, out_temps);
// Remove padded values from sorted data
core_data = parser.removePad(out_temps, pad_value);
// Add kernel event to events list
events.push_back(sortEvent);
// Calculate median
if (initial_data_size % 2 == 0)
{
// Even dataset size
unsigned int half_size = initial_data_size / 2;
mytype half_avg = (core_data[half_size] + core_data[half_size + 1]) / 2;
statistics[4] = half_avg / 100.f;
}
// Odd dataset size
else
statistics[4] = core_data[round(initial_data_size * 0.5)] / 100.f;
// Calculate remaining statistics
statistics[5] = core_data[round(initial_data_size * 0.25)] / 100.f; // Q1
statistics[6] = core_data[round(initial_data_size * 0.75)] / 100.f; // Q3
}
//---------------------------------------------------------------------------------
//---------------------------------------------------------------------------------
// Output information to console
helper.outputInfo(statistics, kernelNames, events, sortFlag);
}
catch (cl::Error err) {
cerr << "\nERROR: " << err.what() << ", " << getErrorString(err.err()) << endl;
}
return 0;
}