25 #include <Gui/biasgl.h>
26 #include <Base/Common/BIASpragma.hh>
32 #include <OpenCLFramework/clfContext.hh>
45 case clfContextTypeCPU:
46 cldt = (cl_device_type)CL_DEVICE_TYPE_CPU;
48 case clfContextTypeGPU:
50 cldt = (cl_device_type)CL_DEVICE_TYPE_GPU;
56 cl::Platform::get(&platforms_);
61 cl_context_properties cps[] = {
62 CL_GL_CONTEXT_KHR,(cl_context_properties)wglGetCurrentContext(),
63 CL_WGL_HDC_KHR,(cl_context_properties)wglGetCurrentDC(),
64 CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms_[0]()),
67 cl_context_properties cps[] = {
68 CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
69 CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
70 CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms_[0]()),
73 std::vector<cl::Device> devices;
74 platforms_[0].getDevices(cldt, &devices);
75 if (devices.size()>1) {
76 BIASWARN(
"more than one GPU detected, selecting first one for sharing GL context");
77 devices[0] = devices[1];
81 context_ = cl::Context(devices, cps, (
void (__stdcall*)(
const char*,
const void*,
size_t,
void*))clfContext::clnotify,
this);
83 context_ = cl::Context(devices, cps, (
void (*)(
const char*,
const void*,
size_t,
void*))clfContext::clnotify, (
void*)
this);
88 cl_context_properties cps[] = {
90 (cl_context_properties)(platforms_[0])(),
94 context_ = cl::Context(cldt, cps, (
void (__stdcall *)(
const char*,
const void*,
size_t,
void*))clfContext::clnotify, (
void*)
this);
96 context_ = cl::Context(cldt, cps, (
void (*)(
const char*,
const void*,
size_t,
void*))clfContext::clnotify, (
void*)
this);
101 devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
104 for (
unsigned int i=0;i<devices_.size();i++) {
105 queues_.push_back( cl::CommandQueue(context_, devices_[i]) );
108 devinfo_.resize( devices_.size() );
109 for (
unsigned int i=0;i<devices_.size();i++) {
110 QueryDeviceInfo_( i );
113 }
catch (cl::Error &error) {
114 THROW_CL_EXCEPTION(error);
118 clfContext::~clfContext() {
// Notification callback handed to the cl::Context constructor (the call sites
// cast it to the callback pointer type they need). Writes the message text p1
// to cout, prefixed with "[OpenCL Context] ".
// p2, p3 and p4 are not used by the statement shown here.
121 void clfContext::clnotify(
const char * p1,
const void *p2,
size_t p3,
clfContext *p4) {
123 cout <<
"[OpenCL Context] " << p1;
// Writes a human-readable summary of the cached devinfo_ entry for `device`
// to `out`, one property per line.
//   device  - index into devices_ / devinfo_
//   verbose - not referenced by any statement shown here
//   out     - stream receiving the report
127 void clfContext::PrintDeviceInfo(
unsigned int device,
bool verbose,std::ostream& out)
// Range check: the index must refer to an existing entry of devices_.
129 if (device >= devices_.size()) {
134 out <<
"Device: " << device << endl;
135 out <<
"Name: " << devinfo_[device].name << endl;
136 out <<
"OpenCL Version: " << devinfo_[device].clfver << endl;
137 out <<
"Driver Version: " << devinfo_[device].driverVersion << endl;
138 out <<
"Max Compute Units: " << devinfo_[device].maxComputeUnits << endl;
// Global memory: the raw size is printed together with a scaled-down value.
// memreadable is divided by 1024 up to three times; memdim is the unit label
// printed next to it.
142 string memdim =
"Byte";
143 double memreadable = (double)devinfo_[device].globalMemSize;
144 if (memreadable > 1024) {
145 memreadable /= 1024.0;
148 if (memreadable > 1024) {
149 memreadable /= 1024.0;
152 if (memreadable > 1024) {
153 memreadable /= 1024.0;
// std::setprecision(2) is a sticky manipulator: it stays in effect on `out`
// after this function returns.
156 out <<
"Global Memory: " << devinfo_[device].globalMemSize <<
" (" << std::setprecision(2)<<memreadable <<
" "<<memdim<<
")"<<endl;
// Local memory: same scaling scheme, reusing memreadable and memdim.
// NOTE(review): verify that memdim is reset to "Byte" before this second pass;
// otherwise the unit chosen for global memory carries over.
158 memreadable = devinfo_[device].localMemSize;
159 if (memreadable > 1024) {
160 memreadable /= 1024.0;
163 if (memreadable > 1024) {
164 memreadable /= 1024.0;
167 if (memreadable > 1024) {
168 memreadable /= 1024.0;
171 out <<
"Local Memory: " << devinfo_[device].localMemSize <<
" (" <<std::setprecision(2)<< memreadable <<
" "<<memdim<<
")"<<endl;
172 out <<
"Max Workgroup Size: " << devinfo_[device].maxWorkgroupSize << endl;
173 out <<
"Max Workitem Dimensions: " << devinfo_[device].maxWorkitemDimensions << endl;
174 out <<
"Max Workitem Sizes: ";
// A leading 0 marks a failed query: QueryDeviceInfo_ zero-fills the vector
// when the work-item-size query throws.
// NOTE(review): element [0] is read without checking that the vector is
// non-empty - confirm maxWorkitemDimensions can never be 0.
175 if (devinfo_[device].maxWorkitemSizes[0] != 0) {
176 for (
unsigned int i=0;i<devinfo_[device].maxWorkitemSizes.size();i++) {
177 out << devinfo_[device].maxWorkitemSizes[i] <<
" ";
181 out <<
"Query failed." << endl;
183 out <<
"Image Support: ";
// imageSupport is also false when the query itself failed, hence the hint in
// the "No." message.
184 if (devinfo_[device].imageSupport) {
185 out <<
"Yes." << endl;
187 out <<
"No. (maybe query failed.)" << endl;
189 out <<
"Image Max Width: " << devinfo_[device].maxImg2DWidth << endl;
190 out <<
"Image Max Height: " << devinfo_[device].maxImg2DHeight << endl;
191 out <<
"Max Nr of Read Images " << devinfo_[device].maxReadImgs << endl;
192 out <<
"Max Nr of Write Images " << devinfo_[device].maxWriteImgs << endl;
193 out <<
"OpenGL Sharing Available: ";
194 if (devinfo_[device].hasGLsharing) {
195 out <<
"Yes." << endl;
197 out <<
"No." << endl;
199 out <<
"Extensions: " << devinfo_[device].extensions << endl;
// Prints index, vendor and name of every entry in platforms_ to `out`.
// A cl::Error raised by the platform queries is handed to THROW_CL_EXCEPTION.
204 void clfContext::PrintPlatformsInfo(std::ostream& out) {
206 for (
unsigned int i=0;i<platforms_.size();i++) {
// Fetch the two strings first, then print them below the platform index.
209 platforms_[i].getInfo( (cl_platform_info)CL_PLATFORM_NAME, &name);
210 platforms_[i].getInfo( (cl_platform_info)CL_PLATFORM_VENDOR, &vendor);
211 out <<
" Platform " << i << endl;
212 out <<
" Vendor: " << vendor << endl;
213 out <<
" Name: " << name << endl;
215 }
catch (cl::Error &error) {
216 THROW_CL_EXCEPTION(error);
220 void clfContext::RunOn1DRange(
clfProgram &program,
string kernelname,
unsigned int globalrange,
unsigned int localrange) {
221 if (localrange != 0) {
222 globalrange = DivUp(localrange, globalrange);
224 cl::NDRange global(globalrange);
225 cl::NDRange local(localrange);
226 if (localrange == 0) local = cl::NullRange;
228 queues_[activeQueue_].enqueueNDRangeKernel(program(kernelname), cl::NullRange, global, local);
229 }
catch (cl::Error &error) {
230 THROW_CL_EXCEPTION(error);
234 void clfContext::RunOn2DRange(
clfProgram &program,
string kernelname,
unsigned int globalrangeX,
unsigned int globalrangeY,
unsigned int localrangeX,
unsigned int localrangeY) {
235 if (localrangeX != 0) {
236 globalrangeX = DivUp(localrangeX, globalrangeX);
238 if (localrangeY != 0) {
239 globalrangeY = DivUp(localrangeY, globalrangeY);
241 cl::NDRange global(globalrangeX, globalrangeY);
242 cl::NDRange local(localrangeX, localrangeY);
243 if (localrangeX == 0 || localrangeY == 0) local = cl::NullRange;
245 queues_[activeQueue_].enqueueNDRangeKernel(program(kernelname), cl::NullRange, global, local);
246 }
catch (cl::Error &error) {
247 THROW_CL_EXCEPTION(error);
251 void clfContext::RunOn3DRange(
clfProgram &program,
string kernelname,
unsigned int globalrangeX,
unsigned int globalrangeY,
unsigned int globalrangeZ,
unsigned int localrangeX,
unsigned int localrangeY,
unsigned int localrangeZ) {
252 if (localrangeX != 0) {
253 globalrangeX = DivUp(localrangeX, globalrangeX);
255 if (localrangeY != 0) {
256 globalrangeY = DivUp(localrangeY, globalrangeY);
258 if (localrangeZ != 0) {
259 globalrangeZ = DivUp(localrangeZ, globalrangeZ);
261 cl::NDRange global(globalrangeX, globalrangeY, globalrangeZ);
262 cl::NDRange local(localrangeX, localrangeY, localrangeZ);
263 if (localrangeX == 0 || localrangeY == 0 || localrangeZ == 0) local = cl::NullRange;
265 queues_[activeQueue_].enqueueNDRangeKernel(program(kernelname), cl::NullRange, global, local);
266 }
catch (cl::Error &error) {
267 THROW_CL_EXCEPTION(error);
272 vector<cl::Memory> buf(1, buffer.
memory());
274 queues_[activeQueue_].enqueueAcquireGLObjects( &buf );
275 }
catch (cl::Error &error) {
276 THROW_CL_EXCEPTION(error);
281 vector<cl::Memory> buf(1, buffer.
memory());
283 queues_[activeQueue_].enqueueReleaseGLObjects( &buf );
284 }
catch (cl::Error &error) {
285 THROW_CL_EXCEPTION(error);
289 void clfContext::Finish() {
291 queues_[activeQueue_].finish();
292 }
catch (cl::Error &error) {
293 THROW_CL_EXCEPTION(error);
// Fills devinfo_[device] by querying devices_[device] one property at a time.
// Every query is followed by its own catch (cl::Error&) handler that stores a
// fallback value for that single field.
//   device - index into devices_ / devinfo_ (devinfo_ is sized by the caller)
297 void clfContext::QueryDeviceInfo_(
unsigned int device) {
// Device name; fallback: empty string.
300 devices_[device].getInfo( (cl_device_info)CL_DEVICE_NAME,
301 &devinfo_[device].name );
302 }
catch (cl::Error &error) {
303 devinfo_[device].name =
"";
// Extension string; fallback: empty string. Also used for the GL sharing
// check at the end of this function.
306 devices_[device].getInfo( (cl_device_info)CL_DEVICE_EXTENSIONS,
307 &devinfo_[device].extensions );
308 }
catch (cl::Error &error) {
309 devinfo_[device].extensions =
"";
// Image support flag, converted to bool; fallback: false.
313 devices_[device].getInfo( (cl_device_info)CL_DEVICE_IMAGE_SUPPORT, &test);
314 devinfo_[device].imageSupport = (bool)test;
315 }
catch (cl::Error &error) {
316 devinfo_[device].imageSupport =
false;
// Number of compute units; fallback: 0.
319 devices_[device].getInfo((cl_device_info)CL_DEVICE_MAX_COMPUTE_UNITS,
320 &devinfo_[device].maxComputeUnits);
321 }
catch (cl::Error &error) {
322 devinfo_[device].maxComputeUnits = 0;
// Global memory size, stored as unsigned long long; fallback: 0.
326 devices_[device].getInfo( (cl_device_info)CL_DEVICE_GLOBAL_MEM_SIZE, &memsize);
327 devinfo_[device].globalMemSize = (
unsigned long long)memsize;
328 }
catch (cl::Error &error) {
329 devinfo_[device].globalMemSize = 0;
// Local memory size, stored as unsigned long long; fallback: 0.
332 devices_[device].getInfo( (cl_device_info)CL_DEVICE_LOCAL_MEM_SIZE, &memsize);
333 devinfo_[device].localMemSize = (
unsigned long long)memsize;
334 }
catch (cl::Error &error) {
335 devinfo_[device].localMemSize = 0;
// Maximum work-group size; fallback: 0.
338 devices_[device].getInfo( (cl_device_info)CL_DEVICE_MAX_WORK_GROUP_SIZE,
339 &devinfo_[device].maxWorkgroupSize);
340 }
catch (cl::Error &error) {
341 devinfo_[device].maxWorkgroupSize = 0;
// Number of work-item dimensions; fallback: 1. Must be known before the
// per-dimension sizes below are copied.
344 devices_[device].getInfo( (cl_device_info)CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
345 &devinfo_[device].maxWorkitemDimensions);
346 }
catch (cl::Error &error) {
347 devinfo_[device].maxWorkitemDimensions = 1;
// Per-dimension work-item limits: one entry per dimension is appended to
// maxWorkitemSizes. On failure the vector is resized to that many entries,
// with new entries set to 0; PrintDeviceInfo reads a leading 0 as
// "Query failed.".
// NOTE(review): entries are appended with push_back - confirm this function
// runs only once per device, otherwise the vector would grow on each call.
352 devices_[device].getInfo( (cl_device_info)CL_DEVICE_MAX_WORK_ITEM_SIZES,
354 for (
unsigned int i=0;i<devinfo_[device].maxWorkitemDimensions;i++) {
355 devinfo_[device].maxWorkitemSizes.push_back(sizes[i]);
357 }
catch (cl::Error &error) {
359 devinfo_[device].maxWorkitemSizes.resize(devinfo_[device].maxWorkitemDimensions,0);
// Driver version string; fallback: "1.0".
363 devices_[device].getInfo( (cl_device_info)CL_DRIVER_VERSION,
364 &devinfo_[device].driverVersion);
365 }
catch (cl::Error &error) {
366 devinfo_[device].driverVersion =
"1.0";
// Maximum 2D image width; fallback: 0.
369 devices_[device].getInfo( (cl_device_info)CL_DEVICE_IMAGE2D_MAX_WIDTH,
370 &devinfo_[device].maxImg2DWidth);
371 }
catch (cl::Error &error) {
372 devinfo_[device].maxImg2DWidth = 0;
// Maximum 2D image height; fallback: 0.
375 devices_[device].getInfo( (cl_device_info)CL_DEVICE_IMAGE2D_MAX_HEIGHT,
376 &devinfo_[device].maxImg2DHeight);
377 }
catch (cl::Error &error) {
378 devinfo_[device].maxImg2DHeight = 0;
// OpenCL C version string; fallback: "OpenCL C 1.0".
381 devices_[device].getInfo((cl_device_info)CL_DEVICE_OPENCL_C_VERSION, &devinfo_[device].clfver );
382 }
catch (cl::Error &error) {
383 devinfo_[device].clfver =
"OpenCL C 1.0";
// Maximum number of write-image kernel arguments; fallback: 0.
386 devices_[device].getInfo((cl_device_info)CL_DEVICE_MAX_WRITE_IMAGE_ARGS, &devinfo_[device].maxWriteImgs );
387 }
catch (cl::Error &error) {
388 devinfo_[device].maxWriteImgs = 0;
// Maximum number of read-image kernel arguments; fallback: 0.
391 devices_[device].getInfo((cl_device_info)CL_DEVICE_MAX_READ_IMAGE_ARGS, &devinfo_[device].maxReadImgs );
392 }
catch (cl::Error &error) {
393 devinfo_[device].maxReadImgs = 0;
// GL sharing is derived from the extension string queried above: it is
// reported as available only when "cl_khr_gl_sharing" occurs in it.
396 if (devinfo_[device].extensions.find(
"cl_khr_gl_sharing") == string::npos) {
397 devinfo_[device].hasGLsharing =
false;
399 devinfo_[device].hasGLsharing =
true;
403 void clfContext::SetActiveDevice(
unsigned int device) {
404 if (device >= queues_.size()) {
405 THROW_CL_EXCEPTION(cl::Error(-100,
"device does not exist."));
407 activeQueue_ = device;
411 unsigned int clfContext::GetActiveDevice()
416 int clfContext::DivUp(
const int mod,
int val) {
417 if (val % mod != 0) val += mod - (val%mod);