6a181fc79683a715b91bc64adc9872eee3aa8db2
[occt.git] / src / OpenGl / OpenGl_Workspace_Raytrace.cxx
1 // Created on: 2013-08-27
2 // Created by: Denis BOGOLEPOV
3 // Copyright (c) 2013 OPEN CASCADE SAS
4 //
5 // The content of this file is subject to the Open CASCADE Technology Public
6 // License Version 6.5 (the "License"). You may not use the content of this file
7 // except in compliance with the License. Please obtain a copy of the License
8 // at http://www.opencascade.org and read it completely before using this file.
9 //
10 // The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
11 // main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
12 //
13 // The Original Code and all software distributed under the License is
14 // distributed on an "AS IS" basis, without warranty of any kind, and the
15 // Initial Developer hereby disclaims all such warranties, including without
16 // limitation, any warranties of merchantability, fitness for a particular
17 // purpose or non-infringement. Please see the License for the specific terms
18 // and conditions governing the rights and limitations under the License.
19
20 #ifdef HAVE_CONFIG_H
21   #include <config.h>
22 #endif
23
24 #ifdef HAVE_OPENCL
25
26 #include <OpenGl_Cl.hxx>
27
28 #if defined(_WIN32)
29
30   #include <windows.h>
31   #include <wingdi.h>
32
33   #pragma comment (lib, "DelayImp.lib")
34   #pragma comment (lib, "OpenCL.lib")
35
36 #elif defined(__APPLE__) && !defined(MACOSX_USE_GLX)
37   #include <OpenGL/CGLCurrent.h>
38 #else
39   #include <GL/glx.h>
40 #endif
41
42 #include <OpenGl_Context.hxx>
43 #include <OpenGl_Texture.hxx>
44 #include <OpenGl_View.hxx>
45 #include <OpenGl_Workspace.hxx>
46
47 using namespace OpenGl_Raytrace;
48
49 //! Use this macro to output ray-tracing debug info
50 // #define RAY_TRACE_PRINT_INFO
51
52 #ifdef RAY_TRACE_PRINT_INFO
53   #include <OSD_Timer.hxx>
54 #endif
55
56 //! OpenCL source of ray-tracing kernels.
57 extern const char THE_RAY_TRACE_OPENCL_SOURCE[];
58
59 // =======================================================================
60 // function : MatVecMult
61 // purpose  : Multiples 4x4 matrix by 4D vector
62 // =======================================================================
63 template< typename T >
64 OpenGl_RTVec4f MatVecMult (const T m[16], const OpenGl_RTVec4f& v)
65 {
66   return OpenGl_RTVec4f (
67     static_cast<float> (m[ 0] * v.x() + m[ 4] * v.y() +
68                         m[ 8] * v.z() + m[12] * v.w()),
69     static_cast<float> (m[ 1] * v.x() + m[ 5] * v.y() +
70                         m[ 9] * v.z() + m[13] * v.w()),
71     static_cast<float> (m[ 2] * v.x() + m[ 6] * v.y() +
72                         m[10] * v.z() + m[14] * v.w()),
73     static_cast<float> (m[ 3] * v.x() + m[ 7] * v.y() +
74                         m[11] * v.z() + m[15] * v.w()));
75 }
76
77 // =======================================================================
78 // function : UpdateRaytraceEnvironmentMap
79 // purpose  : Updates environment map for ray-tracing
80 // =======================================================================
81 Standard_Boolean OpenGl_Workspace::UpdateRaytraceEnvironmentMap()
82 {
83   if (myView.IsNull())
84     return Standard_False;
85
86   if (myViewModificationStatus == myView->ModificationState())
87     return Standard_True;
88
89   cl_int anError = CL_SUCCESS;
90
91   if (myRaytraceEnvironment != NULL)
92     clReleaseMemObject (myRaytraceEnvironment);
93
94   int aSizeX = 1;
95   int aSizeY = 1;
96
97   if (!myView->TextureEnv().IsNull() && myView->SurfaceDetail() != Visual3d_TOD_NONE)
98   {
99     aSizeX = (myView->TextureEnv()->SizeX() <= 0) ? 1 : myView->TextureEnv()->SizeX();
100     aSizeY = (myView->TextureEnv()->SizeY() <= 0) ? 1 : myView->TextureEnv()->SizeY();
101   }
102
103   cl_image_format aImageFormat;
104
105   aImageFormat.image_channel_order = CL_RGBA;
106   aImageFormat.image_channel_data_type = CL_FLOAT;
107
108   myRaytraceEnvironment = clCreateImage2D (myComputeContext, CL_MEM_READ_ONLY,
109                                            &aImageFormat, aSizeX, aSizeY, 0,
110                                            NULL, &anError);
111
112   cl_float* aPixelData = new cl_float[aSizeX * aSizeY * 4];
113
114   // Note: texture format is not compatible with OpenCL image
115   // (it's not possible to create image directly from texture)
116
117   if (!myView->TextureEnv().IsNull() && myView->SurfaceDetail() != Visual3d_TOD_NONE)
118   {
119     myView->TextureEnv()->Bind (GetGlContext());
120
121     glGetTexImage (GL_TEXTURE_2D,
122                    0,
123                    GL_RGBA,
124                    GL_FLOAT,
125                    aPixelData);
126
127     myView->TextureEnv()->Unbind (GetGlContext());
128   }
129   else
130   {
131     for (int aPixel = 0; aPixel < aSizeX * aSizeY * 4; ++aPixel)
132       aPixelData[aPixel] = 0.f;
133   }
134
135   size_t anImageOffset[] = { 0,
136                              0,
137                              0 };
138
139   size_t anImageRegion[] = { aSizeX,
140                              aSizeY,
141                              1 };
142
143   anError |= clEnqueueWriteImage (myRaytraceQueue, myRaytraceEnvironment, CL_TRUE,
144                                   anImageOffset, anImageRegion, 0, 0, aPixelData,
145                                   0, NULL, NULL);
146 #ifdef RAY_TRACE_PRINT_INFO
147   if (anError != CL_SUCCESS)
148     std::cout << "Error! Failed to write environment map image!" << std::endl;
149 #endif
150
151   delete[] aPixelData;
152
153   myViewModificationStatus = myView->ModificationState();
154
155   return (anError == CL_SUCCESS);
156 }
157
158 // =======================================================================
159 // function : UpdateRaytraceGeometry
160 // purpose  : Updates 3D scene geometry for ray tracing
161 // =======================================================================
162 Standard_Boolean OpenGl_Workspace::UpdateRaytraceGeometry (Standard_Boolean theCheck)
163 {
164   if (myView.IsNull())
165     return Standard_False;
166
167   // Note: In 'check' mode the scene geometry is analyzed for modifications
168   // This is light-weight procedure performed for each frame
169
170   if (!theCheck)
171   {
172     myRaytraceSceneData.Clear();
173
174     myIsRaytraceDataValid = Standard_False;
175   }
176   else
177   {
178     if (myLayersModificationStatus != myView->LayerList().ModificationState())
179     {
180       return UpdateRaytraceGeometry (Standard_False);
181     }
182   }
183
184   float* aTransform (NULL);
185
186   // The set of processed structures (reflected to ray-tracing)
187   // This set is used to remove out-of-date records from the
188   // hash map of structures
189   std::set<const OpenGl_Structure*> anElements;
190
191   const OpenGl_LayerList& aList = myView->LayerList();
192
193   for (OpenGl_SequenceOfLayers::Iterator anLayerIt (aList.Layers()); anLayerIt.More(); anLayerIt.Next())
194   {
195     const OpenGl_PriorityList& aPriorityList = anLayerIt.Value();
196
197     if (aPriorityList.NbStructures() == 0)
198       continue;
199
200     const OpenGl_ArrayOfStructure& aStructArray = aPriorityList.ArrayOfStructures();
201
202     for (int anIndex = 0; anIndex < aStructArray.Length(); ++anIndex)
203     {
204       OpenGl_SequenceOfStructure::Iterator aStructIt;
205
206       for (aStructIt.Init (aStructArray (anIndex)); aStructIt.More(); aStructIt.Next())
207       {
208         const OpenGl_Structure* aStructure = aStructIt.Value();
209
210         if (theCheck)
211         {
212           if (CheckRaytraceStructure (aStructure))
213           {
214             return UpdateRaytraceGeometry (Standard_False);
215           }
216         }
217         else
218         {
219           if (!aStructure->IsRaytracable())
220             continue;
221
222           if (aStructure->Transformation()->mat != NULL)
223           {
224             if (aTransform == NULL)
225               aTransform = new float[16];
226
227             for (int i = 0; i < 4; ++i)
228               for (int j = 0; j < 4; ++j)
229               {
230                 aTransform[j * 4 + i] = aStructure->Transformation()->mat[i][j];
231               }
232           }
233
234           AddRaytraceStructure (aStructure, aTransform, anElements);
235         }
236       }
237     }
238   }
239
240   if (!theCheck)
241   {
242     // Actualize the hash map of structures -- remove out-of-date records
243     std::map<const OpenGl_Structure*, Standard_Size>::iterator anIter = myStructureStates.begin();
244
245     while (anIter != myStructureStates.end())
246     {
247       if (anElements.find (anIter->first) == anElements.end())
248       {
249         myStructureStates.erase (anIter++);
250       }
251       else
252       {
253         ++anIter;
254       }
255     }
256
257     // Actualize OpenGL layer list state
258     myLayersModificationStatus = myView->LayerList().ModificationState();
259
260
261 #ifdef RAY_TRACE_PRINT_INFO
262     OSD_Timer aTimer;
263     aTimer.Start();
264 #endif
265
266     myBVHBuilder.Build (myRaytraceSceneData);
267
268 #ifdef RAY_TRACE_PRINT_INFO
269     std::cout << " Build time: " << aTimer.ElapsedTime() << " for "
270                         << myRaytraceSceneData.Triangles.size() / 1000 << "K triangles" << std::endl;
271 #endif
272
273     const float aScaleFactor = 1.5f;
274
275     myRaytraceSceneRadius = aScaleFactor *
276       Max ( Max (fabsf (myRaytraceSceneData.AABB.CornerMin().x()),
277             Max (fabsf (myRaytraceSceneData.AABB.CornerMin().y()),
278                  fabsf (myRaytraceSceneData.AABB.CornerMin().z()))),
279             Max (fabsf (myRaytraceSceneData.AABB.CornerMax().x()),
280             Max (fabsf (myRaytraceSceneData.AABB.CornerMax().y()),
281                  fabsf (myRaytraceSceneData.AABB.CornerMax().z()))) );
282
283     myRaytraceSceneEpsilon = Max (1e-4f, myRaytraceSceneRadius * 1e-4f);
284
285     return WriteRaytraceSceneToDevice();
286   }
287
288   delete [] aTransform;
289
290   return Standard_True;
291 }
292
293 // =======================================================================
294 // function : CheckRaytraceStructure
295 // purpose  : Adds OpenGL structure to ray-traced scene geometry
296 // =======================================================================
297 Standard_Boolean OpenGl_Workspace::CheckRaytraceStructure (const OpenGl_Structure* theStructure)
298 {
299   if (!theStructure->IsRaytracable())
300   {
301     // Checks to see if all ray-tracable elements were
302     // removed from the structure
303     if (theStructure->ModificationState() > 0)
304     {
305       theStructure->ResetModificationState();
306       return Standard_True;
307     }
308
309     return Standard_False;
310   }
311
312   std::map<const OpenGl_Structure*, Standard_Size>::iterator aStructState = myStructureStates.find (theStructure);
313
314   if (aStructState != myStructureStates.end())
315     return aStructState->second != theStructure->ModificationState();
316
317   return Standard_True;
318 }
319
320 // =======================================================================
321 // function : CreateMaterial
322 // purpose  : Creates ray-tracing material properties
323 // =======================================================================
324 void CreateMaterial (const OPENGL_SURF_PROP& theProp, OpenGl_RaytraceMaterial& theMaterial)
325 {
326   theMaterial.Ambient = OpenGl_RTVec4f (theProp.ambcol.rgb[0] * theProp.amb,
327                                         theProp.ambcol.rgb[1] * theProp.amb,
328                                         theProp.ambcol.rgb[2] * theProp.amb,
329                                         1.f);
330
331   theMaterial.Diffuse = OpenGl_RTVec4f (theProp.difcol.rgb[0] * theProp.diff,
332                                         theProp.difcol.rgb[1] * theProp.diff,
333                                         theProp.difcol.rgb[2] * theProp.diff,
334                                         1.f);
335
336   theMaterial.Specular = OpenGl_RTVec4f (theProp.speccol.rgb[0] * theProp.spec,
337                                          theProp.speccol.rgb[1] * theProp.spec,
338                                          theProp.speccol.rgb[2] * theProp.spec,
339                                          theProp.shine);
340
341   theMaterial.Emission = OpenGl_RTVec4f (theProp.emscol.rgb[0] * theProp.emsv,
342                                          theProp.emscol.rgb[1] * theProp.emsv,
343                                          theProp.emscol.rgb[2] * theProp.emsv,
344                                          1.f);
345
346   // Note: Here we use sub-linear transparency function
347   // to produce realistic-looking transparency effect
348   theMaterial.Transparency = OpenGl_RTVec4f (powf (theProp.trans, 0.75f),
349                                              1.f - theProp.trans,
350                                              1.f,
351                                              1.f);
352
353   const float aMaxRefl = Max (theMaterial.Diffuse.x() + theMaterial.Specular.x(),
354                          Max (theMaterial.Diffuse.y() + theMaterial.Specular.y(),
355                               theMaterial.Diffuse.z() + theMaterial.Specular.z()));
356
357   const float aReflectionScale = 0.75f / aMaxRefl;
358
359   theMaterial.Reflection = OpenGl_RTVec4f (theProp.speccol.rgb[0] * theProp.spec,
360                                            theProp.speccol.rgb[1] * theProp.spec,
361                                            theProp.speccol.rgb[2] * theProp.spec,
362                                            0.f) * aReflectionScale;
363 }
364
365 // =======================================================================
366 // function : AddRaytraceStructure
367 // purpose  : Adds OpenGL structure to ray-traced scene geometry
368 // =======================================================================
369 Standard_Boolean OpenGl_Workspace::AddRaytraceStructure (const OpenGl_Structure*            theStructure,
370                                                          const float*                       theTransform,
371                                                          std::set<const OpenGl_Structure*>& theElements)
372 {
373 #ifdef RAY_TRACE_PRINT_INFO
374   std::cout << "Add Structure" << std::endl;
375 #endif
376
377   theElements.insert (theStructure);
378
379   if (!theStructure->IsVisible())
380   {
381     myStructureStates[theStructure] = theStructure->ModificationState();
382     return Standard_True;
383   }
384
385   // Get structure material
386   int aStructMatID = -1;
387
388   if (theStructure->AspectFace() != NULL)
389   {
390     aStructMatID = static_cast<int> (myRaytraceSceneData.Materials.size());
391
392     OpenGl_RaytraceMaterial aStructMaterial;
393     CreateMaterial (theStructure->AspectFace()->IntFront(), aStructMaterial);
394
395     myRaytraceSceneData.Materials.push_back (aStructMaterial);
396   }
397
398   OpenGl_ListOfGroup::Iterator anItg (theStructure->Groups());
399
400   while (anItg.More())
401   {
402     // Get group material
403     int aGroupMatID = -1;
404
405     if (anItg.Value()->AspectFace() != NULL)
406     {
407       aGroupMatID = static_cast<int> (myRaytraceSceneData.Materials.size());
408
409       OpenGl_RaytraceMaterial aGroupMaterial;
410       CreateMaterial (anItg.Value()->AspectFace()->IntFront(), aGroupMaterial);
411
412       myRaytraceSceneData.Materials.push_back (aGroupMaterial);
413     }
414
415     int aMatID = aGroupMatID < 0 ? aStructMatID : aGroupMatID;
416
417     if (aStructMatID < 0 && aGroupMatID < 0)
418     {
419       aMatID = static_cast<int> (myRaytraceSceneData.Materials.size());
420
421       myRaytraceSceneData.Materials.push_back (OpenGl_RaytraceMaterial());
422     }
423
424     // Add OpenGL elements from group (only arrays of primitives)
425     for (const OpenGl_ElementNode* aNode = anItg.Value()->FirstNode(); aNode != NULL; aNode = aNode->next)
426     {
427       if (TelNil == aNode->type)
428       {
429         OpenGl_AspectFace* anAspect = dynamic_cast<OpenGl_AspectFace*> (aNode->elem);
430
431         if (anAspect != NULL)
432         {
433           aMatID = static_cast<int> (myRaytraceSceneData.Materials.size());
434
435           OpenGl_RaytraceMaterial aMaterial;
436           CreateMaterial (anAspect->IntFront(), aMaterial);
437
438           myRaytraceSceneData.Materials.push_back (aMaterial);
439         }
440       }
441       else if (TelParray == aNode->type)
442       {
443         OpenGl_PrimitiveArray* aPrimArray = dynamic_cast<OpenGl_PrimitiveArray*> (aNode->elem);
444
445         if (aPrimArray != NULL)
446         {
447           AddRaytracePrimitiveArray (aPrimArray->PArray(), aMatID, theTransform);
448         }
449       }
450     }
451
452     anItg.Next();
453   }
454
455   float* aTransform (NULL);
456
457   // Process all connected OpenGL structures
458   OpenGl_ListOfStructure::Iterator anIts (theStructure->ConnectedStructures());
459
460   while (anIts.More())
461   {
462     if (anIts.Value()->Transformation()->mat != NULL)
463     {
464       float* aTransform = new float[16];
465
466       for (int i = 0; i < 4; ++i)
467         for (int j = 0; j < 4; ++j)
468         {
469           aTransform[j * 4 + i] =
470             anIts.Value()->Transformation()->mat[i][j];
471         }
472     }
473
474     if (anIts.Value()->IsRaytracable())
475       AddRaytraceStructure (anIts.Value(), aTransform != NULL ? aTransform : theTransform, theElements);
476
477     anIts.Next();
478   }
479
480   delete[] aTransform;
481
482   myStructureStates[theStructure] = theStructure->ModificationState();
483
484   return Standard_True;
485 }
486
487 // =======================================================================
488 // function : AddRaytracePrimitiveArray
489 // purpose  : Adds OpenGL primitive array to ray-traced scene geometry
490 // =======================================================================
491 Standard_Boolean OpenGl_Workspace::AddRaytracePrimitiveArray (const CALL_DEF_PARRAY* theArray,
492                                                               int                    theMatID,
493                                                               const float*           theTransform)
494 {
495   if (theArray->type != TelPolygonsArrayType &&
496       theArray->type != TelTrianglesArrayType &&
497       theArray->type != TelQuadranglesArrayType &&
498       theArray->type != TelTriangleFansArrayType &&
499       theArray->type != TelTriangleStripsArrayType &&
500       theArray->type != TelQuadrangleStripsArrayType)
501   {
502     return Standard_True;
503   }
504
505   if (theArray->vertices == NULL)
506     return Standard_False;
507
508 #ifdef RAY_TRACE_PRINT_INFO
509   switch (theArray->type)
510   {
511     case TelPolygonsArrayType:
512       std::cout << "\tTelPolygonsArrayType" << std::endl; break;
513     case TelTrianglesArrayType:
514       std::cout << "\tTelTrianglesArrayType" << std::endl; break;
515     case TelQuadranglesArrayType:
516       std::cout << "\tTelQuadranglesArrayType" << std::endl; break;
517     case TelTriangleFansArrayType:
518       std::cout << "\tTelTriangleFansArrayType" << std::endl; break;
519     case TelTriangleStripsArrayType:
520       std::cout << "\tTelTriangleStripsArrayType" << std::endl; break;
521     case TelQuadrangleStripsArrayType:
522       std::cout << "\tTelQuadrangleStripsArrayType" << std::endl; break;
523   }
524 #endif
525
526   // Simple optimization to eliminate possible memory allocations
527   // during processing of the primitive array vertices
528   myRaytraceSceneData.Vertices.reserve (
529     myRaytraceSceneData.Vertices.size() + theArray->num_vertexs);
530
531   const int aFirstVert = static_cast<int> (myRaytraceSceneData.Vertices.size());
532
533   for (int aVert = 0; aVert < theArray->num_vertexs; ++aVert)
534   {
535     OpenGl_RTVec4f aVertex (theArray->vertices[aVert].xyz[0],
536                             theArray->vertices[aVert].xyz[1],
537                             theArray->vertices[aVert].xyz[2],
538                             1.f);
539
540     if (theTransform)
541       aVertex = MatVecMult (theTransform, aVertex);
542
543     myRaytraceSceneData.Vertices.push_back (aVertex);
544
545     myRaytraceSceneData.AABB.Add (aVertex);
546   }
547
548   myRaytraceSceneData.Normals.reserve (
549     myRaytraceSceneData.Normals.size() + theArray->num_vertexs);
550
551   for (int aNorm = 0; aNorm < theArray->num_vertexs; ++aNorm)
552   {
553     OpenGl_RTVec4f aNormal;
554
555     // Note: In case of absence of normals, the visualizer
556     // will use generated geometric normals
557
558     if (theArray->vnormals != NULL)
559     {
560       aNormal = OpenGl_RTVec4f (theArray->vnormals[aNorm].xyz[0],
561                                 theArray->vnormals[aNorm].xyz[1],
562                                 theArray->vnormals[aNorm].xyz[2],
563                                 0.f);
564
565       if (theTransform)
566         aNormal = MatVecMult (theTransform, aNormal);
567     }
568
569     myRaytraceSceneData.Normals.push_back (aNormal);
570   }
571
572   if (theArray->num_bounds > 0)
573   {
574 #ifdef RAY_TRACE_PRINT_INFO
575     std::cout << "\tNumber of bounds = " << theArray->num_bounds << std::endl;
576 #endif
577
578     int aVertOffset = 0;
579
580     for (int aBound = 0; aBound < theArray->num_bounds; ++aBound)
581     {
582       const int aVertNum = theArray->bounds[aBound];
583
584 #ifdef RAY_TRACE_PRINT_INFO
585       std::cout << "\tAdd indices from bound " << aBound << ": " <<
586                                     aVertOffset << ", " << aVertNum << std::endl;
587 #endif
588
589       if (!AddRaytraceVertexIndices (theArray, aFirstVert, aVertOffset, aVertNum, theMatID))
590       {
591         return Standard_False;
592       }
593
594       aVertOffset += aVertNum;
595     }
596   }
597   else
598   {
599     const int aVertNum = theArray->num_edges > 0 ? theArray->num_edges : theArray->num_vertexs;
600
601 #ifdef RAY_TRACE_PRINT_INFO
602       std::cout << "\tAdd indices: " << aVertNum << std::endl;
603 #endif
604
605     return AddRaytraceVertexIndices (theArray, aFirstVert, 0, aVertNum, theMatID);
606   }
607
608   return Standard_True;
609 }
610
611 // =======================================================================
612 // function : AddRaytraceVertexIndices
613 // purpose  : Adds vertex indices to ray-traced scene geometry
614 // =======================================================================
615 Standard_Boolean OpenGl_Workspace::AddRaytraceVertexIndices (const CALL_DEF_PARRAY* theArray,
616                                                              int                    theFirstVert,
617                                                              int                    theVertOffset,
618                                                              int                    theVertNum,
619                                                              int                    theMatID)
620 {
621   myRaytraceSceneData.Triangles.reserve (myRaytraceSceneData.Triangles.size() + theVertNum);
622   switch (theArray->type)
623   {
624     case TelTrianglesArrayType:        return AddRaytraceTriangleArray        (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
625     case TelQuadranglesArrayType:      return AddRaytraceQuadrangleArray      (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
626     case TelTriangleFansArrayType:     return AddRaytraceTriangleFanArray     (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
627     case TelTriangleStripsArrayType:   return AddRaytraceTriangleStripArray   (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
628     case TelQuadrangleStripsArrayType: return AddRaytraceQuadrangleStripArray (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
629     case TelPolygonsArrayType:         return AddRaytracePolygonArray         (theArray, theFirstVert, theVertOffset, theVertNum, theMatID);
630     default:                           return Standard_False;
631   }
632 }
633
634 // =======================================================================
635 // function : AddRaytraceTriangleArray
636 // purpose  : Adds OpenGL triangle array to ray-traced scene geometry
637 // =======================================================================
638 Standard_Boolean OpenGl_Workspace::AddRaytraceTriangleArray (const CALL_DEF_PARRAY* theArray,
639                                                              int                    theFirstVert,
640                                                              int                    theVertOffset,
641                                                              int                    theVertNum,
642                                                              int                    theMatID)
643 {
644   if (theVertNum < 3)
645     return Standard_True;
646
647   if (theArray->num_edges > 0)
648   {
649     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; aVert += 3)
650     {
651       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theArray->edges[aVert + 0],
652                                                                theFirstVert + theArray->edges[aVert + 1],
653                                                                theFirstVert + theArray->edges[aVert + 2],
654                                                                theMatID));
655     }
656   }
657   else
658   {
659     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; aVert += 3)
660     {
661       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + 0,
662                                                                theFirstVert + aVert + 1,
663                                                                theFirstVert + aVert + 2,
664                                                                theMatID));
665     }
666   }
667
668   return Standard_True;
669 }
670
671 // =======================================================================
672 // function : AddRaytraceTriangleFanArray
673 // purpose  : Adds OpenGL triangle fan array to ray-traced scene geometry
674 // =======================================================================
675 Standard_Boolean OpenGl_Workspace::AddRaytraceTriangleFanArray (const CALL_DEF_PARRAY* theArray,
676                                                                 int                     theFirstVert,
677                                                                 int                     theVertOffset,
678                                                                 int                     theVertNum,
679                                                                 int                     theMatID)
680 {
681   if (theVertNum < 3)
682     return Standard_True;
683
684   if (theArray->num_edges > 0)
685   {
686     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; ++aVert)
687     {
688       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theArray->edges[theVertOffset],
689                                                                theFirstVert + theArray->edges[aVert + 1],
690                                                                theFirstVert + theArray->edges[aVert + 2],
691                                                                theMatID));
692     }
693   }
694   else
695   {
696     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; ++aVert)
697     {
698       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theVertOffset,
699                                                                theFirstVert + aVert + 1,
700                                                                theFirstVert + aVert + 2,
701                                                                theMatID));
702     }
703   }
704
705   return Standard_True;
706 }
707
708 // =======================================================================
709 // function : AddRaytraceTriangleStripArray
710 // purpose  : Adds OpenGL triangle strip array to ray-traced scene geometry
711 // =======================================================================
712 Standard_Boolean OpenGl_Workspace::AddRaytraceTriangleStripArray (const CALL_DEF_PARRAY* theArray,
713                                                                   int                    theFirstVert,
714                                                                   int                    theVertOffset,
715                                                                   int                    theVertNum,
716                                                                   int                    theMatID)
717 {
718   if (theVertNum < 3)
719     return Standard_True;
720
721   if (theArray->num_edges > 0)
722   {
723     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
724                                       theFirstVert + theArray->edges[theVertOffset + 0],
725                                       theFirstVert + theArray->edges[theVertOffset + 1],
726                                       theFirstVert + theArray->edges[theVertOffset + 2],
727                                       theMatID));
728
729     for (int aVert = theVertOffset + 1, aTriNum = 1; aVert < theVertOffset + theVertNum - 2; ++aVert, ++aTriNum)
730     {
731       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
732                                       theFirstVert + theArray->edges[aVert + (aTriNum % 2) ? 1 : 0],
733                                       theFirstVert + theArray->edges[aVert + (aTriNum % 2) ? 0 : 1],
734                                       theFirstVert + theArray->edges[aVert + 2],
735                                       theMatID));
736     }
737   }
738   else
739   {
740     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theVertOffset + 0,
741                                                              theFirstVert + theVertOffset + 1,
742                                                              theFirstVert + theVertOffset + 2,
743                                                              theMatID));
744
745     for (int aVert = theVertOffset + 1, aTriNum = 1; aVert < theVertOffset + theVertNum - 2; ++aVert, ++aTriNum)
746     {
747       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + ( aTriNum % 2 ) ? 1 : 0,
748                                                                theFirstVert + aVert + ( aTriNum % 2 ) ? 0 : 1,
749                                                                theFirstVert + aVert + 2,
750                                                                theMatID));
751     }
752   }
753
754   return Standard_True;
755 }
756
757 // =======================================================================
758 // function : AddRaytraceQuadrangleArray
759 // purpose  : Adds OpenGL quad array to ray-traced scene geometry
760 // =======================================================================
761 Standard_Boolean OpenGl_Workspace::AddRaytraceQuadrangleArray (const CALL_DEF_PARRAY* theArray,
762                                                                int                    theFirstVert,
763                                                                int                    theVertOffset,
764                                                                int                    theVertNum,
765                                                                int                    theMatID)
766 {
767   if (theVertNum < 4)
768     return Standard_True;
769
770   if (theArray->num_edges > 0)
771   {
772     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 3; aVert += 4)
773     {
774       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theArray->edges[aVert + 0],
775                                                                theFirstVert + theArray->edges[aVert + 1],
776                                                                theFirstVert + theArray->edges[aVert + 2],
777                                                                theMatID));
778
779       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theArray->edges[aVert + 0],
780                                                                theFirstVert + theArray->edges[aVert + 2],
781                                                                theFirstVert + theArray->edges[aVert + 3],
782                                                                theMatID));
783     }
784   }
785   else
786   {
787     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 3; aVert += 4)
788     {
789       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + 0,
790                                                                theFirstVert + aVert + 1,
791                                                                theFirstVert + aVert + 2,
792                                                                theMatID));
793
794       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + 0,
795                                                                theFirstVert + aVert + 2,
796                                                                theFirstVert + aVert + 3,
797                                                                theMatID));
798     }
799   }
800
801   return Standard_True;
802 }
803
804 // =======================================================================
805 // function : AddRaytraceQuadrangleStripArray
806 // purpose  : Adds OpenGL quad strip array to ray-traced scene geometry
807 // =======================================================================
808 Standard_Boolean OpenGl_Workspace::AddRaytraceQuadrangleStripArray (const CALL_DEF_PARRAY* theArray,
809                                                                     int                    theFirstVert,
810                                                                     int                    theVertOffset,
811                                                                     int                    theVertNum,
812                                                                     int                    theMatID)
813 {
814   if (theVertNum < 4)
815     return Standard_True;
816
817   if (theArray->num_edges > 0)
818   {
819     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
820                                 theFirstVert + theArray->edges[theVertOffset + 0],
821                                 theFirstVert + theArray->edges[theVertOffset + 1],
822                                 theFirstVert + theArray->edges[theVertOffset + 2],
823                                 theMatID));
824
825     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
826                                 theFirstVert + theArray->edges[theVertOffset + 1],
827                                 theFirstVert + theArray->edges[theVertOffset + 3],
828                                 theFirstVert + theArray->edges[theVertOffset + 2],
829                                 theMatID));
830
831     for (int aVert = theVertOffset + 2; aVert < theVertOffset + theVertNum - 3; aVert += 2)
832     {
833       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
834                                   theFirstVert + theArray->edges[aVert + 0],
835                                   theFirstVert + theArray->edges[aVert + 1],
836                                   theFirstVert + theArray->edges[aVert + 2],
837                                   theMatID));
838
839       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (
840                                   theFirstVert + theArray->edges[aVert + 1],
841                                   theFirstVert + theArray->edges[aVert + 3],
842                                   theFirstVert + theArray->edges[aVert + 2],
843                                   theMatID));
844     }
845   }
846   else
847   {
848     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + 0,
849                                                              theFirstVert + 1,
850                                                              theFirstVert + 2,
851                                                              theMatID));
852
853     myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + 1,
854                                                              theFirstVert + 3,
855                                                              theFirstVert + 2,
856                                                              theMatID));
857
858     for (int aVert = theVertOffset + 2; aVert < theVertOffset + theVertNum - 3; aVert += 2)
859     {
860       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + 0,
861                                                                theFirstVert + aVert + 1,
862                                                                theFirstVert + aVert + 2,
863                                                                theMatID));
864
865       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + aVert + 1,
866                                                                theFirstVert + aVert + 3,
867                                                                theFirstVert + aVert + 2,
868                                                                theMatID));
869     }
870   }
871
872   return Standard_True;
873 }
874
875 // =======================================================================
876 // function : AddRaytracePolygonArray
877 // purpose  : Adds OpenGL polygon array to ray-traced scene geometry
878 // =======================================================================
879 Standard_Boolean OpenGl_Workspace::AddRaytracePolygonArray (const CALL_DEF_PARRAY* theArray,
880                                                             int                    theFirstVert,
881                                                             int                    theVertOffset,
882                                                             int                    theVertNum,
883                                                             int                    theMatID)
884 {
885   if (theArray->num_vertexs < 3)
886     return Standard_True;
887
888   if (theArray->edges != NULL)
889   {
890     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; ++aVert)
891     {
892       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theArray->edges[theVertOffset],
893                                                                theFirstVert + theArray->edges[aVert + 1],
894                                                                theFirstVert + theArray->edges[aVert + 2],
895                                                                theMatID));
896     }
897   }
898   else
899   {
900     for (int aVert = theVertOffset; aVert < theVertOffset + theVertNum - 2; ++aVert)
901     {
902       myRaytraceSceneData.Triangles.push_back (OpenGl_RTVec4i (theFirstVert + theVertOffset,
903                                                                theFirstVert + aVert + 1,
904                                                                theFirstVert + aVert + 2,
905                                                                theMatID));
906     }
907   }
908
909   return Standard_True;
910 }
911
912 // =======================================================================
913 // function : UpdateRaytraceLightSources
914 // purpose  : Updates 3D scene light sources for ray-tracing
915 // =======================================================================
916 Standard_Boolean OpenGl_Workspace::UpdateRaytraceLightSources (const GLdouble theInvModelView[16])
917 {
918   myRaytraceSceneData.LightSources.clear();
919
920   OpenGl_RTVec4f anAmbient (0.0f, 0.0f, 0.0f, 0.0f);
921   for (OpenGl_ListOfLight::Iterator anItl (myView->LightList());
922        anItl.More(); anItl.Next())
923   {
924     const OpenGl_Light& aLight = anItl.Value();
925     if (aLight.Type == Visual3d_TOLS_AMBIENT)
926     {
927       anAmbient += OpenGl_RTVec4f (aLight.Color.r(), aLight.Color.g(), aLight.Color.b(), 0.0f);
928       continue;
929     }
930
931     OpenGl_RTVec4f aDiffuse  (aLight.Color.r(), aLight.Color.g(), aLight.Color.b(), 1.0f);
932     OpenGl_RTVec4f aPosition (-aLight.Direction.x(), -aLight.Direction.y(), -aLight.Direction.z(), 0.0f);
933     if (aLight.Type != Visual3d_TOLS_DIRECTIONAL)
934     {
935       aPosition = OpenGl_RTVec4f (aLight.Position.x(), aLight.Position.y(), aLight.Position.z(), 1.0f);
936     }
937     if (aLight.IsHeadlight)
938     {
939       aPosition = MatVecMult (theInvModelView, aPosition);
940     }
941
942     myRaytraceSceneData.LightSources.push_back (OpenGl_RaytraceLight (aDiffuse, aPosition));
943   }
944
945   if (myRaytraceSceneData.LightSources.size() > 0)
946   {
947     myRaytraceSceneData.LightSources.front().Ambient += anAmbient;
948   }
949   else
950   {
951     myRaytraceSceneData.LightSources.push_back (OpenGl_RaytraceLight (OpenGl_RTVec4f (anAmbient.rgb(), -1.0f)));
952   }
953
954   cl_int anError = CL_SUCCESS;
955
956   if (myRaytraceLightSourceBuffer != NULL)
957     clReleaseMemObject (myRaytraceLightSourceBuffer);
958
959   const size_t myLightBufferSize = myRaytraceSceneData.LightSources.size() > 0
960                                  ? myRaytraceSceneData.LightSources.size()
961                                  : 1;
962
963   myRaytraceLightSourceBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
964                                                 myLightBufferSize * sizeof(OpenGl_RaytraceLight),
965                                                 NULL, &anError);
966
967   if (myRaytraceSceneData.LightSources.size() > 0)
968   {
969     const void* aDataPtr = myRaytraceSceneData.LightSources.front().Packed();
970     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceLightSourceBuffer, CL_TRUE, 0,
971                                      myLightBufferSize * sizeof(OpenGl_RaytraceLight), aDataPtr,
972                                      0, NULL, NULL);
973   }
974
975 #ifdef RAY_TRACE_PRINT_INFO
976   if (anError != CL_SUCCESS)
977   {
978     std::cout << "Error! Failed to set light sources!";
979
980     return Standard_False;
981   }
982 #endif
983
984   return Standard_True;
985 }
986
987 // =======================================================================
988 // function : CheckOpenCL
989 // purpose  : Checks OpenCL dynamic library availability
990 // =======================================================================
991 Standard_Boolean CheckOpenCL()
992 {
993 #if defined ( _WIN32 )
994
995   __try
996   {
997     cl_uint aNbPlatforms;
998     clGetPlatformIDs (0, NULL, &aNbPlatforms);
999   }
1000   __except (EXCEPTION_EXECUTE_HANDLER)
1001   {
1002     return Standard_False;
1003   }
1004
1005 #endif
1006
1007   return Standard_True;
1008 }
1009
1010 // =======================================================================
1011 // function : InitOpenCL
1012 // purpose  : Initializes OpenCL objects
1013 // =======================================================================
1014 Standard_Boolean OpenGl_Workspace::InitOpenCL()
1015 {
1016   if (myComputeInitStatus != OpenGl_CLIS_NONE)
1017   {
1018     return myComputeInitStatus == OpenGl_CLIS_INIT;
1019   }
1020
1021   if (!CheckOpenCL())
1022   {
1023     myComputeInitStatus = OpenGl_CLIS_FAIL; // fail to load OpenCL library
1024     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1025                               GL_DEBUG_TYPE_ERROR_ARB,
1026                               0,
1027                               GL_DEBUG_SEVERITY_HIGH_ARB,
1028                               "Failed to load OpenCL dynamic library!");
1029     return Standard_False;
1030   }
1031
1032   // Obtain the list of platforms available
1033   cl_uint aNbPlatforms = 0;
1034   cl_int  anError = clGetPlatformIDs (0, NULL, &aNbPlatforms);
1035   cl_platform_id* aPlatforms = (cl_platform_id* )alloca (aNbPlatforms * sizeof(cl_platform_id));
1036   anError |= clGetPlatformIDs (aNbPlatforms, aPlatforms, NULL);
1037   if (anError != CL_SUCCESS
1038    || aNbPlatforms == 0)
1039   {
1040     myComputeInitStatus = OpenGl_CLIS_FAIL;
1041     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1042                               GL_DEBUG_TYPE_ERROR_ARB,
1043                               0,
1044                               GL_DEBUG_SEVERITY_HIGH_ARB,
1045                               "No any OpenCL platform installed!");
1046     return Standard_False;
1047   }
1048
1049   // Note: We try to find NVIDIA or AMD platforms with GPU devices!
1050   cl_platform_id aPrefPlatform = NULL;
1051   for (cl_uint aPlatIter = 0; aPlatIter < aNbPlatforms; ++aPlatIter)
1052   {
1053     char aName[256];
1054     anError = clGetPlatformInfo (aPlatforms[aPlatIter], CL_PLATFORM_NAME,
1055                                  sizeof(aName), aName, NULL);
1056     if (anError != CL_SUCCESS)
1057     {
1058       continue;
1059     }
1060
1061     if (strncmp (aName, "NVIDIA", strlen ("NVIDIA")) == 0)
1062     {
1063       aPrefPlatform = aPlatforms[aPlatIter];
1064
1065       // Use optimizations for NVIDIA GPUs
1066       myIsAmdComputePlatform = Standard_False;
1067     }
1068     else if (strncmp (aName, "AMD", strlen ("AMD")) == 0)
1069     {
1070       aPrefPlatform = (aPrefPlatform == NULL)
1071                     ? aPlatforms[aPlatIter]
1072                     : aPrefPlatform;
1073
1074       // Use optimizations for ATI/AMD platform
1075       myIsAmdComputePlatform = Standard_True;
1076     }
1077   }
1078
1079   if (aPrefPlatform == NULL)
1080   {
1081     aPrefPlatform = aPlatforms[0];
1082   }
1083
1084   // Obtain the list of devices available in the selected platform
1085   cl_uint aNbDevices = 0;
1086   anError = clGetDeviceIDs (aPrefPlatform, CL_DEVICE_TYPE_GPU,
1087                             0, NULL, &aNbDevices);
1088
1089   cl_device_id* aDevices = (cl_device_id* )alloca (aNbDevices * sizeof(cl_device_id));
1090   anError |= clGetDeviceIDs (aPrefPlatform, CL_DEVICE_TYPE_GPU,
1091                              aNbDevices, aDevices, NULL);
1092   if (anError != CL_SUCCESS)
1093   {
1094     myComputeInitStatus = OpenGl_CLIS_FAIL;
1095     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1096                               GL_DEBUG_TYPE_ERROR_ARB,
1097                               0,
1098                               GL_DEBUG_SEVERITY_HIGH_ARB,
1099                               "Failed to get OpenCL GPU device!");
1100     return Standard_False;
1101   }
1102
1103   // Note: Simply get first available GPU
1104   cl_device_id aDevice = aDevices[0];
1105
1106   // detect old contexts
1107   char aVerClStr[256];
1108   clGetDeviceInfo (aDevice, CL_DEVICE_VERSION,
1109                    sizeof(aVerClStr), aVerClStr, NULL);
1110   aVerClStr[strlen ("OpenCL 1.0")] = '\0';
1111   const bool isVer10 = strncmp (aVerClStr, "OpenCL 1.0", strlen ("OpenCL 1.0")) == 0;
1112
1113   // Create OpenCL context
1114   cl_context_properties aCtxProp[] =
1115   {
1116   #if defined(__APPLE__) && !defined(MACOSX_USE_GLX)
1117     CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
1118     (cl_context_properties )CGLGetShareGroup (CGLGetCurrentContext()),
1119   #elif defined(_WIN32)
1120     CL_CONTEXT_PLATFORM, (cl_context_properties )aPrefPlatform,
1121     CL_GL_CONTEXT_KHR,   (cl_context_properties )wglGetCurrentContext(),
1122     CL_WGL_HDC_KHR,      (cl_context_properties )wglGetCurrentDC(),
1123   #else
1124     CL_GL_CONTEXT_KHR,   (cl_context_properties )glXGetCurrentContext(),
1125     CL_GLX_DISPLAY_KHR,  (cl_context_properties )glXGetCurrentDisplay(),
1126     CL_CONTEXT_PLATFORM, (cl_context_properties )aPrefPlatform,
1127   #endif
1128     0
1129   };
1130
1131   myComputeContext = clCreateContext (aCtxProp,
1132                                     #if defined(__APPLE__) && !defined(MACOSX_USE_GLX)
1133                                       0, NULL, // device will be taken from GL context
1134                                     #else
1135                                       1, &aDevice,
1136                                     #endif
1137                                       NULL, NULL, &anError);
1138   if (anError != CL_SUCCESS)
1139   {
1140     myComputeInitStatus = OpenGl_CLIS_FAIL;
1141     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1142                               GL_DEBUG_TYPE_ERROR_ARB,
1143                               0,
1144                               GL_DEBUG_SEVERITY_HIGH_ARB,
1145                               "Failed to initialize OpenCL context!");
1146     return Standard_False;
1147   }
1148
1149   // Create OpenCL program
1150   const char* aSources[] =
1151   {
1152     isVer10 ? "#define M_PI_F ( float )( 3.14159265359f )\n" : "",
1153     THE_RAY_TRACE_OPENCL_SOURCE
1154   };
1155   myRaytraceProgram = clCreateProgramWithSource (myComputeContext, 2,
1156                                                  aSources, NULL, &anError);
1157   if (anError != CL_SUCCESS)
1158   {
1159     myComputeInitStatus = OpenGl_CLIS_FAIL;
1160     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1161                               GL_DEBUG_TYPE_ERROR_ARB,
1162                               0,
1163                               GL_DEBUG_SEVERITY_HIGH_ARB,
1164                               "Failed to create OpenCL ray-tracing program!");
1165     return Standard_False;
1166   }
1167
1168   anError = clBuildProgram (myRaytraceProgram, 0,
1169                             NULL, NULL, NULL, NULL);
1170   {
1171     // Fetch build log
1172     size_t aLogLen = 0;
1173     cl_int aResult = clGetProgramBuildInfo (myRaytraceProgram, aDevice,
1174                                             CL_PROGRAM_BUILD_LOG, 0, NULL, &aLogLen);
1175
1176     char* aBuildLog = (char* )alloca (aLogLen);
1177     aResult |= clGetProgramBuildInfo (myRaytraceProgram, aDevice,
1178                                       CL_PROGRAM_BUILD_LOG, aLogLen, aBuildLog, NULL);
1179     if (aResult == CL_SUCCESS)
1180     {
1181       if (anError != CL_SUCCESS)
1182       {
1183         myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1184                                   GL_DEBUG_TYPE_ERROR_ARB,
1185                                   0,
1186                                   GL_DEBUG_SEVERITY_HIGH_ARB,
1187                                   aBuildLog);
1188       }
1189       else
1190       {
1191       #ifdef RAY_TRACE_PRINT_INFO
1192         std::cout << aBuildLog << std::endl;
1193       #endif
1194       }
1195     }
1196   }
1197
1198   if (anError != CL_SUCCESS)
1199   {
1200     return Standard_False;
1201   }
1202
1203   // Create OpenCL ray tracing kernels
1204   myRaytraceRenderKernel = clCreateKernel (myRaytraceProgram, "Main",            &anError);
1205   if (anError != CL_SUCCESS)
1206   {
1207     myComputeInitStatus = OpenGl_CLIS_FAIL;
1208     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1209                               GL_DEBUG_TYPE_ERROR_ARB,
1210                               0,
1211                               GL_DEBUG_SEVERITY_HIGH_ARB,
1212                               "Failed to create OpenCL ray-tracing kernel!");
1213     return Standard_False;
1214   }
1215
1216   myRaytraceSmoothKernel = clCreateKernel (myRaytraceProgram, "MainAntialiased", &anError);
1217   if (anError != CL_SUCCESS)
1218   {
1219     myComputeInitStatus = OpenGl_CLIS_FAIL;
1220     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1221                               GL_DEBUG_TYPE_ERROR_ARB,
1222                               0,
1223                               GL_DEBUG_SEVERITY_HIGH_ARB,
1224                               "Failed to create OpenCL ray-tracing kernel!");
1225     return Standard_False;
1226   }
1227
1228   // Create OpenCL command queue
1229   // Note: For profiling set CL_QUEUE_PROFILING_ENABLE
1230   cl_command_queue_properties aProps = CL_QUEUE_PROFILING_ENABLE;
1231
1232   myRaytraceQueue = clCreateCommandQueue (myComputeContext, aDevice, aProps, &anError);
1233   if (anError != CL_SUCCESS)
1234   {
1235     myComputeInitStatus = OpenGl_CLIS_FAIL;
1236     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1237                               GL_DEBUG_TYPE_ERROR_ARB,
1238                               0,
1239                               GL_DEBUG_SEVERITY_HIGH_ARB,
1240                               "Failed to create OpenCL command queue!");
1241
1242     return Standard_False;
1243   }
1244
1245   myComputeInitStatus = OpenGl_CLIS_INIT; // initialized in normal way
1246   return Standard_True;
1247 }
1248
1249 // =======================================================================
1250 // function : GetOpenClDeviceInfo
1251 // purpose  : Returns information about device used for computations
1252 // =======================================================================
1253 Standard_Boolean OpenGl_Workspace::GetOpenClDeviceInfo (NCollection_DataMap<TCollection_AsciiString,
1254                                                                             TCollection_AsciiString>& theInfo) const
1255 {
1256   theInfo.Clear();
1257   if (myComputeContext == NULL)
1258   {
1259     return Standard_False;
1260   }
1261
1262   size_t aDevicesSize = 0;
1263   cl_int anError = clGetContextInfo (myComputeContext, CL_CONTEXT_DEVICES, 0, NULL, &aDevicesSize);
1264   cl_device_id* aDevices = (cl_device_id* )alloca (aDevicesSize);
1265   anError |= clGetContextInfo (myComputeContext, CL_CONTEXT_DEVICES, aDevicesSize, aDevices, NULL);
1266   if (anError != CL_SUCCESS)
1267   {
1268     return Standard_False;
1269   }
1270
1271   char aDeviceName[256];
1272   anError |= clGetDeviceInfo (aDevices[0], CL_DEVICE_NAME, sizeof(aDeviceName), aDeviceName, NULL);
1273   theInfo.Bind ("Name", aDeviceName);
1274
1275   char aDeviceVendor[256];
1276   anError |= clGetDeviceInfo (aDevices[0], CL_DEVICE_VENDOR, sizeof(aDeviceVendor), aDeviceVendor, NULL);
1277   theInfo.Bind ("Vendor", aDeviceVendor);
1278
1279   cl_device_type aDeviceType;
1280   anError |= clGetDeviceInfo (aDevices[0], CL_DEVICE_TYPE, sizeof(aDeviceType), &aDeviceType, NULL);
1281   theInfo.Bind ("Type", aDeviceType == CL_DEVICE_TYPE_GPU ? "GPU" : "CPU");
1282   return Standard_True;
1283 }
1284
1285 // =======================================================================
1286 // function : ReleaseOpenCL
1287 // purpose  : Releases resources of OpenCL objects
1288 // =======================================================================
1289 void OpenGl_Workspace::ReleaseOpenCL()
1290 {
1291   clReleaseKernel (myRaytraceRenderKernel);
1292   clReleaseKernel (myRaytraceSmoothKernel);
1293
1294   clReleaseProgram (myRaytraceProgram);
1295   clReleaseCommandQueue (myRaytraceQueue);
1296
1297   clReleaseMemObject (myRaytraceOutputImage);
1298   clReleaseMemObject (myRaytraceEnvironment);
1299   clReleaseMemObject (myRaytraceOutputImageSmooth);
1300
1301   clReleaseMemObject (myRaytraceVertexBuffer);
1302   clReleaseMemObject (myRaytraceNormalBuffer);
1303   clReleaseMemObject (myRaytraceTriangleBuffer);
1304
1305   clReleaseMemObject (myRaytraceMaterialBuffer);
1306   clReleaseMemObject (myRaytraceLightSourceBuffer);
1307
1308   clReleaseMemObject (myRaytraceNodeMinPointBuffer);
1309   clReleaseMemObject (myRaytraceNodeMaxPointBuffer);
1310   clReleaseMemObject (myRaytraceNodeDataRcrdBuffer);
1311
1312   clReleaseContext (myComputeContext);
1313
1314   if (glIsTexture (*myRaytraceOutputTexture))
1315     glDeleteTextures (2, myRaytraceOutputTexture);
1316 }
1317
1318 // =======================================================================
1319 // function : ResizeRaytraceOutputBuffer
1320 // purpose  : Resizes OpenCL output image
1321 // =======================================================================
1322 Standard_Boolean OpenGl_Workspace::ResizeRaytraceOutputBuffer (const cl_int theSizeX,
1323                                                                const cl_int theSizeY)
1324 {
1325   if (myComputeContext == NULL)
1326   {
1327     return Standard_False;
1328   }
1329
1330   bool toResize = true;
1331   GLint aSizeX = -1;
1332   GLint aSizeY = -1;
1333   if (*myRaytraceOutputTexture != 0)
1334   {
1335     if (!myGlContext->IsGlGreaterEqual (2, 1))
1336     {
1337       return Standard_False;
1338     }
1339
1340     glBindTexture (GL_TEXTURE_RECTANGLE, *myRaytraceOutputTexture);
1341
1342     glGetTexLevelParameteriv (GL_TEXTURE_RECTANGLE, 0, GL_TEXTURE_WIDTH,  &aSizeX);
1343     glGetTexLevelParameteriv (GL_TEXTURE_RECTANGLE, 0, GL_TEXTURE_HEIGHT, &aSizeY);
1344
1345     toResize = (aSizeX != theSizeX) || (aSizeY != theSizeY);
1346     if (toResize)
1347     {
1348       glDeleteTextures (2, myRaytraceOutputTexture);
1349     }
1350   }
1351   if (!toResize)
1352   {
1353     return Standard_True;
1354   }
1355
1356   glGenTextures (2, myRaytraceOutputTexture);
1357   for (int aTexIter = 0; aTexIter < 2; ++aTexIter)
1358   {
1359     glBindTexture (GL_TEXTURE_RECTANGLE, myRaytraceOutputTexture[aTexIter]);
1360
1361     glTexParameteri (GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP);
1362     glTexParameteri (GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP);
1363     glTexParameteri (GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_R, GL_CLAMP);
1364
1365     glTexParameteri (GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1366     glTexParameteri (GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1367
1368     glTexImage2D (GL_TEXTURE_RECTANGLE, 0, GL_RGBA32F,
1369                   theSizeX, theSizeY, 0,
1370                   GL_RGBA, GL_FLOAT, NULL);
1371   }
1372
1373   cl_int anError = CL_SUCCESS;
1374
1375   if (myRaytraceOutputImage != NULL)
1376   {
1377     clReleaseMemObject (myRaytraceOutputImage);
1378   }
1379   if (myRaytraceOutputImageSmooth != NULL)
1380   {
1381     clReleaseMemObject (myRaytraceOutputImageSmooth);
1382   }
1383
1384   myRaytraceOutputImage = clCreateFromGLTexture2D (myComputeContext, CL_MEM_READ_WRITE,
1385                                                    GL_TEXTURE_RECTANGLE, 0,
1386                                                    myRaytraceOutputTexture[0], &anError);
1387   if (anError != CL_SUCCESS)
1388   {
1389 #ifdef RAY_TRACE_PRINT_INFO
1390     std::cout << "Error! Failed to create output image!" << std::endl;
1391 #endif
1392     return Standard_False;
1393   }
1394
1395   myRaytraceOutputImageSmooth = clCreateFromGLTexture2D (myComputeContext, CL_MEM_READ_WRITE,
1396                                                          GL_TEXTURE_RECTANGLE, 0,
1397                                                          myRaytraceOutputTexture[1], &anError);
1398   if (anError != CL_SUCCESS)
1399   {
1400 #ifdef RAY_TRACE_PRINT_INFO
1401     std::cout << "Error! Failed to create anti-aliased output image!" << std::endl;
1402 #endif
1403     return Standard_False;
1404   }
1405
1406   return Standard_True;
1407 }
1408
1409 // =======================================================================
1410 // function : WriteRaytraceSceneToDevice
1411 // purpose  : Writes scene geometry to OpenCl device
1412 // =======================================================================
1413 Standard_Boolean OpenGl_Workspace::WriteRaytraceSceneToDevice()
1414 {
1415   if (myComputeContext == NULL)
1416     return Standard_False;
1417
1418   cl_int anError = CL_SUCCESS;
1419
1420   if (myRaytraceNormalBuffer != NULL)
1421     anError |= clReleaseMemObject (myRaytraceNormalBuffer);
1422
1423   if (myRaytraceVertexBuffer != NULL)
1424     anError |= clReleaseMemObject (myRaytraceVertexBuffer);
1425
1426   if (myRaytraceTriangleBuffer != NULL)
1427     anError |= clReleaseMemObject (myRaytraceTriangleBuffer);
1428
1429   if (myRaytraceNodeMinPointBuffer != NULL)
1430     anError |= clReleaseMemObject (myRaytraceNodeMinPointBuffer);
1431
1432   if (myRaytraceNodeMaxPointBuffer != NULL)
1433     anError |= clReleaseMemObject (myRaytraceNodeMaxPointBuffer);
1434
1435   if (myRaytraceNodeDataRcrdBuffer != NULL)
1436     anError |= clReleaseMemObject (myRaytraceNodeDataRcrdBuffer);
1437
1438   if (myRaytraceMaterialBuffer != NULL)
1439     anError |= clReleaseMemObject (myRaytraceMaterialBuffer);
1440
1441   if (anError != CL_SUCCESS)
1442   {
1443 #ifdef RAY_TRACE_PRINT_INFO
1444     std::cout << "Error! Failed to release OpenCL scene buffers!" << std::endl;
1445 #endif
1446     return Standard_False;
1447   }
1448
1449   // Create geometry buffers
1450   cl_int anErrorTemp = CL_SUCCESS;
1451   const size_t myVertexBufferSize = myRaytraceSceneData.Vertices.size() > 0
1452                                   ? myRaytraceSceneData.Vertices.size() : 1;
1453
1454   myRaytraceVertexBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1455                                            myVertexBufferSize * sizeof(cl_float4), NULL, &anErrorTemp);
1456   anError |= anErrorTemp;
1457
1458   const size_t myNormalBufferSize = myRaytraceSceneData.Normals.size() > 0
1459                                   ? myRaytraceSceneData.Normals.size() : 1;
1460   myRaytraceNormalBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1461                                            myNormalBufferSize * sizeof(cl_float4), NULL, &anErrorTemp);
1462   anError |= anErrorTemp;
1463
1464   const size_t myTriangleBufferSize = myRaytraceSceneData.Triangles.size() > 0
1465                                     ? myRaytraceSceneData.Triangles.size() : 1;
1466   myRaytraceTriangleBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1467                                              myTriangleBufferSize * sizeof(cl_int4), NULL, &anErrorTemp);
1468   anError |= anErrorTemp;
1469   if (anError != CL_SUCCESS)
1470   {
1471 #ifdef RAY_TRACE_PRINT_INFO
1472     std::cout << "Error! Failed to create OpenCL geometry buffers!" << std::endl;
1473 #endif
1474     return Standard_False;
1475   }
1476
1477   // Create material buffer
1478   const size_t myMaterialBufferSize = myRaytraceSceneData.Materials.size() > 0
1479                                     ? myRaytraceSceneData.Materials.size() : 1;
1480   myRaytraceMaterialBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1481                                              myMaterialBufferSize * sizeof(OpenGl_RaytraceMaterial), NULL,
1482                                              &anErrorTemp);
1483   if (anErrorTemp != CL_SUCCESS)
1484   {
1485 #ifdef RAY_TRACE_PRINT_INFO
1486     std::cout << "Error! Failed to create OpenCL material buffer!" << std::endl;
1487 #endif
1488     return Standard_False;
1489   }
1490
1491   // Create BVH buffers
1492   OpenGl_BVH aTree = myBVHBuilder.Tree();
1493   const size_t myNodeMinPointBufferSize = aTree.MinPointBuffer().size() > 0
1494                                         ? aTree.MinPointBuffer().size() : 1;
1495   myRaytraceNodeMinPointBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1496                                                  myNodeMinPointBufferSize * sizeof(cl_float4), NULL,
1497                                                  &anErrorTemp);
1498   anError |= anErrorTemp;
1499
1500   const size_t myNodeMaxPointBufferSize = aTree.MaxPointBuffer().size() > 0
1501                                         ? aTree.MaxPointBuffer().size() : 1;
1502   myRaytraceNodeMaxPointBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1503                                                  myNodeMaxPointBufferSize * sizeof(cl_float4), NULL,
1504                                                  &anError);
1505   anError |= anErrorTemp;
1506
1507   const size_t myNodeDataRecordBufferSize = aTree.DataRcrdBuffer().size() > 0
1508                                           ? aTree.DataRcrdBuffer().size() : 1;
1509   myRaytraceNodeDataRcrdBuffer = clCreateBuffer (myComputeContext, CL_MEM_READ_ONLY,
1510                                                  myNodeDataRecordBufferSize * sizeof(cl_int4), NULL,
1511                                                  &anError);
1512   anError |= anErrorTemp;
1513   if (anError != CL_SUCCESS)
1514   {
1515 #ifdef RAY_TRACE_PRINT_INFO
1516     std::cout << "Error! Failed to create OpenCL BVH buffers!" << std::endl;
1517 #endif
1518     return Standard_False;
1519   }
1520
1521   // Write scene geometry buffers
1522   if (myRaytraceSceneData.Triangles.size() > 0)
1523   {
1524     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceVertexBuffer, CL_FALSE,
1525                                      0, myRaytraceSceneData.Vertices.size() * sizeof(cl_float4),
1526                                      &myRaytraceSceneData.Vertices.front(),
1527                                      0, NULL, NULL);
1528     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceNormalBuffer, CL_FALSE,
1529                                      0, myRaytraceSceneData.Normals.size() * sizeof(cl_float4),
1530                                      &myRaytraceSceneData.Normals.front(),
1531                                      0, NULL, NULL);
1532     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceTriangleBuffer, CL_FALSE,
1533                                      0, myRaytraceSceneData.Triangles.size() * sizeof(cl_int4),
1534                                      &myRaytraceSceneData.Triangles.front(),
1535                                      0, NULL, NULL);
1536     if (anError != CL_SUCCESS)
1537     {
1538   #ifdef RAY_TRACE_PRINT_INFO
1539       std::cout << "Error! Failed to write OpenCL geometry buffers!" << std::endl;
1540   #endif
1541       return Standard_False;
1542     }
1543   }
1544
1545   // Write BVH buffers
1546   if (aTree.DataRcrdBuffer().size() > 0)
1547   {
1548     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceNodeMinPointBuffer, CL_FALSE,
1549                                      0, aTree.MinPointBuffer().size() * sizeof(cl_float4),
1550                                      &aTree.MinPointBuffer().front(),
1551                                      0, NULL, NULL);
1552     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceNodeMaxPointBuffer, CL_FALSE,
1553                                      0, aTree.MaxPointBuffer().size() * sizeof(cl_float4),
1554                                      &aTree.MaxPointBuffer().front(),
1555                                      0, NULL, NULL);
1556     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceNodeDataRcrdBuffer, CL_FALSE,
1557                                      0, aTree.DataRcrdBuffer().size() * sizeof(cl_int4),
1558                                      &aTree.DataRcrdBuffer().front(),
1559                                      0, NULL, NULL);
1560     if (anError != CL_SUCCESS)
1561     {
1562   #ifdef RAY_TRACE_PRINT_INFO
1563       std::cout << "Error! Failed to write OpenCL BVH buffers!" << std::endl;
1564   #endif
1565       return Standard_False;
1566     }
1567   }
1568
1569   // Write material buffers
1570   if (myRaytraceSceneData.Materials.size() > 0)
1571   {
1572     const size_t aSize    = myRaytraceSceneData.Materials.size();
1573     const void*  aDataPtr = myRaytraceSceneData.Materials.front().Packed();
1574
1575     anError |= clEnqueueWriteBuffer (myRaytraceQueue, myRaytraceMaterialBuffer, CL_FALSE,
1576                                      0, aSize * sizeof(OpenGl_RaytraceMaterial), aDataPtr,
1577                                      0, NULL, NULL);
1578     if (anError != CL_SUCCESS)
1579     {
1580   #ifdef RAY_TRACE_PRINT_INFO
1581       std::cout << "Error! Failed to write OpenCL material buffer!" << std::endl;
1582   #endif
1583       return Standard_False;
1584     }
1585   }
1586
1587   anError |= clFinish (myRaytraceQueue);
1588 #ifdef RAY_TRACE_PRINT_INFO
1589   if (anError != CL_SUCCESS)
1590     std::cout << "Error! Failed to set scene data buffers!" << std::endl;
1591 #endif
1592
1593   if (anError == CL_SUCCESS)
1594     myIsRaytraceDataValid = myRaytraceSceneData.Triangles.size() > 0;
1595
1596 #ifdef RAY_TRACE_PRINT_INFO
1597
1598   float aMemUsed = static_cast<float> (
1599     myRaytraceSceneData.Materials.size()) * sizeof (OpenGl_RaytraceMaterial);
1600
1601   aMemUsed += static_cast<float> (
1602     myRaytraceSceneData.Triangles.size() * sizeof (OpenGl_RTVec4i) +
1603     myRaytraceSceneData.Vertices.size()  * sizeof (OpenGl_RTVec4f) +
1604     myRaytraceSceneData.Normals.size()   * sizeof (OpenGl_RTVec4f));
1605
1606   aMemUsed += static_cast<float> (
1607     aTree.MinPointBuffer().size() * sizeof (OpenGl_RTVec4f) +
1608     aTree.MaxPointBuffer().size() * sizeof (OpenGl_RTVec4f) +
1609     aTree.DataRcrdBuffer().size() * sizeof (OpenGl_RTVec4i));
1610
1611   std::cout << "GPU memory used (Mb): " << aMemUsed / 1e6f << std::endl;
1612
1613 #endif
1614
1615   myRaytraceSceneData.Clear();
1616
1617   myBVHBuilder.CleanUp();
1618
1619   return (CL_SUCCESS == anError);
1620 }
1621
1622 #define OPENCL_GROUP_SIZE_TEST_
1623
1624 // =======================================================================
1625 // function : RunRaytraceOpenCLKernels
1626 // purpose  : Runs OpenCL ray-tracing kernels
1627 // =======================================================================
1628 Standard_Boolean OpenGl_Workspace::RunRaytraceOpenCLKernels (const Graphic3d_CView& theCView,
1629                                                              const GLfloat          theOrigins[16],
1630                                                              const GLfloat          theDirects[16],
1631                                                              const int              theSizeX,
1632                                                              const int              theSizeY)
1633 {
1634   if (myRaytraceRenderKernel == NULL || myRaytraceQueue == NULL)
1635     return Standard_False;
1636
1637   ////////////////////////////////////////////////////////////
1638   // Set kernel arguments
1639
1640   cl_uint anIndex = 0;
1641   cl_int  anError = 0;
1642
1643   anError  = clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1644                              sizeof(cl_mem), &myRaytraceOutputImage);
1645   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1646                              sizeof(cl_mem), &myRaytraceEnvironment);
1647   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1648                              sizeof(cl_mem), &myRaytraceNodeMinPointBuffer);
1649   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1650                              sizeof(cl_mem), &myRaytraceNodeMaxPointBuffer);
1651   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1652                              sizeof(cl_mem), &myRaytraceNodeDataRcrdBuffer);
1653   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1654                              sizeof(cl_mem), &myRaytraceLightSourceBuffer);
1655   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1656                              sizeof(cl_mem), &myRaytraceMaterialBuffer);
1657   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1658                              sizeof(cl_mem), &myRaytraceVertexBuffer);
1659   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1660                              sizeof(cl_mem), &myRaytraceNormalBuffer);
1661   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1662                              sizeof(cl_mem), &myRaytraceTriangleBuffer);
1663
1664   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1665                              sizeof(cl_float16), theOrigins);
1666   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1667                              sizeof(cl_float16), theDirects);
1668
1669   cl_int aLightCount =  static_cast<cl_int> (myRaytraceSceneData.LightSources.size());
1670
1671   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1672                              sizeof(cl_int),   &aLightCount);
1673   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1674                              sizeof(cl_float), &myRaytraceSceneEpsilon);
1675   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1676                              sizeof(cl_float), &myRaytraceSceneRadius);
1677   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1678                              sizeof(cl_int),   &theCView.IsShadowsEnabled);
1679   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1680                              sizeof(cl_int),   &theCView.IsReflectionsEnabled);
1681   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1682                              sizeof(cl_int),   &theSizeX);
1683   anError |= clSetKernelArg (myRaytraceRenderKernel, anIndex++,
1684                              sizeof(cl_int),   &theSizeY);
1685   if (anError != CL_SUCCESS)
1686   {
1687     const TCollection_ExtendedString aMsg = "Error! Failed to set arguments of ray-tracing kernel!";
1688     myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1689                               GL_DEBUG_TYPE_ERROR_ARB,
1690                               0,
1691                               GL_DEBUG_SEVERITY_HIGH_ARB,
1692                               aMsg);
1693     return Standard_False;
1694   }
1695
1696   // Note: second-pass 'smoothing' kernel runs only if anti-aliasing is enabled
1697   if (theCView.IsAntialiasingEnabled)
1698   {
1699     anIndex = 0;
1700     anError  = clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1701                                sizeof(cl_mem), &myRaytraceOutputImage);
1702     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1703                                sizeof(cl_mem), &myRaytraceOutputImageSmooth);
1704     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1705                                sizeof(cl_mem), &myRaytraceEnvironment);
1706     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1707                                sizeof(cl_mem), &myRaytraceNodeMinPointBuffer);
1708     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1709                                sizeof(cl_mem), &myRaytraceNodeMaxPointBuffer);
1710     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1711                                sizeof(cl_mem), &myRaytraceNodeDataRcrdBuffer);
1712     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1713                                sizeof(cl_mem), &myRaytraceLightSourceBuffer);
1714     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1715                                sizeof(cl_mem), &myRaytraceMaterialBuffer);
1716     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1717                                sizeof(cl_mem), &myRaytraceVertexBuffer);
1718     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1719                                sizeof(cl_mem), &myRaytraceNormalBuffer);
1720     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1721                                sizeof(cl_mem), &myRaytraceTriangleBuffer);
1722
1723     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1724                                sizeof(cl_float16), theOrigins);
1725     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1726                                 sizeof(cl_float16), theDirects);
1727
1728     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1729                                sizeof(cl_int),   &aLightCount);
1730     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1731                                sizeof(cl_float), &myRaytraceSceneEpsilon);
1732     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1733                                sizeof(cl_float), &myRaytraceSceneRadius);
1734     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1735                                sizeof(cl_int),   &theCView.IsShadowsEnabled);
1736     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1737                                sizeof(cl_int),   &theCView.IsReflectionsEnabled);
1738     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1739                                sizeof(cl_int),   &theSizeX);
1740     anError |= clSetKernelArg (myRaytraceSmoothKernel, anIndex++,
1741                                sizeof(cl_int),   &theSizeY);
1742     if (anError != CL_SUCCESS)
1743     {
1744       const TCollection_ExtendedString aMsg = "Error! Failed to set arguments of 'smoothing' kernel!";
1745       myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1746                                 GL_DEBUG_TYPE_ERROR_ARB,
1747                                 0,
1748                                 GL_DEBUG_SEVERITY_HIGH_ARB,
1749                                 aMsg);
1750       return Standard_False;
1751     }
1752   }
1753
1754   // Set work size
1755   size_t aLocSizeRender[] = { myIsAmdComputePlatform ? 2 : 4, 32 };
1756
1757 #ifdef OPENCL_GROUP_SIZE_TEST
1758   for (size_t aLocX = 2; aLocX <= 32; aLocX <<= 1 )
1759   for (size_t aLocY = 2; aLocY <= 32; aLocY <<= 1 )
1760 #endif
1761   {
1762 #ifdef OPENCL_GROUP_SIZE_TEST
1763     aLocSizeRender[0] = aLocX;
1764     aLocSizeRender[1] = aLocY;
1765 #endif
1766
1767     size_t aWorkSizeX = theSizeX;
1768     if (aWorkSizeX % aLocSizeRender[0] != 0)
1769       aWorkSizeX += aLocSizeRender[0] - aWorkSizeX % aLocSizeRender[0];
1770
1771     size_t aWokrSizeY = theSizeY;
1772     if (aWokrSizeY % aLocSizeRender[1] != 0 )
1773       aWokrSizeY += aLocSizeRender[1] - aWokrSizeY % aLocSizeRender[1];
1774
1775     size_t aGlbSizeRender[] = { aWorkSizeX, aWokrSizeY };
1776
1777     // Run kernel
1778     cl_event anEvent (NULL), anEventSmooth (NULL);
1779     anError = clEnqueueNDRangeKernel (myRaytraceQueue, myRaytraceRenderKernel,
1780                                       2, NULL, aGlbSizeRender, aLocSizeRender,
1781                                       0, NULL, &anEvent);
1782     if (anError != CL_SUCCESS)
1783     {
1784       const TCollection_ExtendedString aMsg = "Error! Failed to execute the ray-tracing kernel!";
1785       myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1786                                 GL_DEBUG_TYPE_ERROR_ARB,
1787                                 0,
1788                                 GL_DEBUG_SEVERITY_HIGH_ARB,
1789                                 aMsg);
1790       return Standard_False;
1791     }
1792     clWaitForEvents (1, &anEvent);
1793
1794     if (theCView.IsAntialiasingEnabled)
1795     {
1796       size_t aLocSizeSmooth[] = { myIsAmdComputePlatform ? 8 : 4,
1797                                   myIsAmdComputePlatform ? 8 : 32 };
1798
1799 #ifdef OPENCL_GROUP_SIZE_TEST
1800       aLocSizeSmooth[0] = aLocX;
1801       aLocSizeSmooth[1] = aLocY;
1802 #endif
1803
1804       aWorkSizeX = theSizeX;
1805       if (aWorkSizeX % aLocSizeSmooth[0] != 0)
1806         aWorkSizeX += aLocSizeSmooth[0] - aWorkSizeX % aLocSizeSmooth[0];
1807
1808       size_t aWokrSizeY = theSizeY;
1809       if (aWokrSizeY % aLocSizeSmooth[1] != 0 )
1810         aWokrSizeY += aLocSizeSmooth[1] - aWokrSizeY % aLocSizeSmooth[1];
1811
1812       size_t aGlbSizeSmooth [] = { aWorkSizeX, aWokrSizeY };
1813       anError = clEnqueueNDRangeKernel (myRaytraceQueue, myRaytraceSmoothKernel,
1814                                         2, NULL, aGlbSizeSmooth, aLocSizeSmooth,
1815                                         0, NULL, &anEventSmooth);
1816       clWaitForEvents (1, &anEventSmooth);
1817
1818       if (anError != CL_SUCCESS)
1819       {
1820         const TCollection_ExtendedString aMsg = "Error! Failed to execute the 'smoothing' kernel!";
1821         myGlContext->PushMessage (GL_DEBUG_SOURCE_APPLICATION_ARB,
1822                                   GL_DEBUG_TYPE_ERROR_ARB,
1823                                   0,
1824                                   GL_DEBUG_SEVERITY_HIGH_ARB,
1825                                   aMsg);
1826         return Standard_False;
1827       }
1828     }
1829
1830     // Get the profiling data
1831 #if defined (RAY_TRACE_PRINT_INFO) || defined(OPENCL_GROUP_SIZE_TEST)
1832
1833     cl_ulong aTimeStart,
1834              aTimeFinal;
1835
1836     clGetEventProfilingInfo (anEvent, CL_PROFILING_COMMAND_START,
1837                              sizeof(aTimeStart), &aTimeStart, NULL);
1838     clGetEventProfilingInfo (anEvent, CL_PROFILING_COMMAND_END,
1839                              sizeof(aTimeFinal), &aTimeFinal, NULL);
1840     std::cout << "\tRender time (ms): " << ( aTimeFinal - aTimeStart ) / 1e6f << std::endl;
1841
1842     if (theCView.IsAntialiasingEnabled)
1843     {
1844       clGetEventProfilingInfo (anEventSmooth, CL_PROFILING_COMMAND_START,
1845                                sizeof(aTimeStart), &aTimeStart, NULL);
1846       clGetEventProfilingInfo (anEventSmooth, CL_PROFILING_COMMAND_END,
1847                                sizeof(aTimeFinal), &aTimeFinal, NULL);
1848       std::cout << "\tSmoothing time (ms): " << ( aTimeFinal - aTimeStart ) / 1e6f << std::endl;
1849     }
1850 #endif
1851
1852     if (anEvent != NULL)
1853       clReleaseEvent (anEvent);
1854
1855     if (anEventSmooth != NULL)
1856       clReleaseEvent (anEventSmooth);
1857   }
1858
1859   return Standard_True;
1860 }
1861
1862 // =======================================================================
1863 // function : ComputeInverseMatrix
1864 // purpose  : Computes inversion of 4x4 floating-point matrix
1865 // =======================================================================
1866 template <typename T>
1867 void ComputeInverseMatrix (const T m[16], T inv[16])
1868 {
1869   inv[ 0] = m[ 5] * (m[10] * m[15] - m[11] * m[14]) -
1870             m[ 9] * (m[ 6] * m[15] - m[ 7] * m[14]) -
1871             m[13] * (m[ 7] * m[10] - m[ 6] * m[11]);
1872
1873   inv[ 1] = m[ 1] * (m[11] * m[14] - m[10] * m[15]) -
1874             m[ 9] * (m[ 3] * m[14] - m[ 2] * m[15]) -
1875             m[13] * (m[ 2] * m[11] - m[ 3] * m[10]);
1876
1877   inv[ 2] = m[ 1] * (m[ 6] * m[15] - m[ 7] * m[14]) -
1878             m[ 5] * (m[ 2] * m[15] - m[ 3] * m[14]) -
1879             m[13] * (m[ 3] * m[ 6] - m[ 2] * m[ 7]);
1880
1881   inv[ 3] = m[ 1] * (m[ 7] * m[10] - m[ 6] * m[11]) -
1882             m[ 5] * (m[ 3] * m[10] - m[ 2] * m[11]) -
1883             m[ 9] * (m[ 2] * m[ 7] - m[ 3] * m[ 6]);
1884
1885   inv[ 4] = m[ 4] * (m[11] * m[14] - m[10] * m[15]) -
1886             m[ 8] * (m[ 7] * m[14] - m[ 6] * m[15]) -
1887             m[12] * (m[ 6] * m[11] - m[ 7] * m[10]);
1888
1889   inv[ 5] = m[ 0] * (m[10] * m[15] - m[11] * m[14]) -
1890             m[ 8] * (m[ 2] * m[15] - m[ 3] * m[14]) -
1891             m[12] * (m[ 3] * m[10] - m[ 2] * m[11]);
1892
1893   inv[ 6] = m[ 0] * (m[ 7] * m[14] - m[ 6] * m[15]) -
1894             m[ 4] * (m[ 3] * m[14] - m[ 2] * m[15]) -
1895             m[12] * (m[ 2] * m[ 7] - m[ 3] * m[ 6]);
1896
1897   inv[ 7] = m[ 0] * (m[ 6] * m[11] - m[ 7] * m[10]) -
1898             m[ 4] * (m[ 2] * m[11] - m[ 3] * m[10]) -
1899             m[ 8] * (m[ 3] * m[ 6] - m[ 2] * m[ 7]);
1900
1901   inv[ 8] = m[ 4] * (m[ 9] * m[15] - m[11] * m[13]) -
1902             m[ 8] * (m[ 5] * m[15] - m[ 7] * m[13]) -
1903             m[12] * (m[ 7] * m[ 9] - m[ 5] * m[11]);
1904
1905   inv[ 9] = m[ 0] * (m[11] * m[13] - m[ 9] * m[15]) -
1906             m[ 8] * (m[ 3] * m[13] - m[ 1] * m[15]) -
1907             m[12] * (m[ 1] * m[11] - m[ 3] * m[ 9]);
1908
1909   inv[10] = m[ 0] * (m[ 5] * m[15] - m[ 7] * m[13]) -
1910             m[ 4] * (m[ 1] * m[15] - m[ 3] * m[13]) -
1911             m[12] * (m[ 3] * m[ 5] - m[ 1] * m[ 7]);
1912
1913   inv[11] = m[ 0] * (m[ 7] * m[ 9] - m[ 5] * m[11]) -
1914             m[ 4] * (m[ 3] * m[ 9] - m[ 1] * m[11]) -
1915             m[ 8] * (m[ 1] * m[ 7] - m[ 3] * m[ 5]);
1916
1917   inv[12] = m[ 4] * (m[10] * m[13] - m[ 9] * m[14]) -
1918             m[ 8] * (m[ 6] * m[13] - m[ 5] * m[14]) -
1919             m[12] * (m[ 5] * m[10] - m[ 6] * m[ 9]);
1920
1921   inv[13] = m[ 0] * (m[ 9] * m[14] - m[10] * m[13]) -
1922             m[ 8] * (m[ 1] * m[14] - m[ 2] * m[13]) -
1923             m[12] * (m[ 2] * m[ 9] - m[ 1] * m[10]);
1924
1925   inv[14] = m[ 0] * (m[ 6] * m[13] - m[ 5] * m[14]) -
1926             m[ 4] * (m[ 2] * m[13] - m[ 1] * m[14]) -
1927             m[12] * (m[ 1] * m[ 6] - m[ 2] * m[ 5]);
1928
1929   inv[15] = m[ 0] * (m[ 5] * m[10] - m[ 6] * m[ 9]) -
1930             m[ 4] * (m[ 1] * m[10] - m[ 2] * m[ 9]) -
1931             m[ 8] * (m[ 2] * m[ 5] - m[ 1] * m[ 6]);
1932
1933   T det = m[0] * inv[ 0] +
1934           m[1] * inv[ 4] +
1935           m[2] * inv[ 8] +
1936           m[3] * inv[12];
1937
1938   if (det == T (0.0)) return;
1939
1940   det = T (1.0) / det;
1941
1942   for (int i = 0; i < 16; ++i)
1943     inv[i] *= det;
1944 }
1945
1946 // =======================================================================
1947 // function : GenerateCornerRays
1948 // purpose  : Generates primary rays for corners of screen quad
1949 // =======================================================================
1950 void GenerateCornerRays (const GLdouble theInvModelProj[16],
1951                          float          theOrigins[16],
1952                          float          theDirects[16])
1953 {
1954   int aOriginIndex = 0;
1955   int aDirectIndex = 0;
1956
1957   for (int y = -1; y <= 1; y += 2)
1958   {
1959     for (int x = -1; x <= 1; x += 2)
1960     {
1961       OpenGl_RTVec4f aOrigin (float(x),
1962                               float(y),
1963                               -1.f,
1964                               1.f);
1965
1966       aOrigin = MatVecMult (theInvModelProj, aOrigin);
1967
1968       OpenGl_RTVec4f aDirect (float(x),
1969                               float(y),
1970                               1.f,
1971                               1.f);
1972
1973       aDirect = MatVecMult (theInvModelProj, aDirect) - aOrigin;
1974
1975       GLdouble aInvLen = 1.f / sqrt (aDirect.x() * aDirect.x() +
1976                                      aDirect.y() * aDirect.y() +
1977                                      aDirect.z() * aDirect.z());
1978
1979       theOrigins [aOriginIndex++] = static_cast<GLfloat> (aOrigin.x());
1980       theOrigins [aOriginIndex++] = static_cast<GLfloat> (aOrigin.y());
1981       theOrigins [aOriginIndex++] = static_cast<GLfloat> (aOrigin.z());
1982       theOrigins [aOriginIndex++] = 1.f;
1983
1984       theDirects [aDirectIndex++] = static_cast<GLfloat> (aDirect.x() * aInvLen);
1985       theDirects [aDirectIndex++] = static_cast<GLfloat> (aDirect.y() * aInvLen);
1986       theDirects [aDirectIndex++] = static_cast<GLfloat> (aDirect.z() * aInvLen);
1987       theDirects [aDirectIndex++] = 0.f;
1988     }
1989   }
1990 }
1991
1992 // =======================================================================
1993 // function : Raytrace
1994 // purpose  : Redraws the window using OpenCL ray tracing
1995 // =======================================================================
1996 Standard_Boolean OpenGl_Workspace::Raytrace (const Graphic3d_CView& theCView,
1997                                              const int              theSizeX,
1998                                              int                    theSizeY,
1999                                              const Tint             theToSwap)
2000 {
2001   if (!InitOpenCL())
2002     return Standard_False;
2003
2004   if (!ResizeRaytraceOutputBuffer (theSizeX, theSizeY))
2005     return Standard_False;
2006
2007   if (!UpdateRaytraceEnvironmentMap())
2008     return Standard_False;
2009
2010   if (!UpdateRaytraceGeometry (Standard_True))
2011     return Standard_False;
2012
2013   // Get model-view and projection matrices
2014   TColStd_Array2OfReal theOrientation (0, 3, 0, 3);
2015   TColStd_Array2OfReal theViewMapping (0, 3, 0, 3);
2016
2017   myView->GetMatrices (theOrientation, theViewMapping, Standard_True);
2018
2019   GLdouble aOrientationMatrix[16];
2020   GLdouble aViewMappingMatrix[16];
2021   GLdouble aOrientationInvers[16];
2022
2023   for (int j = 0; j < 4; ++j)
2024     for (int i = 0; i < 4; ++i)
2025     {
2026       aOrientationMatrix [4 * j + i] = theOrientation (i, j);
2027       aViewMappingMatrix [4 * j + i] = theViewMapping (i, j);
2028     }
2029
2030   ComputeInverseMatrix (aOrientationMatrix, aOrientationInvers);
2031
2032   if (!UpdateRaytraceLightSources (aOrientationInvers))
2033     return Standard_False;
2034
2035   // Generate primary rays for corners of the screen quad
2036   glMatrixMode (GL_MODELVIEW);
2037
2038   glLoadMatrixd (aViewMappingMatrix);
2039   glMultMatrixd (aOrientationMatrix);
2040
2041   GLdouble aModelProject[16];
2042   GLdouble aInvModelProj[16];
2043
2044   glGetDoublev (GL_MODELVIEW_MATRIX, aModelProject);
2045
2046   ComputeInverseMatrix (aModelProject, aInvModelProj);
2047
2048   GLfloat aOrigins[16];
2049   GLfloat aDirects[16];
2050
2051   GenerateCornerRays (aInvModelProj,
2052                       aOrigins,
2053                       aDirects);
2054
2055   // Compute ray-traced image using OpenCL kernel
2056   cl_mem anImages[] = { myRaytraceOutputImage, myRaytraceOutputImageSmooth };
2057   cl_int anError = clEnqueueAcquireGLObjects (myRaytraceQueue,
2058                                               2, anImages,
2059                                               0, NULL, NULL);
2060   clFinish (myRaytraceQueue);
2061
2062   if (myIsRaytraceDataValid)
2063   {
2064     RunRaytraceOpenCLKernels (theCView,
2065                               aOrigins,
2066                               aDirects,
2067                               theSizeX,
2068                               theSizeY);
2069   }
2070
2071   anError |= clEnqueueReleaseGLObjects (myRaytraceQueue,
2072                                         2, anImages,
2073                                         0, NULL, NULL);
2074   clFinish (myRaytraceQueue);
2075
2076   // Draw background
2077   glPushAttrib (GL_ENABLE_BIT |
2078                 GL_CURRENT_BIT |
2079                 GL_COLOR_BUFFER_BIT |
2080                 GL_DEPTH_BUFFER_BIT);
2081
2082   glDisable (GL_DEPTH_TEST);
2083
2084   if (NamedStatus & OPENGL_NS_WHITEBACK)
2085   {
2086     glClearColor (1.0f, 1.0f, 1.0f, 1.0f);
2087   }
2088   else
2089   {
2090     glClearColor (myBgColor.rgb[0],
2091                   myBgColor.rgb[1],
2092                   myBgColor.rgb[2],
2093                   1.0f);
2094   }
2095
2096   glClear (GL_COLOR_BUFFER_BIT);
2097
2098   Handle(OpenGl_Workspace) aWorkspace (this);
2099   myView->DrawBackground (aWorkspace);
2100
2101   // Draw dummy quad to show result image
2102   glEnable (GL_COLOR_MATERIAL);
2103   glEnable (GL_BLEND);
2104
2105   glDisable (GL_DEPTH_TEST);
2106
2107   glBlendFunc (GL_ONE, GL_SRC_ALPHA);
2108
2109   glEnable (GL_TEXTURE_RECTANGLE);
2110
2111   glMatrixMode (GL_PROJECTION);
2112   glLoadIdentity();
2113
2114   glMatrixMode (GL_MODELVIEW);
2115   glLoadIdentity();
2116
2117   glColor3f (1.0f, 1.0f, 1.0f);
2118
2119   glBindTexture (GL_TEXTURE_RECTANGLE, myRaytraceOutputTexture[theCView.IsAntialiasingEnabled ? 1 : 0]);
2120
2121   if (myIsRaytraceDataValid)
2122   {
2123     glBegin (GL_QUADS);
2124     {
2125       glTexCoord2i (       0,        0);   glVertex2f (-1.f, -1.f);
2126       glTexCoord2i (       0, theSizeY);   glVertex2f (-1.f,  1.f);
2127       glTexCoord2i (theSizeX, theSizeY);   glVertex2f ( 1.f,  1.f);
2128       glTexCoord2i (theSizeX,        0);   glVertex2f ( 1.f, -1.f);
2129     }
2130     glEnd();
2131   }
2132
2133   glPopAttrib();
2134
2135   // Swap the buffers
2136   if (theToSwap)
2137   {
2138     GetGlContext()->SwapBuffers();
2139     myBackBufferRestored = Standard_False;
2140   }
2141   else
2142     glFlush();
2143
2144   return Standard_True;
2145 }
2146
2147 #endif