diff --git a/jme3-core/src/main/java/com/jme3/opencl/Buffer.java b/jme3-core/src/main/java/com/jme3/opencl/Buffer.java
index 66ce0c8f7..ebb7d2bb2 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/Buffer.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/Buffer.java
@@ -377,6 +377,29 @@ public abstract class Buffer extends AbstractOpenCLObject {
* @return the event object
*/
public abstract Event acquireBufferForSharingAsync(CommandQueue queue);
+
+ /**
+ * Acquires this buffer object for use. Only call this method if this buffer
+ * represents a shared object from OpenGL, created with e.g.
+ * {@link Context#bindVertexBuffer(com.jme3.scene.VertexBuffer, com.jme3.opencl.MemoryAccess) }.
+ * This method must be called before the buffer is used. After the work is
+ * done, the buffer must be released by calling
+ * {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * so that OpenGL can use the VertexBuffer again.
+ *
+ * The generated event object is released immediately.
+ * This can improve performance when the resource is e.g. used directly
+ * by a kernel afterwards on the same queue (which implicitly waits for
+ * this action). If you need the event, use
+ * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
+ *
+ * @param queue the command queue
+ */
+ public void acquireBufferForSharingNoEvent(CommandQueue queue) {
+ //default implementation: delegates to the async variant and releases its event; override for better performance
+ acquireBufferForSharingAsync(queue).release();
+ }
+
/**
* Releases a shared buffer object.
* Call this method after the buffer object was acquired by
@@ -387,5 +410,18 @@ public abstract class Buffer extends AbstractOpenCLObject {
*/
public abstract Event releaseBufferForSharingAsync(CommandQueue queue);
- //TODO: add variants of the above two methods that don't create the event object, but release the event immediately
+ /**
+ * Releases a shared buffer object.
+ * Call this method after the buffer object was acquired by
+ * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * (or its no-event variant) to hand control back to OpenGL.
+ * The generated event object is released immediately. If you need the event,
+ * use {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
+ * @param queue the command queue
+ */
+ public void releaseBufferForSharingNoEvent(CommandQueue queue) {
+ //default implementation: delegates to the async variant and releases its event; override for better performance
+ releaseBufferForSharingAsync(queue).release();
+ }
+
}
diff --git a/jme3-core/src/main/java/com/jme3/opencl/Image.java b/jme3-core/src/main/java/com/jme3/opencl/Image.java
index e58c20f7e..f9d9d9f28 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/Image.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/Image.java
@@ -485,6 +485,30 @@ memory layout in which channels are stored in the image.
* @return the event object
*/
public abstract Event acquireImageForSharingAsync(CommandQueue queue);
+
+ /**
+ * Acquires this image object for use. Only call this method if this image
+ * represents a shared object from OpenGL, created with e.g.
+ * {@link Context#bindImage(com.jme3.texture.Image, com.jme3.texture.Texture.Type, int, com.jme3.opencl.MemoryAccess) }
+ * or variations.
+ * This method must be called before the image is used. After the work is
+ * done, the image must be released by calling
+ * {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * so that OpenGL can use the image/texture/renderbuffer again.
+ *
+ * The generated event object is released immediately.
+ * This can improve performance when the resource is e.g. used directly
+ * by a kernel afterwards on the same queue (which implicitly waits for
+ * this action). If you need the event, use
+ * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
+ *
+ * @param queue the command queue
+ */
+ public void acquireImageForSharingNoEvent(CommandQueue queue) {
+ //default implementation: delegates to the async variant and releases its event; override for better performance
+ acquireImageForSharingAsync(queue).release();
+ }
+
/**
* Releases a shared image object.
* Call this method after the image object was acquired by
@@ -495,5 +519,18 @@ memory layout in which channels are stored in the image.
*/
public abstract Event releaseImageForSharingAsync(CommandQueue queue);
+ /**
+ * Releases a shared image object.
+ * Call this method after the image object was acquired by
+ * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * (or its no-event variant) to hand control back to OpenGL.
+ * The generated event object is released immediately. If you need the event,
+ * use {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
+ * @param queue the command queue
+ */
+ public void releaseImageForSharingNoEvent(CommandQueue queue) {
+ //default implementation: delegates to the async variant and releases its event; override for better performance
+ releaseImageForSharingAsync(queue).release();
+ }
+
- //TODO: add variants of the above two methods that don't create the event object, but release the event immediately
}
diff --git a/jme3-core/src/main/java/com/jme3/opencl/Kernel.java b/jme3-core/src/main/java/com/jme3/opencl/Kernel.java
index 82fbf9018..17525a4bd 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/Kernel.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/Kernel.java
@@ -299,6 +299,9 @@ public abstract class Kernel extends AbstractOpenCLObject {
/**
* Launches the kernel with the current global work size, work group size
* and arguments.
+ * If the returned event object is not needed and would otherwise be
+ * released immediately, {@link #RunNoEvent(com.jme3.opencl.CommandQueue) }
+ * may offer better performance.
* @param queue the command queue
* @return an event object indicating when the kernel is finished
* @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize)
@@ -306,6 +309,22 @@ public abstract class Kernel extends AbstractOpenCLObject {
* @see #setArg(int, java.lang.Object)
*/
public abstract Event Run(CommandQueue queue);
+
+ /**
+ * Launches the kernel with the current global work size, work group size
+ * and arguments without returning an event object.
+ * The generated event is released immediately. This can improve performance,
+ * but there is no way to detect when the kernel execution
+ * has finished. If you need that, use {@link #Run(com.jme3.opencl.CommandQueue) }.
+ * @param queue the command queue
+ * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize)
+ * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize)
+ * @see #setArg(int, java.lang.Object)
+ */
+ public void RunNoEvent(CommandQueue queue) {
+ //default implementation: runs the kernel and releases the returned event; override to avoid allocating the event object
+ Run(queue).release();
+ }
/**
* Sets the work sizes and arguments in one call and launches the kernel.
@@ -325,6 +344,28 @@ public abstract class Kernel extends AbstractOpenCLObject {
setArgs(args);
return Run(queue);
}
+
+ /**
+ * Sets the work sizes and arguments in one call and launches the kernel.
+ * The global work size is set to the specified size. The work group
+ * size is automatically determined by the driver.
+ * Each object in the argument array is sent to the kernel by
+ * {@link #setArg(int, java.lang.Object) }.
+ * The generated event is released immediately. This can improve performance,
+ * but there is no way to detect when the kernel execution
+ * has finished. If you need that, use
+ * {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
+ * @param queue the command queue
+ * @param globalWorkSize the global work size
+ * @param args the kernel arguments
+ * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...)
+ */
+ public void Run1NoEvent(CommandQueue queue, WorkSize globalWorkSize, Object... args) {
+ setGlobalWorkSize(globalWorkSize);
+ setWorkGroupSizeToNull();
+ setArgs(args);
+ RunNoEvent(queue);
+ }
/**
* Sets the work sizes and arguments in one call and launches the kernel.
@@ -342,8 +383,25 @@ public abstract class Kernel extends AbstractOpenCLObject {
return Run(queue);
}
- //TODO: add variants of the above three methods that don't create the event object, but release the event immediately
-
+ /**
+ * Sets the work sizes and arguments in one call and launches the kernel.
+ * The generated event is released immediately. This can improve performance,
+ * but there is no way to detect when the kernel execution
+ * has finished. If you need that, use
+ * {@link #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
+ * @param queue the command queue
+ * @param globalWorkSize the global work size
+ * @param workGroupSize the work group size
+ * @param args the kernel arguments
+ */
+ public void Run2NoEvent(CommandQueue queue, WorkSize globalWorkSize,
+ WorkSize workGroupSize, Object... args) {
+ setGlobalWorkSize(globalWorkSize);
+ setWorkGroupSize(workGroupSize);
+ setArgs(args);
+ RunNoEvent(queue);
+ }
+
/**
* A placeholder for kernel arguments representing local kernel memory.
* This defines the size of available shared memory of a {@code __shared} kernel
diff --git a/jme3-core/src/main/java/com/jme3/opencl/package-info.java b/jme3-core/src/main/java/com/jme3/opencl/package-info.java
index 5b4fcbc0b..c96a026d1 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/package-info.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/package-info.java
@@ -89,6 +89,16 @@
* These async calls all return {@link com.jme3.opencl.Event} objects.
* These events can be used to check (non-blocking) if the action has completed, e.g. a memory copy
* is finished, or to block the execution until the action has finished.
+ *
+ * Some methods have the suffix {@code -NoEvent}. This means that these methods
+ * don't return an event object even if the OpenCL function would return an event.
+ * There is always an alternative version that does return an event.
+ * These methods exist to improve performance: since all actions (like multiple kernel calls)
+ * that are sent to the same command queue are executed in order, there is no
+ * need for intermediate events (they would be released immediately anyway).
+ * Therefore, the no-event alternatives improve performance
+ * because no additional event object has to be allocated and fewer system calls
+ * are necessary.
*
*
* Interoperability between OpenCL and jME3:
@@ -142,6 +152,10 @@
* thrown. The exception always records the error code and error name and the
* OpenCL function call where the error was detected. Please check the official
* OpenCL specification for the meanings of these errors for that particular function.
+ *