added alternative versions for kernel launches and resource acquiring that do not return an event. This improves the performance.

define_list_fix
shamanDevel 9 years ago
parent 4be6013068
commit 54113f35e0
  1. 38
      jme3-core/src/main/java/com/jme3/opencl/Buffer.java
  2. 38
      jme3-core/src/main/java/com/jme3/opencl/Image.java
  3. 60
      jme3-core/src/main/java/com/jme3/opencl/Kernel.java
  4. 14
      jme3-core/src/main/java/com/jme3/opencl/package-info.java
  5. 6
      jme3-examples/src/main/java/jme3test/opencl/TestVertexBufferSharing.java
  6. 8
      jme3-examples/src/main/java/jme3test/opencl/TestWriteToTexture.java
  7. 14
      jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclBuffer.java
  8. 14
      jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclImage.java
  9. 18
      jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclKernel.java
  10. 12
      jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglBuffer.java
  11. 13
      jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglImage.java
  12. 17
      jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglKernel.java

@ -377,6 +377,29 @@ public abstract class Buffer extends AbstractOpenCLObject {
* @return the event object * @return the event object
*/ */
public abstract Event acquireBufferForSharingAsync(CommandQueue queue); public abstract Event acquireBufferForSharingAsync(CommandQueue queue);
/**
 * Acquires this buffer object for use by OpenCL without returning an event.
 * <p>
 * Only call this method if this buffer represents a shared object from
 * OpenGL, created with e.g.
 * {@link Context#bindVertexBuffer(com.jme3.scene.VertexBuffer, com.jme3.opencl.MemoryAccess) }.
 * The buffer must be acquired before it is used. Once the work is done, it
 * must be handed back with
 * {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
 * or {@link #releaseBufferForSharingNoEvent(com.jme3.opencl.CommandQueue) }
 * so that OpenGL can use the VertexBuffer again.
 * <p>
 * The event generated by the acquire operation is released right away.
 * This is a performance improvement when the resource is, for example,
 * used by a kernel on the same queue directly afterwards (the kernel
 * implicitly waits for this action). If the event is needed, call
 * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
 *
 * @param queue the command queue
 */
public void acquireBufferForSharingNoEvent(CommandQueue queue) {
    // Generic fallback: enqueue through the event-returning variant and drop
    // the event immediately. Subclasses should override this to avoid
    // creating the event in the first place.
    Event acquired = acquireBufferForSharingAsync(queue);
    acquired.release();
}
/** /**
* Releases a shared buffer object. * Releases a shared buffer object.
* Call this method after the buffer object was acquired by * Call this method after the buffer object was acquired by
@ -387,5 +410,18 @@ public abstract class Buffer extends AbstractOpenCLObject {
*/ */
public abstract Event releaseBufferForSharingAsync(CommandQueue queue); public abstract Event releaseBufferForSharingAsync(CommandQueue queue);
//TODO: add variants of the above two methods that don't create the event object, but release the event immediately /**
* Releases a shared buffer object.
* Call this method after the buffer object was acquired by
* {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
* to hand the control back to OpenGL.
* The generated event object is directly released, resulting in
* performance improvements.
* @param queue the command queue
*/
public void releaseBufferForSharingNoEvent(CommandQueue queue) {
    // Generic fallback: enqueue the release through the event-returning
    // variant and drop the event immediately. Subclasses should override
    // this to avoid creating the event in the first place.
    Event released = releaseBufferForSharingAsync(queue);
    released.release();
}
} }

@ -485,6 +485,30 @@ memory layout in which channels are stored in the image.
* @return the event object * @return the event object
*/ */
public abstract Event acquireImageForSharingAsync(CommandQueue queue); public abstract Event acquireImageForSharingAsync(CommandQueue queue);
/**
 * Acquires this image object for use by OpenCL without returning an event.
 * <p>
 * Only call this method if this image represents a shared object from
 * OpenGL, created with e.g.
 * {@link Context#bindImage(com.jme3.texture.Image, com.jme3.texture.Texture.Type, int, com.jme3.opencl.MemoryAccess) }
 * or variations.
 * The image must be acquired before it is used. Once the work is done, it
 * must be handed back with
 * {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) }
 * or {@link #releaseImageForSharingNoEvent(com.jme3.opencl.CommandQueue) }
 * so that OpenGL can use the image/texture/renderbuffer again.
 * <p>
 * The event generated by the acquire operation is released right away.
 * This is a performance improvement when the resource is, for example,
 * used by a kernel on the same queue directly afterwards (the kernel
 * implicitly waits for this action). If the event is needed, call
 * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
 *
 * @param queue the command queue
 */
public void acquireImageForSharingNoEvent(CommandQueue queue) {
    // Generic fallback: enqueue through the event-returning variant and drop
    // the event immediately. Subclasses should override this to avoid
    // creating the event in the first place.
    Event acquired = acquireImageForSharingAsync(queue);
    acquired.release();
}
/** /**
* Releases a shared image object. * Releases a shared image object.
* Call this method after the image object was acquired by * Call this method after the image object was acquired by
@ -495,5 +519,19 @@ memory layout in which channels are stored in the image.
*/ */
public abstract Event releaseImageForSharingAsync(CommandQueue queue); public abstract Event releaseImageForSharingAsync(CommandQueue queue);
/**
 * Releases a shared image object without returning an event.
 * <p>
 * Call this method after the image object was acquired by
 * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }
 * or {@link #acquireImageForSharingNoEvent(com.jme3.opencl.CommandQueue) }
 * to hand the control back to OpenGL.
 * The event generated by the release operation is released right away,
 * which results in performance improvements.
 *
 * @param queue the command queue
 */
public void releaseImageForSharingNoEvent(CommandQueue queue) {
    // Generic fallback: enqueue the release through the event-returning
    // variant and drop the event immediately. Subclasses should override
    // this to avoid creating the event in the first place.
    Event released = releaseImageForSharingAsync(queue);
    released.release();
}
//TODO: add variants of the above two methods that don't create the event object, but release the event immediately //TODO: add variants of the above two methods that don't create the event object, but release the event immediately
} }

@ -299,6 +299,9 @@ public abstract class Kernel extends AbstractOpenCLObject {
/** /**
* Launches the kernel with the current global work size, work group size * Launches the kernel with the current global work size, work group size
* and arguments. * and arguments.
* If the returned event object is not needed and would otherwise be
* released immediately, {@link #RunNoEvent(com.jme3.opencl.CommandQueue) }
* might bring a better performance.
* @param queue the command queue * @param queue the command queue
* @return an event object indicating when the kernel is finished * @return an event object indicating when the kernel is finished
* @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize)
@ -307,6 +310,22 @@ public abstract class Kernel extends AbstractOpenCLObject {
*/ */
public abstract Event Run(CommandQueue queue); public abstract Event Run(CommandQueue queue);
/**
 * Launches the kernel with the current global work size, work group size
 * and arguments, but does not hand an event object back to the caller.
 * <p>
 * The event generated by the launch is released right away. This gives
 * better performance, at the price that there is no way to detect when the
 * kernel execution has finished. If that is required, use
 * {@link #Run(com.jme3.opencl.CommandQueue) } instead.
 *
 * @param queue the command queue
 * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize)
 * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize)
 * @see #setArg(int, java.lang.Object)
 */
public void RunNoEvent(CommandQueue queue) {
    // Generic fallback: launch through the event-returning variant and drop
    // the event immediately. Subclasses should override this so that no
    // event object is allocated at all.
    Event finished = Run(queue);
    finished.release();
}
/** /**
* Sets the work sizes and arguments in one call and launches the kernel. * Sets the work sizes and arguments in one call and launches the kernel.
* The global work size is set to the specified size. The work group * The global work size is set to the specified size. The work group
@ -326,6 +345,28 @@ public abstract class Kernel extends AbstractOpenCLObject {
return Run(queue); return Run(queue);
} }
/**
 * Configures the kernel and launches it in a single call, without handing
 * an event object back to the caller.
 * <p>
 * The global work size is set to the specified size, while the work group
 * size is left to be determined automatically by the driver.
 * Every entry of the argument array is passed to the kernel through
 * {@link #setArg(int, java.lang.Object) }.
 * <p>
 * The event generated by the launch is released right away. This gives
 * better performance, at the price that there is no way to detect when the
 * kernel execution has finished. If that is required, use
 * {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
 *
 * @param queue the command queue
 * @param globalWorkSize the global work size
 * @param args the kernel arguments
 * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...)
 */
public void Run1NoEvent(CommandQueue queue, WorkSize globalWorkSize, Object... args) {
    // Step 1: configure the launch (explicit global size, no explicit
    // work group size, then the kernel arguments).
    this.setGlobalWorkSize(globalWorkSize);
    this.setWorkGroupSizeToNull();
    this.setArgs(args);

    // Step 2: enqueue the kernel without keeping the event.
    this.RunNoEvent(queue);
}
/** /**
* Sets the work sizes and arguments in one call and launches the kernel. * Sets the work sizes and arguments in one call and launches the kernel.
* @param queue the command queue * @param queue the command queue
@ -342,7 +383,24 @@ public abstract class Kernel extends AbstractOpenCLObject {
return Run(queue); return Run(queue);
} }
//TODO: add variants of the above three methods that don't create the event object, but release the event immediately /**
* Sets the work sizes and arguments in one call and launches the kernel.
* The generated event is directly released. Therefore, the performance
* is better, but there is no way to detect when the kernel execution
* has finished. For this purpose, use
* {@link #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
* @param queue the command queue
* @param globalWorkSize the global work size
* @param workGroupSize the work group size
* @param args the kernel arguments
*/
public void Run2NoEvent(CommandQueue queue, WorkSize globalWorkSize,
        WorkSize workGroupSize, Object... args) {
    // Step 1: configure the launch (explicit global and work group sizes,
    // then the kernel arguments).
    this.setGlobalWorkSize(globalWorkSize);
    this.setWorkGroupSize(workGroupSize);
    this.setArgs(args);

    // Step 2: enqueue the kernel without keeping the event.
    this.RunNoEvent(queue);
}
/** /**
* A placeholder for kernel arguments representing local kernel memory. * A placeholder for kernel arguments representing local kernel memory.

@ -89,6 +89,16 @@
* These async calls all return {@link com.jme3.opencl.Event} objects. * These async calls all return {@link com.jme3.opencl.Event} objects.
* These events can be used to check (non-blocking) if the action has completed, e.g. a memory copy * These events can be used to check (non-blocking) if the action has completed, e.g. a memory copy
* is finished, or to block the execution until the action has finished. * is finished, or to block the execution until the action has finished.
* <br>
* Some methods have the suffix {@code -NoEvent}. This means that these methods
* don't return an event object even if the OpenCL function would return an event.
* There is always an alternative version that does return an event.
* These methods exist to increase the performance: since all actions (like multiple kernel calls)
* that are sent to the same command queue are executed in order, there is no
* need for intermediate events. (These intermediate events would be released
* immediately). Therefore, the no-event alternatives increase the performance
* because no additional event object has to be allocated and fewer system calls
* are necessary.
* *
* <p> * <p>
* <b>Interoperability between OpenCL and jME3:</b><br> * <b>Interoperability between OpenCL and jME3:</b><br>
@ -142,6 +152,10 @@
* thrown. The exception always records the error code and error name and the * thrown. The exception always records the error code and error name and the
* OpenCL function call where the error was detected. Please check the official * OpenCL function call where the error was detected. Please check the official
* OpenCL specification for the meanings of these errors for that particular function.</li> * OpenCL specification for the meanings of these errors for that particular function.</li>
* <li>{@code UnsupportedOperationException}: the OpenCL implementation does not
* support some operations. This is currently only an issue for Jogamp's Jogl
* renderer, since Jocl only supports OpenCL 1.1. LWJGL has full support for
* OpenCL 1.2 and 2.0.</li>
* </ul> * </ul>
*/ */
package com.jme3.opencl; package com.jme3.opencl;

@ -135,15 +135,15 @@ public class TestVertexBufferSharing extends SimpleApplication {
time += tpf; time += tpf;
//aquire resource //aquire resource
buffer.acquireBufferForSharingAsync(clQueue).release(); buffer.acquireBufferForSharingNoEvent(clQueue);
//no need to wait for the returned event, since the kernel implicitely waits for it (same command queue) //no need to wait for the returned event, since the kernel implicitely waits for it (same command queue)
//execute kernel //execute kernel
float scale = (float) Math.pow(1.1, (1.0 - time%2) / 16.0); float scale = (float) Math.pow(1.1, (1.0 - time%2) / 16.0);
kernel.Run1(clQueue, ws, buffer, scale).release(); kernel.Run1NoEvent(clQueue, ws, buffer, scale);
//release resource //release resource
buffer.releaseBufferForSharingAsync(clQueue).release(); buffer.releaseBufferForSharingNoEvent(clQueue);
} }
} }

@ -136,15 +136,15 @@ public class TestWriteToTexture extends SimpleApplication implements AnalogListe
} }
private void updateOpenCL(float tpf) { private void updateOpenCL(float tpf) {
//aquire resource //aquire resource
texCL.acquireImageForSharingAsync(clQueue).release(); texCL.acquireImageForSharingNoEvent(clQueue);
//no need to wait for the returned event, since the kernel implicitely waits for it (same command queue) //no need to wait for the returned event, since the kernel implicitely waits for it (same command queue)
//execute kernel //execute kernel
kernel.Run1(clQueue, new com.jme3.opencl.Kernel.WorkSize(settings.getWidth(), settings.getHeight()), texCL, C, 16) Kernel.WorkSize ws = new Kernel.WorkSize(settings.getWidth(), settings.getHeight());
.release(); kernel.Run1NoEvent(clQueue, ws, texCL, C, 16);
//release resource //release resource
texCL.releaseImageForSharingAsync(clQueue).release(); texCL.releaseImageForSharingNoEvent(clQueue);
} }
@Override @Override

@ -191,6 +191,13 @@ public class JoclBuffer extends Buffer {
long event = Utils.pointers[0].get(0); long event = Utils.pointers[0].get(0);
return new JoclEvent(event); return new JoclEvent(event);
} }
/**
 * Enqueues the acquisition of this shared buffer on the given queue
 * without requesting an event from OpenCL.
 * <p>
 * The OpenCL return code is validated with {@code Utils.checkError}, the
 * same way the other no-event enqueue calls do, so that a failed acquire
 * is reported instead of being silently ignored.
 *
 * @param queue the command queue, must be a {@code JoclCommandQueue}
 */
@Override
public void acquireBufferForSharingNoEvent(CommandQueue queue) {
    // Slot 1 of the shared pointer scratch buffers carries the single
    // memory object handle passed to the call.
    Utils.pointers[1].rewind();
    Utils.pointers[1].put(0, id);
    long q = ((JoclCommandQueue)queue).id;
    // The trailing null is the event output: no event is requested.
    int ret = ((CLGL) cl).clEnqueueAcquireGLObjects(q, 1, Utils.pointers[1], 0, null, null);
    Utils.checkError(ret, "clEnqueueAcquireGLObjects");
}
@Override @Override
public Event releaseBufferForSharingAsync(CommandQueue queue) { public Event releaseBufferForSharingAsync(CommandQueue queue) {
@ -202,6 +209,13 @@ public class JoclBuffer extends Buffer {
long event = Utils.pointers[0].get(0); long event = Utils.pointers[0].get(0);
return new JoclEvent(event); return new JoclEvent(event);
} }
/**
 * Enqueues the release of this shared buffer on the given queue without
 * requesting an event from OpenCL.
 * <p>
 * The OpenCL return code is validated with {@code Utils.checkError}, the
 * same way the other no-event enqueue calls do, so that a failed release
 * is reported instead of being silently ignored.
 *
 * @param queue the command queue, must be a {@code JoclCommandQueue}
 */
@Override
public void releaseBufferForSharingNoEvent(CommandQueue queue) {
    // Slot 1 of the shared pointer scratch buffers carries the single
    // memory object handle passed to the call.
    Utils.pointers[1].rewind();
    Utils.pointers[1].put(0, id);
    long q = ((JoclCommandQueue)queue).id;
    // The trailing null is the event output: no event is requested.
    int ret = ((CLGL) cl).clEnqueueReleaseGLObjects(q, 1, Utils.pointers[1], 0, null, null);
    Utils.checkError(ret, "clEnqueueReleaseGLObjects");
}
private static class ReleaserImpl implements ObjectReleaser { private static class ReleaserImpl implements ObjectReleaser {
private long mem; private long mem;

@ -502,6 +502,13 @@ public class JoclImage extends Image {
return new JoclEvent(event); return new JoclEvent(event);
} }
@Override @Override
public void acquireImageForSharingNoEvent(CommandQueue queue) {
    // Enqueues the acquisition of this shared image without requesting an
    // event. Slot 1 of the shared pointer scratch buffers carries the
    // single memory object handle passed to the call.
    Utils.pointers[1].rewind();
    Utils.pointers[1].put(0, id);
    long q = ((JoclCommandQueue)queue).id;
    // The trailing null is the event output: no event is requested.
    // The return code is checked so a failed acquire is not silently ignored.
    int ret = ((CLGL) cl).clEnqueueAcquireGLObjects(q, 1, Utils.pointers[1], 0, null, null);
    Utils.checkError(ret, "clEnqueueAcquireGLObjects");
}
@Override
public Event releaseImageForSharingAsync(CommandQueue queue) { public Event releaseImageForSharingAsync(CommandQueue queue) {
Utils.pointers[0].rewind(); Utils.pointers[0].rewind();
Utils.pointers[1].rewind(); Utils.pointers[1].rewind();
@ -511,6 +518,13 @@ public class JoclImage extends Image {
long event = Utils.pointers[0].get(0); long event = Utils.pointers[0].get(0);
return new JoclEvent(event); return new JoclEvent(event);
} }
/**
 * Enqueues the release of this shared image on the given queue without
 * requesting an event from OpenCL.
 * <p>
 * The OpenCL return code is validated with {@code Utils.checkError}, the
 * same way the other no-event enqueue calls do, so that a failed release
 * is reported instead of being silently ignored.
 *
 * @param queue the command queue, must be a {@code JoclCommandQueue}
 */
@Override
public void releaseImageForSharingNoEvent(CommandQueue queue) {
    // Slot 1 of the shared pointer scratch buffers carries the single
    // memory object handle passed to the call.
    Utils.pointers[1].rewind();
    Utils.pointers[1].put(0, id);
    long q = ((JoclCommandQueue)queue).id;
    // The trailing null is the event output: no event is requested.
    int ret = ((CLGL) cl).clEnqueueReleaseGLObjects(q, 1, Utils.pointers[1], 0, null, null);
    Utils.checkError(ret, "clEnqueueReleaseGLObjects");
}
private static class ReleaserImpl implements ObjectReleaser { private static class ReleaserImpl implements ObjectReleaser {
private long mem; private long mem;

@ -240,6 +240,24 @@ public class JoclKernel extends Kernel {
return new JoclEvent(Utils.pointers[0].get(0)); return new JoclEvent(Utils.pointers[0].get(0));
} }
/**
 * Enqueues this kernel with the currently configured work sizes without
 * requesting an event from OpenCL.
 *
 * @param queue the command queue, must be a {@code JoclCommandQueue}
 */
@Override
public void RunNoEvent(CommandQueue queue) {
    // Stage the global work size in scratch buffer 1.
    PointerBuffer globalSizes = Utils.pointers[1];
    globalSizes.rewind();
    globalSizes.put(globalWorkSize.getSizes(), 0, globalWorkSize.getSizes().length);
    globalSizes.position(0);

    // Stage the work group size in scratch buffer 2, but only when the
    // first entry is positive; otherwise null is handed to OpenCL.
    PointerBuffer localSizes = null;
    if (workGroupSize.getSizes()[0] > 0) {
        localSizes = Utils.pointers[2].rewind();
        localSizes.put(workGroupSize.getSizes(), 0, workGroupSize.getSizes().length);
        localSizes.position(0);
    }

    long queueId = ((JoclCommandQueue) queue).id;
    // No wait list and no event output (trailing "0, null, null").
    int status = cl.clEnqueueNDRangeKernel(queueId, kernel,
            globalWorkSize.getDimension(), null, globalSizes,
            localSizes, 0, null, null);
    Utils.checkError(status, "clEnqueueNDRangeKernel");
}
private static class ReleaserImpl implements ObjectReleaser { private static class ReleaserImpl implements ObjectReleaser {
private long kernel; private long kernel;
private ReleaserImpl(long kernel) { private ReleaserImpl(long kernel) {

@ -194,6 +194,12 @@ public class LwjglBuffer extends Buffer {
long event = Utils.pointerBuffers[0].get(0); long event = Utils.pointerBuffers[0].get(0);
return new LwjglEvent(q.getCLEvent(event)); return new LwjglEvent(q.getCLEvent(event));
} }
/**
 * Enqueues the acquisition of this shared buffer on the given queue
 * without requesting an event, and validates the OpenCL return code.
 *
 * @param queue the command queue, must be a {@code LwjglCommandQueue}
 */
@Override
public void acquireBufferForSharingNoEvent(CommandQueue queue) {
    final CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
    // Both the wait list and the event output are null: nothing to wait
    // for, and no event is requested.
    Utils.checkError(
            CL10GL.clEnqueueAcquireGLObjects(clQueue, buffer, null, null),
            "clEnqueueAcquireGLObjects");
}
@Override @Override
public Event releaseBufferForSharingAsync(CommandQueue queue) { public Event releaseBufferForSharingAsync(CommandQueue queue) {
@ -204,6 +210,12 @@ public class LwjglBuffer extends Buffer {
long event = Utils.pointerBuffers[0].get(0); long event = Utils.pointerBuffers[0].get(0);
return new LwjglEvent(q.getCLEvent(event)); return new LwjglEvent(q.getCLEvent(event));
} }
/**
 * Enqueues the release of this shared buffer on the given queue without
 * requesting an event, and validates the OpenCL return code.
 *
 * @param queue the command queue, must be a {@code LwjglCommandQueue}
 */
@Override
public void releaseBufferForSharingNoEvent(CommandQueue queue) {
    final CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
    // Both the wait list and the event output are null: nothing to wait
    // for, and no event is requested.
    Utils.checkError(
            CL10GL.clEnqueueReleaseGLObjects(clQueue, buffer, null, null),
            "clEnqueueReleaseGLObjects");
}
private static class ReleaserImpl implements ObjectReleaser { private static class ReleaserImpl implements ObjectReleaser {
private CLMem mem; private CLMem mem;

@ -534,6 +534,13 @@ public class LwjglImage extends Image {
long event = Utils.pointerBuffers[0].get(0); long event = Utils.pointerBuffers[0].get(0);
return new LwjglEvent(q.getCLEvent(event)); return new LwjglEvent(q.getCLEvent(event));
} }
/**
 * Enqueues the acquisition of this shared image on the given queue
 * without requesting an event, and validates the OpenCL return code.
 *
 * @param queue the command queue, must be a {@code LwjglCommandQueue}
 */
@Override
public void acquireImageForSharingNoEvent(CommandQueue queue) {
    final CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
    // Both the wait list and the event output are null: nothing to wait
    // for, and no event is requested.
    Utils.checkError(
            CL10GL.clEnqueueAcquireGLObjects(clQueue, image, null, null),
            "clEnqueueAcquireGLObjects");
}
@Override
public Event releaseImageForSharingAsync(CommandQueue queue) { public Event releaseImageForSharingAsync(CommandQueue queue) {
Utils.pointerBuffers[0].rewind(); Utils.pointerBuffers[0].rewind();
CLCommandQueue q = ((LwjglCommandQueue) queue).getQueue(); CLCommandQueue q = ((LwjglCommandQueue) queue).getQueue();
@ -542,6 +549,12 @@ public class LwjglImage extends Image {
long event = Utils.pointerBuffers[0].get(0); long event = Utils.pointerBuffers[0].get(0);
return new LwjglEvent(q.getCLEvent(event)); return new LwjglEvent(q.getCLEvent(event));
} }
/**
 * Enqueues the release of this shared image on the given queue without
 * requesting an event, and validates the OpenCL return code.
 *
 * @param queue the command queue, must be a {@code LwjglCommandQueue}
 */
@Override
public void releaseImageForSharingNoEvent(CommandQueue queue) {
    final CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
    // Both the wait list and the event output are null: nothing to wait
    // for, and no event is requested.
    Utils.checkError(
            CL10GL.clEnqueueReleaseGLObjects(clQueue, image, null, null),
            "clEnqueueReleaseGLObjects");
}
private static class ReleaserImpl implements ObjectReleaser { private static class ReleaserImpl implements ObjectReleaser {
private CLMem mem; private CLMem mem;

@ -223,6 +223,23 @@ public class LwjglKernel extends Kernel {
Utils.checkError(ret, "clEnqueueNDRangeKernel"); Utils.checkError(ret, "clEnqueueNDRangeKernel");
return new LwjglEvent(q.getCLEvent(Utils.pointerBuffers[0].get(0))); return new LwjglEvent(q.getCLEvent(Utils.pointerBuffers[0].get(0)));
} }
/**
 * Enqueues this kernel with the currently configured work sizes without
 * requesting an event from OpenCL.
 *
 * @param queue the command queue, must be a {@code LwjglCommandQueue}
 */
@Override
public void RunNoEvent(CommandQueue queue) {
    // Stage the global work size in scratch buffer 1.
    PointerBuffer globalSizes = Utils.pointerBuffers[1];
    globalSizes.rewind();
    globalSizes.put(globalWorkSize.getSizes());
    globalSizes.position(0);

    // Stage the work group size in scratch buffer 2, but only when the
    // first entry is positive; otherwise null is handed to OpenCL.
    PointerBuffer localSizes = null;
    if (workGroupSize.getSizes()[0] > 0) {
        localSizes = Utils.pointerBuffers[2].rewind();
        localSizes.put(workGroupSize.getSizes());
        localSizes.position(0);
    }

    CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
    // No wait list and no event output (trailing "null, null").
    int status = CL10.clEnqueueNDRangeKernel(clQueue, kernel,
            globalWorkSize.getDimension(), null, globalSizes,
            localSizes, null, null);
    Utils.checkError(status, "clEnqueueNDRangeKernel");
}
@Override @Override
public ObjectReleaser getReleaser() { public ObjectReleaser getReleaser() {

Loading…
Cancel
Save