diff --git a/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java b/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java
index 5f667e205..761df702a 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java
@@ -32,13 +32,33 @@
package com.jme3.opencl;
/**
- *
+ * Wrapper for an OpenCL command queue.
+ * The command queue serializes every GPU function call: By passing the same
+ * queue to OpenCL functions (buffer, image operations, kernel calls), it is
+ * ensured that they are executed in the order in which they are passed.
+ *
+ * Each command queue is associated with exactly one device: that device
+ * is specified on creation ({@link Context#createQueue(com.jme3.opencl.Device) })
+ * and all commands are sent to this device.
* @author Sebastian Weiss
*/
public interface CommandQueue extends OpenCLObject {
+ /**
+ * Issues all previously queued OpenCL commands in this command queue to
+ * the device associated with it. Flush only guarantees that all
+ * queued commands will eventually be submitted to the
+ * appropriate device. There is no guarantee that they will be complete
+ * after flush returns.
+ */
void flush();
+ /**
+ * Blocks until all previously queued OpenCL commands in this command queue
+ * are issued to the associated device and have completed. Finish does not
+ * return until all previously queued commands in this command queue have
+ * been processed and completed. Finish is also a synchronization point.
+ */
void finish();
}
diff --git a/jme3-core/src/main/java/com/jme3/opencl/Context.java b/jme3-core/src/main/java/com/jme3/opencl/Context.java
index 0b18fbb17..e108ad729 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/Context.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/Context.java
@@ -50,44 +50,216 @@ import java.util.logging.Level;
import java.util.logging.Logger;
/**
- * The central OpenCL context. Every actions start from here.
- *
+ * The central OpenCL context. Every action starts from here.
+ * The context can be obtained by {@link com.jme3.system.JmeContext#getOpenCLContext() }.
+ *
+ * The context is used to: + *
+ * Before the returned buffer can be used, it must be acquired explicitly
+ * by {@link Buffer#acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * and after modifying it, released by {@link Buffer#releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) }.
+ * This is needed so that OpenGL and OpenCL operations do not interfere with each other.
+ * @param vb the vertex buffer to share
+ * @param access the memory access for the kernel
+ * @return the new buffer
+ */
public abstract Buffer bindVertexBuffer(VertexBuffer vb, MemoryAccess access);
+ /**
+ * Creates a shared image object from a jME3-image.
+ * The returned image shares the same memory with the jME3-image, changes
+ * in one view are visible in the other view.
+ * This can be used to modify textures and images directly from OpenCL
+ * (e.g. for post processing effects and other texture effects).
+ *
+ * Note: The image must already have been uploaded to the GPU,
+ * i.e. it must be used at least once for drawing.
+ *
+ * Before the returned image can be used, it must be acquired explicitly
+ * by {@link Image#acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * and after modifying it, released by {@link Image#releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) }.
+ * This is needed so that OpenGL and OpenCL operations do not interfere with each other.
+ *
+ * @param image the jME3 image object
+ * @param textureType the texture type (1D, 2D, 3D), since this is not stored in the image
+ * @param miplevel the mipmap level that should be shared
+ * @param access the allowed memory access for kernels
+ * @return the OpenCL image
+ */
public abstract Image bindImage(com.jme3.texture.Image image, Texture.Type textureType, int miplevel, MemoryAccess access);
+ /**
+ * Creates a shared image object from a jME3 texture.
+ * The returned image shares the same memory with the jME3 texture, changes
+ * in one view are visible in the other view.
+ * This can be used to modify textures and images directly from OpenCL
+ * (e.g. for post processing effects and other texture effects).
+ *
+ * Note: The image must already have been uploaded to the GPU,
+ * i.e. it must be used at least once for drawing.
+ *
+ * Before the returned image can be used, it must be acquired explicitly
+ * by {@link Image#acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }
+ * and after modifying it, released by {@link Image#releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) }.
+ * This is needed so that OpenGL and OpenCL operations do not interfere with each other.
+ *
+ * This method is equivalent to calling
+ * {@code bindImage(texture.getImage(), texture.getType(), miplevel, access)}.
+ *
+ * @param texture the jME3 texture
+ * @param miplevel the mipmap level that should be shared
+ * @param access the allowed memory access for kernels
+ * @return the OpenCL image
+ */
public Image bindImage(Texture texture, int miplevel, MemoryAccess access) {
return bindImage(texture.getImage(), texture.getType(), miplevel, access);
}
+ /**
+ * Alternative version to {@link #bindImage(com.jme3.texture.Texture, int, com.jme3.opencl.MemoryAccess) },
+ * uses {@code miplevel=0}.
+ * @param texture the jME3 texture
+ * @param access the allowed memory access for kernels
+ * @return the OpenCL image
+ */
public Image bindImage(Texture texture, MemoryAccess access) {
return bindImage(texture, 0, access);
}
+ /**
+ * Creates a shared image object from a jME3 render buffer.
+ * The returned image shares the same memory with the jME3 render buffer, changes
+ * in one view are visible in the other view.
+ *
+ * This can be used as an alternative to post processing effects
+ * (e.g. reduce sum operations, needed e.g. for tone mapping).
+ *
+ * Note: The renderbuffer must already have been uploaded to the GPU,
+ * i.e. it must be used at least once for drawing.
+ *
+ * Before the returned image can be used, it must be acquried explicitly + * by {@link Image#acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } + * and after modifying it, released by {@link Image#releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } + * This is needed so that OpenGL and OpenCL operations do not interfer with each other. + * + * @param buffer + * @param access + * @return + */ public Image bindRenderBuffer(FrameBuffer.RenderBuffer buffer, MemoryAccess access) { if (buffer.getTexture() == null) { return bindPureRenderBuffer(buffer, access); @@ -97,12 +269,61 @@ public abstract class Context implements OpenCLObject { } protected abstract Image bindPureRenderBuffer(FrameBuffer.RenderBuffer buffer, MemoryAccess access); + /** + * Creates a program object from the provided source code. + * The program still needs to be compiled using {@link Program#build() }. + * + * @param sourceCode the source code + * @return the program object + */ public abstract Program createProgramFromSourceCode(String sourceCode); + /** + * Creates a program object from the provided source code and files. + * The source code is made up from the specified include string first, + * then all files specified by the resource array (array of asset paths) + * are loaded by the provided asset manager and appended to the source code. + *
+ * The typical use case is: + *
+ * The typical use case is: + *
+ * This class is used to query the capabilities of the underlying device.
+ *
* @author Sebastian Weiss
*/
public interface Device {
+ /**
+ * @return the platform associated with this device
+ */
Platform getPlatform();
+ /**
+ * The device type
+ */
public static enum DeviceType {
DEFAULT,
CPU,
@@ -48,55 +61,251 @@ public interface Device {
ACCELEARTOR,
ALL
}
+ /**
+ * @return the device type
+ */
DeviceType getDeviceType();
+ /**
+ * @return the vendor id
+ */
int getVendorId();
+ /**
+ * Checks if this device is available at all; this must always be tested.
+ * @return {@code true} if this device is available
+ */
boolean isAvailable();
+ /**
+ * @return if this device has a compiler for kernel code
+ */
boolean hasCompiler();
+ /**
+ * @return supports double precision floats (64 bit)
+ */
boolean hasDouble();
+ /**
+ * @return supports half precision floats (16 bit)
+ */
boolean hasHalfFloat();
+ /**
+ * @return supports error correction for every access to global or constant memory
+ */
boolean hasErrorCorrectingMemory();
+ /**
+ * @return supports unified virtual memory (OpenCL 2.0)
+ */
boolean hasUnifiedMemory();
+ /**
+ * @return supports images
+ */
boolean hasImageSupport();
+ /**
+ * @return supports writes to 3d images (this is an extension)
+ */
boolean hasWritableImage3D();
+ /**
+ * @return supports sharing with OpenGL
+ */
boolean hasOpenGLInterop();
+ /**
+ * Explicitly tests for the availability of the specified extension
+ * @param extension the name of the extension
+ * @return {@code true} iff this extension is supported
+ */
boolean hasExtension(String extension);
+ /**
+ * Lists all available extensions
+ * @return all available extensions
+ */
Collection<? extends String> getExtensions();
+ /**
+ * Returns the number of parallel compute units on
+ * the OpenCL device. A work-group
+ * executes on a single compute unit. The
+ * minimum value is 1.
+ * @return the number of parallel compute units
+ * @see #getMaximumWorkItemDimensions()
+ * @see #getMaximumWorkItemSizes()
+ */
int getComputeUnits();
+ /**
+ * @return maximum clock frequency of the device in MHz
+ */
int getClockFrequency();
+ /**
+ * Returns the default compute device address space
+ * size specified as an unsigned integer value
+ * in bits. Currently supported values are 32
+ * or 64 bits.
+ * @return the size of an address in bits
+ */
int getAddressBits();
+ /**
+ * @return {@code true} if this device is little endian
+ */
boolean isLittleEndian();
+ /**
+ * The maximum number of dimensions that specify the local and global work item ids.
+ * You can always assume this to be at least 3.
+ * Therefore, the ids are always three integers x,y,z.
+ * @return the maximum dimension of work item ids
+ */
long getMaximumWorkItemDimensions();
+ /**
+ * Maximum number of work-items that can be specified in each dimension of the
+ * work-group to {@link Kernel#Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.WorkSize, com.jme3.opencl.WorkSize, java.lang.Object...) }.
+ * The array has a length of at least 3.
+ * @return the maximum size of the work group in each dimension
+ */
long[] getMaximumWorkItemSizes();
+ /**
+ * Maximum number of work-items in a
+ * work-group executing a kernel on a single
+ * compute unit, using the data parallel
+ * execution model.
+ * @return maximum number of work-items in a work-group
+ */
long getMaxiumWorkItemsPerGroup();
+ /**
+ * @return the maximum number of samplers that can be used in a kernel
+ */
int getMaximumSamplers();
+ /**
+ * @return the maximum number of images that can be used for reading in a kernel
+ */
int getMaximumReadImages();
+ /**
+ * @return the maximum number of images that can be used for writing in a kernel
+ */
int getMaximumWriteImages();
+ /**
+ * Queries the maximal size of a 2D image
+ * @return an array of length 2 with the maximal size of a 2D image
+ */
long[] getMaximumImage2DSize();
+ /**
+ * Queries the maximal size of a 3D image
+ * @return an array of length 3 with the maximal size of a 3D image
+ */
long[] getMaximumImage3DSize();
+ /**
+ * @return the maximal size of a memory object (buffer and image) in bytes
+ */
long getMaximumAllocationSize();
+ /**
+ * @return the total available global memory in bytes
+ */
long getGlobalMemorySize();
+ /**
+ * @return the total available local memory in bytes
+ */
long getLocalMemorySize();
+ /**
+ * Returns the maximal size of a constant buffer.
+ *
+ * Constant buffers are normal buffer objects, but passed to the kernel
+ * with the special declaration {@code __constant BUFFER_TYPE* BUFFER_NAME}.
+ * Because they have a special caching, their size is usually very limited.
+ *
+ * @return the maximal size of a constant buffer
+ */
long getMaximumConstantBufferSize();
+ /**
+ * @return the maximal number of constant buffer arguments in a kernel call
+ */
int getMaximumConstantArguments();
//TODO: cache, prefered sizes properties
-
+ /**
+ * OpenCL profile string. Returns the profile name supported by the device.
+ * The profile name returned can be one of the following strings:
+ * FULL_PROFILE – if the device supports the OpenCL specification
+ * (functionality defined as part of the core specification and does not
+ * require any extensions to be supported).
+ * EMBEDDED_PROFILE - if the device supports the OpenCL embedded profile.
+ *
+ * @return the profile string
+ */
String getProfile();
+ /**
+ * OpenCL version string. Returns the OpenCL version supported by the
+ * device. This version string has the following format: OpenCL space
+ * major_version.minor_version space vendor-specific information.
+ *
+ * E.g. OpenCL 1.1, OpenCL 1.2, OpenCL 2.0
+ *
+ * @return the version string
+ */
String getVersion();
+ /**
+ * Extracts the major version from the version string
+ * @return the major version
+ * @see #getVersion()
+ */
int getVersionMajor();
+ /**
+ * Extracts the minor version from the version string
+ * @return the minor version
+ * @see #getVersion()
+ */
int getVersionMinor();
+
+ /**
+ * OpenCL C version string. Returns the highest OpenCL C version supported
+ * by the compiler for this device that is not of type
+ * CL_DEVICE_TYPE_CUSTOM. This version string has the following format:
+ * OpenCL space C space major_version.minor_version space vendor-specific
+ * information.
+ * The major_version.minor_version value returned must be 1.2 if
+ * CL_DEVICE_VERSION is OpenCL 1.2. The major_version.minor_version value
+ * returned must be 1.1 if CL_DEVICE_VERSION is OpenCL 1.1. The
+ * major_version.minor_version value returned can be 1.0 or 1.1 if
+ * CL_DEVICE_VERSION is OpenCL 1.0.
+ *
+ * @return the compiler version
+ */
String getCompilerVersion();
+ /**
+ * Extracts the major version from the compiler version
+ * @return the major compiler version
+ * @see #getCompilerVersion()
+ */
int getCompilerVersionMajor();
+ /**
+ * Extracts the minor version from the compiler version
+ * @return the minor compiler version
+ * @see #getCompilerVersion()
+ */
int getCompilerVersionMinor();
+ /**
+ * @return the OpenCL software driver version string in the form
+ * major_number.minor_number
+ */
String getDriverVersion();
+ /**
+ * Extracts the major version from the driver version
+ * @return the major driver version
+ * @see #getDriverVersion()
+ */
int getDriverVersionMajor();
+ /**
+ * Extracts the minor version from the driver version
+ * @return the minor driver version
+ * @see #getDriverVersion()
+ */
int getDriverVersionMinor();
+
+ /**
+ * @return the device name
+ */
String getName();
+ /**
+ * @return the vendor
+ */
String getVendor();
}
diff --git a/jme3-core/src/main/java/com/jme3/opencl/Image.java b/jme3-core/src/main/java/com/jme3/opencl/Image.java
index 389ab265e..4e595d974 100644
--- a/jme3-core/src/main/java/com/jme3/opencl/Image.java
+++ b/jme3-core/src/main/java/com/jme3/opencl/Image.java
@@ -131,6 +131,7 @@ public interface Image extends OpenCLObject {
public long arraySize;
public long rowPitch;
public long slicePitch;
+ public ByteBuffer hostPtr;
/*
public int numMipLevels; //They must always be set to zero
public int numSamples;
@@ -139,7 +140,7 @@ public interface Image extends OpenCLObject {
public ImageDescriptor() {
}
- public ImageDescriptor(ImageType type, long width, long height, long depth, long arraySize, long rowPitch, long slicePitch) {
+ public ImageDescriptor(ImageType type, long width, long height, long depth, long arraySize, long rowPitch, long slicePitch, ByteBuffer hostPtr) {
this.type = type;
this.width = width;
this.height = height;
@@ -147,6 +148,7 @@ public interface Image extends OpenCLObject {
this.arraySize = arraySize;
this.rowPitch = rowPitch;
this.slicePitch = slicePitch;
+ this.hostPtr = hostPtr;
}
public ImageDescriptor(ImageType type, long width, long height, long depth, long arraySize) {
this.type = type;
@@ -156,6 +158,7 @@ public interface Image extends OpenCLObject {
this.arraySize = arraySize;
this.rowPitch = 0;
this.slicePitch = 0;
+ hostPtr = null;
}
@Override
diff --git a/jme3-examples/src/main/java/jme3test/opencl/HelloOpenCL.java b/jme3-examples/src/main/java/jme3test/opencl/HelloOpenCL.java
index 95d052aed..e697dc584 100644
--- a/jme3-examples/src/main/java/jme3test/opencl/HelloOpenCL.java
+++ b/jme3-examples/src/main/java/jme3test/opencl/HelloOpenCL.java
@@ -174,10 +174,6 @@ public class HelloOpenCL extends SimpleApplication {
String include = "#define TYPE float\n";
Program program = clContext.createProgramFromSourceFilesWithInclude(assetManager, include, "jme3test/opencl/Blas.cl");
program.build();
- Kernel[] kernels = program.createAllKernels();
- for (Kernel k : kernels) {
- System.out.println("available kernel: "+k.getName());
- }
Kernel kernel = program.createKernel("Fill");
System.out.println("number of args: "+kernel.getArgCount());
@@ -218,8 +214,8 @@ public class HelloOpenCL extends SimpleApplication {
//create an image
Image.ImageFormat format = new Image.ImageFormat(Image.ImageChannelOrder.RGBA, Image.ImageChannelType.FLOAT);
- Image.ImageDescriptor descr = new Image.ImageDescriptor(Image.ImageType.IMAGE_2D, 1920, 1080, 0, 0, 0, 0);
- Image image = clContext.createImage(MemoryAccess.READ_WRITE, format, descr, null);
+ Image.ImageDescriptor descr = new Image.ImageDescriptor(Image.ImageType.IMAGE_2D, 1920, 1080, 0, 0);
+ Image image = clContext.createImage(MemoryAccess.READ_WRITE, format, descr);
System.out.println("image created");
//check queries
@@ -258,8 +254,8 @@ public class HelloOpenCL extends SimpleApplication {
//create a second image
format = new Image.ImageFormat(Image.ImageChannelOrder.RGBA, Image.ImageChannelType.FLOAT);
- descr = new Image.ImageDescriptor(Image.ImageType.IMAGE_2D, 512, 512, 0, 0, 0, 0);
- Image image2 = clContext.createImage(MemoryAccess.READ_WRITE, format, descr, null);
+ descr = new Image.ImageDescriptor(Image.ImageType.IMAGE_2D, 512, 512, 0, 0);
+ Image image2 = clContext.createImage(MemoryAccess.READ_WRITE, format, descr);
//copy an area of image1 to image2
image.copyTo(clQueue, image2, new long[]{1000, 20,0}, new long[]{0,0,0}, new long[]{512, 512,1});
//this area should be completely blue
diff --git a/jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglContext.java b/jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglContext.java
index eddfaf4cc..13af4946f 100644
--- a/jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglContext.java
+++ b/jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglContext.java
@@ -100,7 +100,7 @@ public class LwjglContext extends Context {
}
@Override
- public Image createImage(MemoryAccess access, ImageFormat format, ImageDescriptor descr, ByteBuffer hostPtr) {
+ public Image createImage(MemoryAccess access, ImageFormat format, ImageDescriptor descr) {
long memFlags = Utils.getMemoryAccessFlags(access);
Utils.errorBuffer.rewind();
//fill image format
@@ -116,7 +116,7 @@ public class LwjglContext extends Context {
.put(0).put(0).put(0);
Utils.b80.rewind();
//create image
- CLMem mem = CL12.clCreateImage(context, memFlags, Utils.tempBuffers[0].b16, Utils.b80, hostPtr, Utils.errorBuffer);
+ CLMem mem = CL12.clCreateImage(context, memFlags, Utils.tempBuffers[0].b16, Utils.b80, descr.hostPtr, Utils.errorBuffer);
Utils.checkError(Utils.errorBuffer, "clCreateImage");
return new LwjglImage(mem);
}