diff --git a/jme3-core/src/main/java/com/jme3/opencl/Buffer.java b/jme3-core/src/main/java/com/jme3/opencl/Buffer.java index ce3ff8fab..26fe5f8fd 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Buffer.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Buffer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2016 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,8 +43,8 @@ import java.nio.ByteBuffer; * All access methods (read/write/copy/map) are available in both sychronized/blocking versions * and in async/non-blocking versions. The later ones always return an {@link Event} object * and have the prefix -Async in their name. - * - * @see Context#createBuffer(long, com.jme3.opencl.MemoryAccess) + * + * @see Context#createBuffer(long, com.jme3.opencl.MemoryAccess) * @author shaman */ public abstract class Buffer extends AbstractOpenCLObject { @@ -53,21 +53,21 @@ public abstract class Buffer extends AbstractOpenCLObject { super(releaser); } - @Override - public Buffer register() { - super.register(); - return this; - } - + @Override + public Buffer register() { + super.register(); + return this; + } + /** * @return the size of the buffer in bytes. - * @see Context#createBuffer(long) + * @see Context#createBuffer(long) */ public abstract long getSize(); /** * @return the memory access flags set on creation. - * @see Context#createBuffer(long, com.jme3.opencl.MemoryAccess) + * @see Context#createBuffer(long, com.jme3.opencl.MemoryAccess) */ public abstract MemoryAccess getMemoryAccessFlags(); @@ -75,6 +75,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * Performs a blocking read of the buffer. * The target buffer must have at least {@code size} bytes remaining. * This method may set the limit to the last byte read. 
+ * * @param queue the command queue * @param dest the target buffer * @param size the size in bytes being read @@ -102,6 +103,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * Performs an async/non-blocking read of the buffer. * The target buffer must have at least {@code size} bytes remaining. * This method may set the limit to the last byte read. + * * @param queue the command queue * @param dest the target buffer * @param size the size in bytes being read @@ -130,6 +132,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * Performs a blocking write to the buffer. * The target buffer must have at least {@code size} bytes remaining. * This method may set the limit to the last byte that will be written. + * * @param queue the command queue * @param src the source buffer, its data is written to this buffer * @param size the size in bytes to write @@ -157,6 +160,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * Performs an async/non-blocking write to the buffer. * The target buffer must have at least {@code size} bytes remaining. * This method may set the limit to the last byte that will be written. + * * @param queue the command queue * @param src the source buffer, its data is written to this buffer * @param size the size in bytes to write @@ -183,6 +187,7 @@ public abstract class Buffer extends AbstractOpenCLObject { /** * Performs a blocking copy operation from this buffer to the specified buffer. + * * @param queue the command queue * @param dest the target buffer * @param size the size in bytes to copy @@ -209,6 +214,7 @@ public abstract class Buffer extends AbstractOpenCLObject { /** * Performs an async/non-blocking copy operation from this buffer to the specified buffer. + * * @param queue the command queue * @param dest the target buffer * @param size the size in bytes to copy @@ -238,8 +244,9 @@ public abstract class Buffer extends AbstractOpenCLObject { * Maps this buffer directly into host memory. 
This might be the fastest method * to access the contents of the buffer since the OpenCL implementation directly * provides the memory.
- * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. + * * @param queue the command queue * @param size the size in bytes to map * @param offset the offset into this buffer @@ -251,7 +258,7 @@ public abstract class Buffer extends AbstractOpenCLObject { /** * Alternative version of {@link #map(com.jme3.opencl.CommandQueue, long, long, com.jme3.opencl.MappingAccess) }, * sets {@code offset} to zero. - * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. */ public ByteBuffer map(CommandQueue queue, long size, MappingAccess access) { @@ -261,7 +268,7 @@ public abstract class Buffer extends AbstractOpenCLObject { /** * Alternative version of {@link #map(com.jme3.opencl.CommandQueue, long, com.jme3.opencl.MappingAccess) }, * sets {@code size} to {@link #getSize() }. - * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. */ public ByteBuffer map(CommandQueue queue, MappingAccess access) { @@ -272,6 +279,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * Unmaps a previously mapped memory. * This releases the native resources and for WRITE_ONLY or READ_WRITE access, * the memory content is sent back to the GPU. + * * @param queue the command queue * @param ptr the buffer that was previously mapped */ @@ -281,8 +289,9 @@ public abstract class Buffer extends AbstractOpenCLObject { * Maps this buffer asynchronously into host memory. This might be the fastest method * to access the contents of the buffer since the OpenCL implementation directly * provides the memory.
- * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. + * * @param queue the command queue * @param size the size in bytes to map * @param offset the offset into this buffer @@ -291,28 +300,31 @@ public abstract class Buffer extends AbstractOpenCLObject { * and the event indicating when the buffer contents are available */ public abstract AsyncMapping mapAsync(CommandQueue queue, long size, long offset, MappingAccess access); + /** * Alternative version of {@link #mapAsync(com.jme3.opencl.CommandQueue, long, long, com.jme3.opencl.MappingAccess) }, * sets {@code offset} to zero. - * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. */ public AsyncMapping mapAsync(CommandQueue queue, long size, MappingAccess access) { return mapAsync(queue, size, 0, access); } + /** * Alternative version of {@link #mapAsync(com.jme3.opencl.CommandQueue, long, com.jme3.opencl.MappingAccess) }, * sets {@code size} to {@link #getSize() }. - * Important: The mapped memory MUST be released by calling + * Important: The mapped memory MUST be released by calling * {@link #unmap(com.jme3.opencl.CommandQueue, java.nio.ByteBuffer) }. */ public AsyncMapping mapAsync(CommandQueue queue, MappingAccess access) { return mapAsync(queue, getSize(), 0, access); } - + /** * Enqueues a fill operation. This method can be used to initialize or clear * a buffer with a certain value. + * * @param queue the command queue * @param pattern the buffer containing the filling pattern. * The remaining bytes specify the pattern length @@ -354,14 +366,14 @@ public abstract class Buffer extends AbstractOpenCLObject { return buffer; } } - + /** * Copies this buffer to the specified image. * Note that no format conversion is done. *
* For detailed description of the origin and region paramenter, see the * documentation of the {@link Image} class. - * + * * @param queue the command queue * @param dest the target image * @param srcOffset the offset in bytes into this buffer @@ -370,7 +382,7 @@ public abstract class Buffer extends AbstractOpenCLObject { * @return the event object */ public abstract Event copyToImageAsync(CommandQueue queue, Image dest, long srcOffset, long[] destOrigin, long[] destRegion); - + /** * Aquires this buffer object for using. Only call this method if this buffer * represents a shared object from OpenGL, created with e.g. @@ -379,11 +391,12 @@ public abstract class Buffer extends AbstractOpenCLObject { * done, the buffer must be released by calling * {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) } * so that OpenGL can use the VertexBuffer again. + * * @param queue the command queue * @return the event object */ public abstract Event acquireBufferForSharingAsync(CommandQueue queue); - + /** * Aquires this buffer object for using. Only call this method if this buffer * represents a shared object from OpenGL, created with e.g. @@ -392,36 +405,37 @@ public abstract class Buffer extends AbstractOpenCLObject { * done, the buffer must be released by calling * {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) } * so that OpenGL can use the VertexBuffer again. - * + * * The generated event object is directly released. * This brings a performance improvement when the resource is e.g. directly * used by a kernel afterwards on the same queue (this implicitly waits for - * this action). If you need the event, use + * this action). If you need the event, use * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } instead. 
- * + * * @param queue the command queue */ public void acquireBufferForSharingNoEvent(CommandQueue queue) { //default implementation, overwrite for better performance acquireBufferForSharingAsync(queue).release(); } - + /** * Releases a shared buffer object. * Call this method after the buffer object was acquired by * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } * to hand the control back to OpenGL. + * * @param queue the command queue * @return the event object */ public abstract Event releaseBufferForSharingAsync(CommandQueue queue); - + /** * Releases a shared buffer object. * Call this method after the buffer object was acquired by * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } * to hand the control back to OpenGL. - * The generated event object is directly released, resulting in + * The generated event object is directly released, resulting in * performance improvements. * @param queue the command queue */ @@ -430,9 +444,8 @@ public abstract class Buffer extends AbstractOpenCLObject { releaseBufferForSharingAsync(queue).release(); } - @Override - public String toString() { - return "Buffer (" + getSize() + "B)"; - } - + @Override + public String toString() { + return "Buffer (" + getSize() + "B)"; + } } diff --git a/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java b/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java index 8b9f77ef2..dadfebcb6 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java +++ b/jme3-core/src/main/java/com/jme3/opencl/CommandQueue.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2016 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,39 +34,40 @@ package com.jme3.opencl; /** * Wrapper for an OpenCL command queue. 
* The command queue serializes every GPU function call: By passing the same - * queue to OpenCL function (buffer, image operations, kernel calls), it is + * queue to OpenCL function (buffer, image operations, kernel calls), it is * ensured that they are executed in the order in which they are passed. *
* Each command queue is associtated with exactly one device: that device * is specified on creation ({@link Context#createQueue(com.jme3.opencl.Device) }) * and all commands are sent to this device. + * * @author shaman */ public abstract class CommandQueue extends AbstractOpenCLObject { - - protected Device device; + protected Device device; protected CommandQueue(ObjectReleaser releaser, Device device) { super(releaser); - this.device = device; + this.device = device; } - @Override - public CommandQueue register() { - super.register(); - return this; - } + @Override + public CommandQueue register() { + super.register(); + return this; + } + + /** + * Returns the device associated with this command queue. + * It can be used to query properties of the device that is used to execute + * the commands issued to this command queue. + * + * @return the associated device + */ + public Device getDevice() { + return device; + } - /** - * Returns the device associated with this command queue. - * It can be used to query properties of the device that is used to execute - * the commands issued to this command queue. - * @return the associated device - */ - public Device getDevice() { - return device; - } - /** * Issues all previously queued OpenCL commands in command_queue to the * device associated with command queue. Flush only guarantees that all @@ -83,5 +84,4 @@ public abstract class CommandQueue extends AbstractOpenCLObject { * processed and completed. Finish is also a synchronization point. */ public abstract void finish(); - } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Context.java b/jme3-core/src/main/java/com/jme3/opencl/Context.java index 3b48c29d9..fa996fa98 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Context.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Context.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2019 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -63,6 +63,7 @@ import java.util.logging.Logger; *
  • Created buffers and images shared with OpenGL vertex buffers, textures and renderbuffers
  • *
  • Create program objects from source code and source files
  • * + * * @author shaman */ public abstract class Context extends AbstractOpenCLObject { @@ -72,11 +73,11 @@ public abstract class Context extends AbstractOpenCLObject { super(releaser); } - @Override - public Context register() { - super.register(); - return this; - } + @Override + public Context register() { + super.register(); + return this; + } /** * Returns all available devices for this context. @@ -87,6 +88,7 @@ public abstract class Context extends AbstractOpenCLObject { * memory size and so on, are queried over the Device instances. *
    * The available devices were specified by a {@link PlatformChooser}. + * * @return a list of devices */ public abstract List getDevices(); @@ -94,29 +96,35 @@ public abstract class Context extends AbstractOpenCLObject { /** * Alternative version of {@link #createQueue(com.jme3.opencl.Device) }, * just uses the first device returned by {@link #getDevices() }. + * * @return the command queue */ public CommandQueue createQueue() { return createQueue(getDevices().get(0)); } + /** * Creates a command queue sending commands to the specified device. * The device must be an entry of {@link #getDevices() }. + * * @param device the target device * @return the command queue */ - public abstract CommandQueue createQueue(Device device); + public abstract CommandQueue createQueue(Device device); /** * Allocates a new buffer of the specific size and access type on the device. + * * @param size the size of the buffer in bytes * @param access the allowed access of this buffer from kernel code * @return the new buffer */ public abstract Buffer createBuffer(long size, MemoryAccess access); + /** * Alternative version of {@link #createBuffer(long, com.jme3.opencl.MemoryAccess) }, * creates a buffer with read and write access. + * * @param size the size of the buffer in bytes * @return the new buffer */ @@ -129,14 +137,17 @@ public abstract class Context extends AbstractOpenCLObject { * specified by a ByteBuffer can then be used directly by kernel code, * although the access might be slower than with native buffers * created by {@link #createBuffer(long, com.jme3.opencl.MemoryAccess) }. + * * @param data the host buffer to use * @param access the allowed access of this buffer from kernel code * @return the new buffer */ public abstract Buffer createBufferFromHost(ByteBuffer data, MemoryAccess access); + /** * Alternative version of {@link #createBufferFromHost(java.nio.ByteBuffer, com.jme3.opencl.MemoryAccess) }, * creates a buffer with read and write access. 
+ * * @param data the host buffer to use * @return the new buffer */ @@ -152,14 +163,15 @@ public abstract class Context extends AbstractOpenCLObject { * with row and slice pitches. This buffer is then used to store the image. * If no ByteBuffer is specified, a new buffer is allocated (this is the * normal behaviour). + * * @param access the allowed access of this image from kernel code * @param format the image format * @param descr the image descriptor * @return the new image object */ public abstract Image createImage(MemoryAccess access, ImageFormat format, ImageDescriptor descr); - //TODO: add simplified methods for 1D, 2D, 3D textures - + //TODO: add simplified methods for 1D, 2D, 3D textures + /** * Queries all supported image formats for a specified memory access and * image type. @@ -168,16 +180,17 @@ public abstract class Context extends AbstractOpenCLObject { * where {@code ImageChannelType} or {@code ImageChannelOrder} are {@code null} * (or both). This is the case when the device supports new formats that * are not included in this wrapper yet. + * * @param access the memory access type * @param type the image type (1D, 2D, 3D, ...) * @return an array of all supported image formats */ public abstract ImageFormat[] querySupportedFormats(MemoryAccess access, ImageType type); - - //Interop + + //Interop /** - * Creates a shared buffer from a VertexBuffer. - * The returned buffer and the vertex buffer operate on the same memory, + * Creates a shared buffer from a VertexBuffer. + * The returned buffer and the vertex buffer operate on the same memory, * changes in one view are visible in the other view. * This can be used to modify meshes directly from OpenCL (e.g. for particle systems). *
    @@ -188,6 +201,7 @@ public abstract class Context extends AbstractOpenCLObject { * by {@link Buffer#acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } * and after modifying it, released by {@link Buffer#releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) }. * This is needed so that OpenGL and OpenCL operations do not interfere with each other. + * * @param vb the vertex buffer to share * @param access the memory access for the kernel * @return the new buffer @@ -208,7 +222,7 @@ public abstract class Context extends AbstractOpenCLObject { * by {@link Image#acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } * and after modifying it, released by {@link Image#releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } * This is needed so that OpenGL and OpenCL operations do not interfere with each other. - * + * * @param image the jME3 image object * @param textureType the texture type (1D, 2D, 3D), since this is not stored in the image * @param miplevel the mipmap level that should be shared @@ -216,6 +230,7 @@ public abstract class Context extends AbstractOpenCLObject { * @return the OpenCL image */ public abstract Image bindImage(com.jme3.texture.Image image, Texture.Type textureType, int miplevel, MemoryAccess access); + /** * Creates a shared image object from a jME3 texture. * The returned image shares the same memory with the jME3 texture, changes @@ -233,7 +248,7 @@ public abstract class Context extends AbstractOpenCLObject { *

    * This method is equivalent to calling * {@code bindImage(texture.getImage(), texture.getType(), miplevel, access)}. - * + * * @param texture the jME3 texture * @param miplevel the mipmap level that should be shared * @param access the allowed memory access for kernels @@ -242,9 +257,11 @@ public abstract class Context extends AbstractOpenCLObject { public Image bindImage(Texture texture, int miplevel, MemoryAccess access) { return bindImage(texture.getImage(), texture.getType(), miplevel, access); } + /** * Alternative version to {@link #bindImage(com.jme3.texture.Texture, int, com.jme3.opencl.MemoryAccess) }, - * uses {@code miplevel=0}. + * uses {@code miplevel=0}. + * * @param texture the jME3 texture * @param access the allowed memory access for kernels * @return the OpenCL image @@ -252,6 +269,7 @@ public abstract class Context extends AbstractOpenCLObject { public Image bindImage(Texture texture, MemoryAccess access) { return bindImage(texture, 0, access); } + /** * Creates a shared image object from a jME3 render buffer. * The returned image shares the same memory with the jME3 render buffer, changes @@ -267,7 +285,7 @@ public abstract class Context extends AbstractOpenCLObject { * by {@link Image#acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } * and after modifying it, released by {@link Image#releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } * This is needed so that OpenGL and OpenCL operations do not interfere with each other. - * + * * @param buffer * @param access * @return an image @@ -279,22 +297,24 @@ public abstract class Context extends AbstractOpenCLObject { return bindImage(buffer.getTexture(), access); } } + protected abstract Image bindPureRenderBuffer(FrameBuffer.RenderBuffer buffer, MemoryAccess access); /** * Creates a program object from the provided source code. * The program still needs to be compiled using {@link Program#build() }. 
- * + * * @param sourceCode the source code * @return the program object */ public abstract Program createProgramFromSourceCode(String sourceCode); - + /** * Resolves dependencies (using {@code #include } in the source code) * and delegates the combined source code to * {@link #createProgramFromSourceCode(java.lang.String) }. * Important: only absolute paths are allowed. + * * @param sourceCode the original source code * @param assetManager the asset manager to load the files * @return the created program object @@ -310,6 +330,7 @@ public abstract class Context extends AbstractOpenCLObject { } return createProgramFromSourceCode(builder.toString()); } + private void buildSourcesRec(BufferedReader reader, StringBuilder builder, AssetManager assetManager) throws IOException { String ln; while ((ln = reader.readLine()) != null) { @@ -319,11 +340,11 @@ public abstract class Context extends AbstractOpenCLObject { ln = ln.substring(1); } if (ln.endsWith("\"")) { - ln = ln.substring(0, ln.length()-1); + ln = ln.substring(0, ln.length() - 1); } AssetInfo info = assetManager.locateAsset(new AssetKey(ln)); if (info == null) { - throw new AssetNotFoundException("Unable to load source file \""+ln+"\""); + throw new AssetNotFoundException("Unable to load source file \"" + ln + "\""); } try (BufferedReader r = new BufferedReader(new InputStreamReader(info.openStream()))) { builder.append("//-- begin import ").append(ln).append(" --\n"); @@ -335,10 +356,10 @@ public abstract class Context extends AbstractOpenCLObject { } } } - + /** * Creates a program object from the provided source code and files. - * The source code is made up from the specified include string first, + * The source code is made up from the specified include string first, * then all files specified by the resource array (array of asset paths) * are loaded by the provided asset manager and appended to the source code. *

    @@ -348,10 +369,10 @@ public abstract class Context extends AbstractOpenCLObject { *

  • Some common OpenCL files used as libraries (Convention: file names end with {@code .clh}
  • *
  • One main OpenCL file containing the actual kernels (Convention: file name ends with {@code .cl})
  • * - * + * * After the files were combined, additional include statements are resolved * by {@link #createProgramFromSourceCodeWithDependencies(java.lang.String, com.jme3.asset.AssetManager) }. - * + * * @param assetManager the asset manager used to load the files * @param include an additional include string * @param resources an array of asset paths pointing to OpenCL source files @@ -364,7 +385,7 @@ public abstract class Context extends AbstractOpenCLObject { /** * Creates a program object from the provided source code and files. - * The source code is made up from the specified include string first, + * The source code is made up from the specified include string first, * then all files specified by the resource array (array of asset paths) * are loaded by the provided asset manager and appended to the source code. *

    @@ -374,10 +395,10 @@ public abstract class Context extends AbstractOpenCLObject { *

  • Some common OpenCL files used as libraries (Convention: file names end with {@code .clh}
  • *
  • One main OpenCL file containing the actual kernels (Convention: file name ends with {@code .cl})
  • * - * + * * After the files were combined, additional include statements are resolved * by {@link #createProgramFromSourceCodeWithDependencies(java.lang.String, com.jme3.asset.AssetManager) }. - * + * * @param assetManager the asset manager used to load the files * @param include an additional include string * @param resources an array of asset paths pointing to OpenCL source files @@ -390,7 +411,7 @@ public abstract class Context extends AbstractOpenCLObject { for (String res : resources) { AssetInfo info = assetManager.locateAsset(new AssetKey(res)); if (info == null) { - throw new AssetNotFoundException("Unable to load source file \""+res+"\""); + throw new AssetNotFoundException("Unable to load source file \"" + res + "\""); } try (BufferedReader reader = new BufferedReader(new InputStreamReader(info.openStream()))) { while (true) { @@ -401,7 +422,7 @@ public abstract class Context extends AbstractOpenCLObject { str.append(line).append('\n'); } } catch (IOException ex) { - LOG.log(Level.WARNING, "unable to load source file '"+res+"'", ex); + LOG.log(Level.WARNING, "unable to load source file '" + res + "'", ex); } } return createProgramFromSourceCodeWithDependencies(str.toString(), assetManager); @@ -410,6 +431,7 @@ public abstract class Context extends AbstractOpenCLObject { /** * Alternative version of {@link #createProgramFromSourceFilesWithInclude(com.jme3.asset.AssetManager, java.lang.String, java.lang.String...) } * with an empty include string + * * @throws AssetNotFoundException if a file could not be loaded */ public Program createProgramFromSourceFiles(AssetManager assetManager, String... 
resources) { @@ -419,12 +441,13 @@ public abstract class Context extends AbstractOpenCLObject { /** * Alternative version of {@link #createProgramFromSourceFilesWithInclude(com.jme3.asset.AssetManager, java.lang.String, java.util.List) } * with an empty include string + * * @throws AssetNotFoundException if a file could not be loaded */ public Program createProgramFromSourceFiles(AssetManager assetManager, List resources) { return createProgramFromSourceFilesWithInclude(assetManager, "", resources); } - + /** * Creates a program from the specified binaries. * The binaries are created by {@link Program#getBinary(com.jme3.opencl.Device) }. @@ -432,20 +455,19 @@ public abstract class Context extends AbstractOpenCLObject { * {@link Program#build(java.lang.String, com.jme3.opencl.Device...) }. * Important:The device passed to {@code Program.getBinary(..)}, * this method and {@code Program#build(..)} must be the same. - * + * * The binaries are used to build a program cache across multiple launches * of the application. The programs build much faster from binaries than * from sources. - * + * * @param binaries the binaries * @param device the device to use * @return the new program */ public abstract Program createProgramFromBinary(ByteBuffer binaries, Device device); - @Override - public String toString() { - return "Context (" + getDevices() + ')'; - } - + @Override + public String toString() { + return "Context (" + getDevices() + ')'; + } } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Device.java b/jme3-core/src/main/java/com/jme3/opencl/Device.java index b9f083c57..28af126f9 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Device.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Device.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2019 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,185 +41,217 @@ import java.util.Collection; * queue ({@link Context#createQueue(com.jme3.opencl.Device) }). *

    * This class is used to query the capabilities of the underlying device. - * + * * @author shaman */ public interface Device { - /** * @return the platform associated with this device */ Platform getPlatform(); - /** * The device type */ - public static enum DeviceType { - DEFAULT, - CPU, - GPU, - ACCELEARTOR, - ALL - } + public static enum DeviceType { + DEFAULT, + CPU, + GPU, + ACCELEARTOR, + ALL + } + /** * @return queries the device type */ - DeviceType getDeviceType(); + DeviceType getDeviceType(); + /** * @return the vendor id */ - int getVendorId(); + int getVendorId(); + /** * checks if this device is available at all, must always be tested + * * @return checks if this device is available at all, must always be tested */ - boolean isAvailable(); - + boolean isAvailable(); + /** * @return if this device has a compiler for kernel code */ - boolean hasCompiler(); + boolean hasCompiler(); + /** * @return supports double precision floats (64 bit) */ - boolean hasDouble(); + boolean hasDouble(); + /** * @return supports half precision floats (16 bit) */ - boolean hasHalfFloat(); + boolean hasHalfFloat(); + /** * @return supports error correction for every access to global or constant memory */ - boolean hasErrorCorrectingMemory(); + boolean hasErrorCorrectingMemory(); + /** * @return supports unified virtual memory (OpenCL 2.0) */ - boolean hasUnifiedMemory(); + boolean hasUnifiedMemory(); + /** * @return supports images */ - boolean hasImageSupport(); + boolean hasImageSupport(); + /** * @return supports writes to 3d images (this is an extension) */ boolean hasWritableImage3D(); + /** * @return supports sharing with OpenGL */ boolean hasOpenGLInterop(); + /** * Explictly tests for the availability of the specified extension + * * @param extension the name of the extension * @return {@code true} iff this extension is supported */ - boolean hasExtension(String extension); + boolean hasExtension(String extension); + /** * Lists all available extensions + * * 
@return all available extensions */ - Collection getExtensions(); - + Collection getExtensions(); + /** * Returns the number of parallel compute units on * the OpenCL device. A work-group * executes on a single compute unit. The * minimum value is 1. * @return the number of parallel compute units - * @see #getMaximumWorkItemDimensions() - * @see #getMaximumWorkItemSizes() + * @see #getMaximumWorkItemDimensions() + * @see #getMaximumWorkItemSizes() */ - int getComputeUnits(); + int getComputeUnits(); + /** * @return maximum clock frequency of the device in MHz */ - int getClockFrequency(); + int getClockFrequency(); + /** * Returns the default compute device address space * size specified as an unsigned integer value * in bits. Currently supported values are 32 * or 64 bits. + * * @return the size of an address */ - int getAddressBits(); + int getAddressBits(); + /** * @return {@code true} if this device is little endian */ - boolean isLittleEndian(); - + boolean isLittleEndian(); + /** * The maximum dimension that specify the local and global work item ids. * You can always assume to be this at least 3. * Therefore, the ids are always three integers x,y,z. + * * @return the maximum dimension of work item ids */ - long getMaximumWorkItemDimensions(); + long getMaximumWorkItemDimensions(); + /** * Maximum number of work-items that can be specified in each dimension of the * work-group to {@link Kernel#Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...)}. * The array has a length of at least 3. + * * @return the maximum size of the work group in each dimension */ - long[] getMaximumWorkItemSizes(); + long[] getMaximumWorkItemSizes(); + /** * Maximum number of work-items in a * work-group executing a kernel on a single * compute unit, using the data parallel * execution model. 
+ * * @return maximum number of work-items in a work-group */ - long getMaxiumWorkItemsPerGroup(); - + long getMaxiumWorkItemsPerGroup(); + /** * @return the maximum number of samples that can be used in a kernel */ - int getMaximumSamplers(); + int getMaximumSamplers(); + /** * @return the maximum number of images that can be used for reading in a kernel */ - int getMaximumReadImages(); + int getMaximumReadImages(); + /** * @return the maximum number of images that can be used for writing in a kernel */ - int getMaximumWriteImages(); + int getMaximumWriteImages(); + /** * Queries the maximal size of a 2D image + * * @return an array of length 2 with the maximal size of a 2D image */ - long[] getMaximumImage2DSize(); + long[] getMaximumImage2DSize(); + /** * Queries the maximal size of a 3D image + * * @return an array of length 3 with the maximal size of a 3D image */ - long[] getMaximumImage3DSize(); - + long[] getMaximumImage3DSize(); + /** * @return the maximal size of a memory object (buffer and image) in bytes */ long getMaximumAllocationSize(); + /** * @return the total available global memory in bytes */ long getGlobalMemorySize(); + /** * @return the total available local memory in bytes */ long getLocalMemorySize(); + /** * Returns the maximal size of a constant buffer. *
    * Constant buffers are normal buffer objects, but passed to the kernel * with the special declaration {@code __constant BUFFER_TYPE* BUFFER_NAME}. * Because they have a special caching, their size is usually very limited. - * + * * @return the maximal size of a constant buffer */ long getMaximumConstantBufferSize(); + /** * @return the maximal number of constant buffer arguments in a kernel call */ int getMaximumConstantArguments(); - - //TODO: cache, prefered sizes properties + + //TODO: cache, prefered sizes properties /** * OpenCL profile string. Returns the profile name supported by the device. * The profile name returned can be one of the following strings:
    @@ -230,7 +262,8 @@ public interface Device { * * @return the profile string */ - String getProfile(); + String getProfile(); + /** * OpenCL version string. Returns the OpenCL version supported by the * device. This version string has the following format: OpenCL space @@ -240,20 +273,24 @@ public interface Device { * * @return the version string */ - String getVersion(); + String getVersion(); + /** * Extracts the major version from the version string + * * @return the major version - * @see #getVersion() + * @see #getVersion() */ - int getVersionMajor(); + int getVersionMajor(); + /** * Extracts the minor version from the version string + * * @return the minor version * @see #getVersion() } */ - int getVersionMinor(); - + int getVersionMinor(); + /** * OpenCL C version string. Returns the highest OpenCL C version supported * by the compiler for this device that is not of type @@ -268,44 +305,53 @@ public interface Device { * * @return the compiler version */ - String getCompilerVersion(); + String getCompilerVersion(); + /** * Extracts the major version from the compiler version + * * @return the major compiler version - * @see #getCompilerVersion() + * @see #getCompilerVersion() */ - int getCompilerVersionMajor(); + int getCompilerVersionMajor(); + /** * Extracts the minor version from the compiler version + * * @return the minor compiler version - * @see #getCompilerVersion() + * @see #getCompilerVersion() */ - int getCompilerVersionMinor(); - /** + int getCompilerVersionMinor(); + + /** * @return the OpenCL software driver version string in the form * major_number.minor_number */ - String getDriverVersion(); + String getDriverVersion(); + /** * Extracts the major version from the driver version + * * @return the major driver version - * @see #getDriverVersion() + * @see #getDriverVersion() */ - int getDriverVersionMajor(); + int getDriverVersionMajor(); + /** * Extracts the minor version from the driver version + * * @return the minor driver version - * 
@see #getDriverVersion() + * @see #getDriverVersion() */ - int getDriverVersionMinor(); - + int getDriverVersionMinor(); + /** * @return the device name */ - String getName(); + String getName(); + /** * @return the vendor */ - String getVendor(); - + String getVendor(); } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Event.java b/jme3-core/src/main/java/com/jme3/opencl/Event.java index 66aa12a5c..fd1ecebb5 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Event.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Event.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2016 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +36,7 @@ package com.jme3.opencl; * Events are returned from kernel launches and all asynchronous operations. * They allow to test if the action has completed and to block until the operation * is done. + * * @author shaman */ public abstract class Event extends AbstractOpenCLObject { @@ -44,22 +45,23 @@ public abstract class Event extends AbstractOpenCLObject { super(releaser); } - @Override - public Event register() { - super.register(); - return this; - } - + @Override + public Event register() { + super.register(); + return this; + } + /** * Waits until the action has finished (blocking). * This automatically releases the event. */ - public abstract void waitForFinished(); - + public abstract void waitForFinished(); + /** * Tests if the action is completed. * If the action is completed, the event is released. 
+ * * @return {@code true} if the action is completed */ - public abstract boolean isCompleted(); + public abstract boolean isCompleted(); } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Image.java b/jme3-core/src/main/java/com/jme3/opencl/Image.java index fd024000c..98059801d 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Image.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Image.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2019 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,16 +41,16 @@ import java.util.Objects; * An image object is similar to a {@link Buffer}, but with a specific element * format and buffer structure. *
    - * The image is specified by the {@link ImageDescriptor}, specifying + * The image is specified by the {@link ImageDescriptor}, specifying * the extend and dimension of the image, and {@link ImageFormat}, specifying * the type of each pixel. *
    - * An image is created from scratch using + * An image is created from scratch using * {@link Context#createImage(com.jme3.opencl.MemoryAccess, com.jme3.opencl.Image.ImageFormat, com.jme3.opencl.Image.ImageDescriptor) } * or from OpenGL by * {@link Context#bindImage(com.jme3.texture.Image, com.jme3.texture.Texture.Type, int, com.jme3.opencl.MemoryAccess) } * (and alternative versions). - * + * *

    * Most methods take long arrays as input: {@code long[] origin} and {@code long[] region}. * Both are arrays of length 3. @@ -75,7 +75,6 @@ import java.util.Objects; * @author shaman */ public abstract class Image extends AbstractOpenCLObject { - /** * {@code ImageChannelType} describes the size of the channel data type. */ @@ -96,10 +95,10 @@ public abstract class Image extends AbstractOpenCLObject { HALF_FLOAT, FLOAT } - + /** * {@code ImageChannelOrder} specifies the number of channels and the channel layout i.e. the -memory layout in which channels are stored in the image. + * memory layout in which channels are stored in the image. */ public static enum ImageChannelOrder { R, Rx, A, @@ -112,7 +111,7 @@ memory layout in which channels are stored in the image. } /** - * Describes the image format, consisting of + * Describes the image format, consisting of * {@link ImageChannelOrder} and {@link ImageChannelType}. */ public static class ImageFormat { //Struct @@ -157,7 +156,6 @@ memory layout in which channels are stored in the image. } return true; } - } /** @@ -193,13 +191,14 @@ memory layout in which channels are stored in the image. /* public int numMipLevels; //They must always be set to zero public int numSamples; - */ + */ public ImageDescriptor() { } /** * Used to specify an image with the provided ByteBuffer as soruce + * * @param type the image type * @param width the width * @param height the height, unused for image types {@code ImageType.IMAGE_1D*} @@ -219,9 +218,11 @@ memory layout in which channels are stored in the image. this.slicePitch = slicePitch; this.hostPtr = hostPtr; } + /** - * Specifies an image without a host buffer, a new chunk of memory + * Specifies an image without a host buffer, a new chunk of memory * will be allocated. 
+ * * @param type the image type * @param width the width * @param height the height, unused for image types {@code ImageType.IMAGE_1D*} @@ -243,60 +244,68 @@ memory layout in which channels are stored in the image. public String toString() { return "ImageDescriptor{" + "type=" + type + ", width=" + width + ", height=" + height + ", depth=" + depth + ", arraySize=" + arraySize + ", rowPitch=" + rowPitch + ", slicePitch=" + slicePitch + '}'; } - } protected Image(ObjectReleaser releaser) { super(releaser); } - - @Override - public Image register() { - super.register(); - return this; - } - + + @Override + public Image register() { + super.register(); + return this; + } + /** * @return the width of the image */ public abstract long getWidth(); + /** * @return the height of the image */ public abstract long getHeight(); + /** * @return the depth of the image */ public abstract long getDepth(); + /** * @return the row pitch when the image was created from a host buffer */ public abstract long getRowPitch(); + /** * @return the slice pitch when the image was created from a host buffer */ public abstract long getSlicePitch(); + /** * @return the number of elements in the image array * @see ImageType#IMAGE_1D_ARRAY * @see ImageType#IMAGE_2D_ARRAY */ public abstract long getArraySize(); + /** * @return the image format */ public abstract ImageFormat getImageFormat(); + /** * @return the image type */ public abstract ImageType getImageType(); + /** * @return the number of bytes per pixel */ public abstract int getElementSize(); - + /** * Performs a blocking read of the image into the specified byte buffer. + * * @param queue the command queue * @param dest the target byte buffer * @param origin the image origin location, see class description for the format @@ -307,8 +316,10 @@ memory layout in which channels are stored in the image. 
* If set to 0 for 3D images, the slice pitch is calculated as {@code rowPitch * height} */ public abstract void readImage(CommandQueue queue, ByteBuffer dest, long[] origin, long[] region, long rowPitch, long slicePitch); + /** * Performs an async/non-blocking read of the image into the specified byte buffer. + * * @param queue the command queue * @param dest the target byte buffer * @param origin the image origin location, see class description for the format @@ -320,9 +331,10 @@ memory layout in which channels are stored in the image. * @return the event object indicating the status of the operation */ public abstract Event readImageAsync(CommandQueue queue, ByteBuffer dest, long[] origin, long[] region, long rowPitch, long slicePitch); - + /** * Performs a blocking write from the specified byte buffer into the image. + * * @param queue the command queue * @param src the source buffer * @param origin the image origin location, see class description for the format @@ -333,8 +345,10 @@ memory layout in which channels are stored in the image. * If set to 0 for 3D images, the slice pitch is calculated as {@code rowPitch * height} */ public abstract void writeImage(CommandQueue queue, ByteBuffer src, long[] origin, long[] region, long rowPitch, long slicePitch); + /** * Performs an async/non-blocking write from the specified byte buffer into the image. + * * @param queue the command queue * @param src the source buffer * @param origin the image origin location, see class description for the format @@ -346,10 +360,11 @@ memory layout in which channels are stored in the image. * @return the event object indicating the status of the operation */ public abstract Event writeImageAsync(CommandQueue queue, ByteBuffer src, long[] origin, long[] region, long rowPitch, long slicePitch); - + /** * Performs a blocking copy operation from one image to another. * Important: Both images must have the same format! 
+ * * @param queue the command queue * @param dest the target image * @param srcOrigin the source image origin, see class description for the format @@ -357,9 +372,11 @@ memory layout in which channels are stored in the image. * @param region the copied region, see class description for the format */ public abstract void copyTo(CommandQueue queue, Image dest, long[] srcOrigin, long[] destOrigin, long[] region); + /** * Performs an async/non-blocking copy operation from one image to another. * Important: Both images must have the same format! + * * @param queue the command queue * @param dest the target image * @param srcOrigin the source image origin, see class description for the format @@ -368,20 +385,22 @@ memory layout in which channels are stored in the image. * @return the event object indicating the status of the operation */ public abstract Event copyToAsync(CommandQueue queue, Image dest, long[] srcOrigin, long[] destOrigin, long[] region); - + /** * Maps the image into host memory. * The returned structure contains the mapped byte buffer and row and slice pitch. * The event object is set to {@code null}, it is needed for the asnyc * version {@link #mapAsync(com.jme3.opencl.CommandQueue, long[], long[], com.jme3.opencl.MappingAccess) }. + * * @param queue the command queue * @param origin the image origin, see class description for the format * @param region the mapped region, see class description for the format * @param access the allowed memory access to the mapped memory * @return a structure describing the mapped memory - * @see #unmap(com.jme3.opencl.CommandQueue, com.jme3.opencl.Image.ImageMapping) + * @see #unmap(com.jme3.opencl.CommandQueue, com.jme3.opencl.Image.ImageMapping) */ public abstract ImageMapping map(CommandQueue queue, long[] origin, long[] region, MappingAccess access); + /** * Non-blocking version of {@link #map(com.jme3.opencl.CommandQueue, long[], long[], com.jme3.opencl.MappingAccess) }. 
* The returned structure contains the mapped byte buffer and row and slice pitch. @@ -391,16 +410,18 @@ memory layout in which channels are stored in the image. * @param region the mapped region, see class description for the format * @param access the allowed memory access to the mapped memory * @return a structure describing the mapped memory - * @see #unmap(com.jme3.opencl.CommandQueue, com.jme3.opencl.Image.ImageMapping) + * @see #unmap(com.jme3.opencl.CommandQueue, com.jme3.opencl.Image.ImageMapping) */ public abstract ImageMapping mapAsync(CommandQueue queue, long[] origin, long[] region, MappingAccess access); + /** * Unmaps the mapped memory + * * @param queue the command queue * @param mapping the mapped memory */ public abstract void unmap(CommandQueue queue, ImageMapping mapping); - + /** * Describes a mapped region of the image */ @@ -421,7 +442,8 @@ memory layout in which channels are stored in the image. public final long slicePitch; /** * The event object used to detect when the memory is available. - * @see #mapAsync(com.jme3.opencl.CommandQueue, long[], long[], com.jme3.opencl.MappingAccess) + * + * @see #mapAsync(com.jme3.opencl.CommandQueue, long[], long[], com.jme3.opencl.MappingAccess) */ public final Event event; @@ -431,19 +453,20 @@ memory layout in which channels are stored in the image. this.slicePitch = slicePitch; this.event = event; } + public ImageMapping(ByteBuffer buffer, long rowPitch, long slicePitch) { this.buffer = buffer; this.rowPitch = rowPitch; this.slicePitch = slicePitch; this.event = null; } - } - + /** * Fills the image with the specified color. * Does only work if the image channel is {@link ImageChannelType#FLOAT} * or {@link ImageChannelType#HALF_FLOAT}. + * * @param queue the command queue * @param origin the image origin, see class description for the format * @param region the size of the region, see class description for the format @@ -455,6 +478,7 @@ memory layout in which channels are stored in the image. 
* Fills the image with the specified color given as four integer variables. * Does not work if the image channel is {@link ImageChannelType#FLOAT} * or {@link ImageChannelType#HALF_FLOAT}. + * * @param queue the command queue * @param origin the image origin, see class description for the format * @param region the size of the region, see class description for the format @@ -462,11 +486,12 @@ memory layout in which channels are stored in the image. * @return an event object to detect for the completion */ public abstract Event fillAsync(CommandQueue queue, long[] origin, long[] region, int[] color); - + /** * Copies this image into the specified buffer, no format conversion is done. - * This is the dual function to + * This is the dual function to * {@link Buffer#copyToImageAsync(com.jme3.opencl.CommandQueue, com.jme3.opencl.Image, long, long[], long[]) }. + * * @param queue the command queue * @param dest the target buffer * @param srcOrigin the image origin, see class description for the format @@ -475,7 +500,7 @@ memory layout in which channels are stored in the image. * @return the event object to detect the completion of the operation */ public abstract Event copyToBufferAsync(CommandQueue queue, Buffer dest, long[] srcOrigin, long[] srcRegion, long destOffset); - + /** * Aquires this image object for using. Only call this method if this image * represents a shared object from OpenGL, created with e.g. @@ -485,11 +510,12 @@ memory layout in which channels are stored in the image. * done, the image must be released by calling * {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } * so that OpenGL can use the image/texture/renderbuffer again. + * * @param queue the command queue * @return the event object */ public abstract Event acquireImageForSharingAsync(CommandQueue queue); - + /** * Aquires this image object for using. Only call this method if this image * represents a shared object from OpenGL, created with e.g. 
@@ -499,37 +525,39 @@ memory layout in which channels are stored in the image. * done, the image must be released by calling * {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue) } * so that OpenGL can use the image/texture/renderbuffer again. - * + * * The generated event object is directly released. * This brings a performance improvement when the resource is e.g. directly * used by a kernel afterwards on the same queue (this implicitly waits for - * this action). If you need the event, use + * this action). If you need the event, use * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }. - * + * * @param queue the command queue */ public void acquireImageForSharingNoEvent(CommandQueue queue) { //Default implementation, overwrite for performance acquireImageForSharingAsync(queue).release(); } - + /** * Releases a shared image object. * Call this method after the image object was acquired by * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } * to hand the control back to OpenGL. + * * @param queue the command queue * @return the event object */ public abstract Event releaseImageForSharingAsync(CommandQueue queue); - + /** * Releases a shared image object. * Call this method after the image object was acquired by * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) } * to hand the control back to OpenGL. - * The generated event object is directly released, resulting in + * The generated event object is directly released, resulting in * performance improvements. + * * @param queue the command queue */ public void releaseImageForSharingNoEvent(CommandQueue queue) { @@ -537,25 +565,24 @@ memory layout in which channels are stored in the image. 
releaseImageForSharingAsync(queue).release(); } - @Override - public String toString() { - StringBuilder str = new StringBuilder(); - str.append("Image ("); - ImageType t = getImageType(); - str.append(t); - str.append(", w=").append(getWidth()); - if (t == ImageType.IMAGE_2D || t == ImageType.IMAGE_3D) { - str.append(", h=").append(getHeight()); - } - if (t == ImageType.IMAGE_3D) { - str.append(", d=").append(getDepth()); - } - if (t == ImageType.IMAGE_1D_ARRAY || t == ImageType.IMAGE_2D_ARRAY) { - str.append(", arrays=").append(getArraySize()); - } - str.append(", ").append(getImageFormat()); - str.append(')'); - return str.toString(); - } - + @Override + public String toString() { + StringBuilder str = new StringBuilder(); + str.append("Image ("); + ImageType t = getImageType(); + str.append(t); + str.append(", w=").append(getWidth()); + if (t == ImageType.IMAGE_2D || t == ImageType.IMAGE_3D) { + str.append(", h=").append(getHeight()); + } + if (t == ImageType.IMAGE_3D) { + str.append(", d=").append(getDepth()); + } + if (t == ImageType.IMAGE_1D_ARRAY || t == ImageType.IMAGE_2D_ARRAY) { + str.append(", arrays=").append(getArraySize()); + } + str.append(", ").append(getImageFormat()); + str.append(')'); + return str.toString(); + } } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Kernel.java b/jme3-core/src/main/java/com/jme3/opencl/Kernel.java index 245657c63..343ba4830 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Kernel.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Kernel.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2018 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ import java.util.Arrays; * Wrapper for an OpenCL kernel, a piece of executable code on the GPU. *

    * Terminology:
    - * A Kernel is executed in parallel. In total number of parallel threads, + * A Kernel is executed in parallel. In total number of parallel threads, * called work items, are specified by the global work size (of type * {@link WorkSize}. These threads are organized in a 1D, 2D or 3D grid * (of coarse, this is only a logical view). Inside each kernel, @@ -54,7 +54,7 @@ import java.util.Arrays; * The maximal size of it can be queried by {@link Device#getMaxiumWorkItemsPerGroup() }. * Again, the threads inside the work group can be organized in a 1D, 2D or 3D * grid, but this is also just a logical view (specifying how the threads are - * indexed). + * indexed). * The work group is important for another concept: shared memory * Unlike the normal global or constant memory (passing a {@link Buffer} object * as argument), shared memory can't be set from outside. Shared memory is @@ -64,22 +64,22 @@ import java.util.Arrays; * {@link LocalMem} or {@link LocalMemPerElement} as argument.
    * Due to heavy register usage or other reasons, a kernel might not be able * to utilize a whole work group. Therefore, the actual number of threads - * that can be executed in a work group can be queried by - * {@link #getMaxWorkGroupSize(com.jme3.opencl.Device) }, which might differ from the + * that can be executed in a work group can be queried by + * {@link #getMaxWorkGroupSize(com.jme3.opencl.Device) }, which might differ from the * value returned from the Device. - * + * *

    * There are two ways to launch a kernel:
    - * First, arguments and the work group sizes can be set in advance + * First, arguments and the work group sizes can be set in advance * ({@code setArg(index, ...)}, {@code setGlobalWorkSize(...)} and {@code setWorkGroupSize(...)}. * Then a kernel is launched by {@link #Run(com.jme3.opencl.CommandQueue) }.
    * Second, two convenient functions are provided that set the arguments * and work sizes in one call: * {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) } * and {@link #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }. - * + * * @author shaman - * @see Program#createKernel(java.lang.String) + * @see Program#createKernel(java.lang.String) */ public abstract class Kernel extends AbstractOpenCLObject { /** @@ -97,12 +97,12 @@ public abstract class Kernel extends AbstractOpenCLObject { this.workGroupSize = new WorkSize(0); } - @Override - public Kernel register() { - super.register(); - return this; - } - + @Override + public Kernel register() { + super.register(); + return this; + } + /** * @return the name of the kernel as defined in the program source code */ @@ -122,6 +122,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the global work size. 
+ * * @param ws the work size to set */ public void setGlobalWorkSize(WorkSize ws) { @@ -130,6 +131,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the global work size to a 1D grid + * * @param size the size in 1D */ public void setGlobalWorkSize(int size) { @@ -138,6 +140,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the global work size to be a 2D grid + * * @param width the width * @param height the height */ @@ -147,6 +150,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the global work size to be a 3D grid + * * @param width the width * @param height the height * @param depth the depth @@ -164,6 +168,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the work group size + * * @param ws the work group size to set */ public void setWorkGroupSize(WorkSize ws) { @@ -172,6 +177,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the work group size to be a 1D grid + * * @param size the size to set */ public void setWorkGroupSize(int size) { @@ -180,6 +186,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the work group size to be a 2D grid + * * @param width the width * @param height the height */ @@ -189,6 +196,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the work group size to be a 3D grid + * * @param width the width * @param height the height * @param depth the depth @@ -196,7 +204,7 @@ public abstract class Kernel extends AbstractOpenCLObject { public void setWorkGroupSdize(int width, int height, int depth) { workGroupSize.set(3, width, height, depth); } - + /** * Tells the driver to figure out the work group size on their own. * Use this if you do not rely on specific work group layouts, i.e. 
@@ -207,10 +215,11 @@ public abstract class Kernel extends AbstractOpenCLObject { public void setWorkGroupSizeToNull() { workGroupSize.set(1, 0, 0, 0); } - + /** * Returns the maximal work group size when this kernel is executed on * the specified device + * * @param device the device * @return the maximal work group size */ @@ -221,7 +230,7 @@ public abstract class Kernel extends AbstractOpenCLObject { public abstract void setArg(int index, LocalMem t); public abstract void setArg(int index, Buffer t); - + public abstract void setArg(int index, Image i); public abstract void setArg(int index, byte b); @@ -237,20 +246,20 @@ public abstract class Kernel extends AbstractOpenCLObject { public abstract void setArg(int index, double d); public abstract void setArg(int index, Vector2f v); - + public abstract void setArg(int index, Vector4f v); public abstract void setArg(int index, Quaternion q); - + public abstract void setArg(int index, Matrix4f mat); - + public void setArg(int index, Matrix3f mat) { TempVars vars = TempVars.get(); try { Matrix4f m = vars.tempMat4; m.zero(); - for (int i=0; i<3; ++i) { - for (int j=0; j<3; ++j) { + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { m.set(i, j, mat.get(i, j)); } } @@ -259,13 +268,14 @@ public abstract class Kernel extends AbstractOpenCLObject { vars.release(); } } - + /** * Raw version to set an argument. * {@code size} bytes of the provided byte buffer are copied to the kernel * argument. The size in bytes must match exactly the argument size * as defined in the kernel code. * Use this method to send custom structures to the kernel + * * @param index the index of the argument * @param buffer the raw buffer * @param size the size in bytes @@ -279,6 +289,7 @@ public abstract class Kernel extends AbstractOpenCLObject { * long, float, double, Vector2f, Vector4f, Quaternion, Matrix3f, Matrix4f}. *
    * Note: Matrix3f and Matrix4f will be mapped to a {@code float16} (row major). + * * @param index the index of the argument, from 0 to {@link #getArgCount()}-1 * @param arg the argument * @throws IllegalArgumentException if the argument type is not one of the listed ones @@ -331,24 +342,26 @@ public abstract class Kernel extends AbstractOpenCLObject { * If the returned event object is not needed and would otherwise be * released immediately, {@link #RunNoEvent(com.jme3.opencl.CommandQueue) } * might bring a better performance. + * * @param queue the command queue * @return an event object indicating when the kernel is finished - * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) - * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize) - * @see #setArg(int, java.lang.Object) + * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) + * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize) + * @see #setArg(int, java.lang.Object) */ public abstract Event Run(CommandQueue queue); - + /** * Launches the kernel with the current global work size, work group size * and arguments without returning an event object. * The generated event is directly released. Therefore, the performance * is better, but there is no way to detect when the kernel execution * has finished. For this purpose, use {@link #Run(com.jme3.opencl.CommandQueue) }. + * * @param queue the command queue - * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) - * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize) - * @see #setArg(int, java.lang.Object) + * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) + * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize) + * @see #setArg(int, java.lang.Object) */ public void RunNoEvent(CommandQueue queue) { //Default implementation, overwrite to not allocate the event object @@ -361,11 +374,12 @@ public abstract class Kernel extends AbstractOpenCLObject { * size is automatically determined by the driver. 
* Each object in the argument array is sent to the kernel by * {@link #setArg(int, java.lang.Object) }. + * * @param queue the command queue * @param globalWorkSize the global work size * @param args the kernel arguments * @return an event object indicating when the kernel is finished - * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) + * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) */ public Event Run1(CommandQueue queue, WorkSize globalWorkSize, Object... args) { setGlobalWorkSize(globalWorkSize); @@ -373,7 +387,7 @@ public abstract class Kernel extends AbstractOpenCLObject { setArgs(args); return Run(queue); } - + /** * Sets the work sizes and arguments in one call and launches the kernel. * The global work size is set to the specified size. The work group @@ -382,12 +396,13 @@ public abstract class Kernel extends AbstractOpenCLObject { * {@link #setArg(int, java.lang.Object) }. * The generated event is directly released. Therefore, the performance * is better, but there is no way to detect when the kernel execution - * has finished. For this purpose, use + * has finished. For this purpose, use * {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }. + * * @param queue the command queue * @param globalWorkSize the global work size * @param args the kernel arguments - * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) + * @see #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) */ public void Run1NoEvent(CommandQueue queue, WorkSize globalWorkSize, Object... 
args) { setGlobalWorkSize(globalWorkSize); @@ -398,6 +413,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Sets the work sizes and arguments in one call and launches the kernel. + * * @param queue the command queue * @param globalWorkSize the global work size * @param workGroupSize the work group size @@ -416,8 +432,9 @@ public abstract class Kernel extends AbstractOpenCLObject { * Sets the work sizes and arguments in one call and launches the kernel. * The generated event is directly released. Therefore, the performance * is better, but there is no way to detect when the kernel execution - * has finished. For this purpose, use + * has finished. For this purpose, use * {@link #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }. + * * @param queue the command queue * @param globalWorkSize the global work size * @param workGroupSize the work group size @@ -431,22 +448,22 @@ public abstract class Kernel extends AbstractOpenCLObject { RunNoEvent(queue); } - @Override - public String toString() { - return "Kernel (" + getName() + ")"; - } - + @Override + public String toString() { + return "Kernel (" + getName() + ")"; + } + /** - * A placeholder for kernel arguments representing local kernel memory. - * This defines the size of available shared memory of a {@code __shared} kernel + * A placeholder for kernel arguments representing local kernel memory. 
This + * defines the size of available shared memory of a {@code __shared} kernel * argument */ public static final class LocalMem { - private int size; /** * Creates a new LocalMem instance + * * @param size the size of the available shared memory in bytes */ public LocalMem(int size) { @@ -480,11 +497,11 @@ public abstract class Kernel extends AbstractOpenCLObject { return true; } - @Override - public String toString() { - return "LocalMem (" + size + "B)"; - } - + @Override + public String toString() { + return "LocalMem (" + size + "B)"; + } + } /** @@ -498,11 +515,11 @@ public abstract class Kernel extends AbstractOpenCLObject { * (e.g. by {@link #setWorkGroupSizeToNull()} or {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }. */ public static final class LocalMemPerElement { - private int size; /** * Creates a new LocalMemPerElement instance + * * @param size the number of bytes available for each thread within * a work group */ @@ -537,15 +554,16 @@ public abstract class Kernel extends AbstractOpenCLObject { return true; } - @Override - public String toString() { - return "LocalMemPerElement (" + size + "B)"; - } - + @Override + public String toString() { + return "LocalMemPerElement (" + size + "B)"; + } + } /** * The work size (global and local) for executing a kernel + * * @author shaman */ public static final class WorkSize { @@ -555,6 +573,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Creates a new work size object + * * @param dimension the dimension (1,2,3) * @param sizes the sizes in each dimension, the length must match the specified dimension */ @@ -572,6 +591,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Creates a 1D work size of the specified extend + * * @param size the size */ public WorkSize(long size) { @@ -580,6 +600,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Creates a 2D work size of the specified extend + * * @param 
width the width * @param height the height */ @@ -589,6 +610,7 @@ public abstract class Kernel extends AbstractOpenCLObject { /** * Creates a 3D work size of the specified extend. + * * @param width the width * @param height the height * @param depth the depth @@ -647,20 +669,18 @@ public abstract class Kernel extends AbstractOpenCLObject { return true; } - @Override - public String toString() { - StringBuilder str = new StringBuilder(); - str.append("WorkSize["); - for (int i=0; i<dimension; ++i) { - if (i>0) { - str.append(", "); - } - str.append(sizes[i]); - } - str.append(']'); - return str.toString(); - } - + @Override + public String toString() { + StringBuilder str = new StringBuilder(); + str.append("WorkSize["); + for (int i = 0; i < dimension; ++i) { + if (i > 0) { + str.append(", "); + } + str.append(sizes[i]); + } + str.append(']'); + return str.toString(); + } } - } diff --git a/jme3-core/src/main/java/com/jme3/opencl/KernelCompilationException.java b/jme3-core/src/main/java/com/jme3/opencl/KernelCompilationException.java index 6203dd641..e022c5170 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/KernelCompilationException.java +++ b/jme3-core/src/main/java/com/jme3/opencl/KernelCompilationException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2019 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,23 +36,23 @@ package com.jme3.opencl; * when the compilation failed. * The error log returned by {@link #getLog() } contains detailed information * where the error occurred. 
+ * * @author shaman */ public class KernelCompilationException extends OpenCLException { + private final String log; - private final String log; - - public KernelCompilationException(String msg, int errorCode, String log) { - super(msg, errorCode); - this.log = log; - } + public KernelCompilationException(String msg, int errorCode, String log) { + super(msg, errorCode); + this.log = log; + } /** * The output of the compiler + * * @return the output text */ - public String getLog() { - return log; - } - -} \ No newline at end of file + public String getLog() { + return log; + } +} diff --git a/jme3-core/src/main/java/com/jme3/opencl/OpenCLException.java b/jme3-core/src/main/java/com/jme3/opencl/OpenCLException.java index fd0836ac9..84708c1c7 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/OpenCLException.java +++ b/jme3-core/src/main/java/com/jme3/opencl/OpenCLException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2016 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,43 +36,42 @@ package com.jme3.opencl; * The error code and its name is reported in the message string as well as the OpenCL call that * causes this exception. Please refer to the official OpenCL specification * to see what might cause this exception. + * * @author shaman */ public class OpenCLException extends RuntimeException { private static final long serialVersionUID = 8471229972153694848L; - private final int errorCode; - - /** - * Creates a new instance of OpenCLExceptionn without detail - * message. - */ - public OpenCLException() { - errorCode = 0; - } + private final int errorCode; - /** - * Constructs an instance of OpenCLExceptionn with the - * specified detail message. - * - * @param msg the detail message. 
- */ - public OpenCLException(String msg) { - super(msg); - errorCode = 0; - } - - public OpenCLException(String msg, int errorCode) { - super(msg); - this.errorCode = errorCode; - } + /** + * Creates a new instance of OpenCLExceptionn without detail + * message. + */ + public OpenCLException() { + errorCode = 0; + } /** - * @return the error code + * Constructs an instance of OpenCLExceptionn with the + * specified detail message. + * + * @param msg the detail message. */ - public int getErrorCode() { - return errorCode; - } + public OpenCLException(String msg) { + super(msg); + errorCode = 0; + } - + public OpenCLException(String msg, int errorCode) { + super(msg); + this.errorCode = errorCode; + } + + /** + * @return the error code + */ + public int getErrorCode() { + return errorCode; + } } diff --git a/jme3-core/src/main/java/com/jme3/opencl/Program.java b/jme3-core/src/main/java/com/jme3/opencl/Program.java index 5c4342b0a..b2bbfe5ee 100644 --- a/jme3-core/src/main/java/com/jme3/opencl/Program.java +++ b/jme3-core/src/main/java/com/jme3/opencl/Program.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2019 jMonkeyEngine + * Copyright (c) 2009-2020 jMonkeyEngine * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,9 +39,9 @@ import java.nio.ByteBuffer; *

    * Warning: Creating the same kernel more than one leads to undefined behaviour, * this is especially important for {@link #createAllKernels() } - * - * @see Context#createProgramFromSourceCode(java.lang.String) - * @see #createKernel(java.lang.String) + * + * @see Context#createProgramFromSourceCode(java.lang.String) + * @see #createKernel(java.lang.String) * @author shaman */ public abstract class Program extends AbstractOpenCLObject { @@ -49,13 +49,13 @@ public abstract class Program extends AbstractOpenCLObject { protected Program(ObjectReleaser releaser) { super(releaser); } - - @Override - public Program register() { - super.register(); - return this; - } - + + @Override + public Program register() { + super.register(); + return this; + } + /** * Builds this program with the specified argument string on the specified * devices. @@ -64,46 +64,50 @@ public abstract class Program extends AbstractOpenCLObject { * The list of devices specify on which device the compiled program * can then be executed. It must be a subset of {@link Context#getDevices() }. * If {@code null} is passed, the program is built on all available devices. - * + * * @param args the compilation arguments * @param devices a list of devices on which the program is build. * @throws KernelCompilationException if the compilation fails - * @see #build() + * @see #build() */ - public abstract void build(String args, Device... devices) throws KernelCompilationException; + public abstract void build(String args, Device... devices) throws KernelCompilationException; + /** * Builds this program without additional arguments + * * @throws KernelCompilationException if the compilation fails */ - public void build() throws KernelCompilationException { + public void build() throws KernelCompilationException { build("", (Device[]) null); } /** * Creates the kernel with the specified name. 
+ * * @param name the name of the kernel as defined in the source code * @return the kernel object - * @throws OpenCLException if the kernel was not found or some other - * error occurred + * @throws OpenCLException if the kernel was not found or some other error + * occurred */ - public abstract Kernel createKernel(String name); - + public abstract Kernel createKernel(String name); + /** * Creates all available kernels in this program. * The names of the kernels can then by queried by {@link Kernel#getName() }. + * * @return an array of all kernels */ - public abstract Kernel[] createAllKernels(); - + public abstract Kernel[] createAllKernels(); + /** * Queries a compiled binary representation of this program for a particular * device. This binary can then be used e.g. in the next application launch * to create the program from the binaries and not from the sources. * This saves time. + * * @param device the device from which the binaries are taken * @return the binaries - * @see Context#createProgramFromBinary(java.nio.ByteBuffer, com.jme3.opencl.Device) + * @see Context#createProgramFromBinary(java.nio.ByteBuffer, com.jme3.opencl.Device) */ public abstract ByteBuffer getBinary(Device device); - }