diff --git a/jme3-core/src/main/resources/Common/OpenCL/Matrix4f.clh b/jme3-core/src/main/resources/Common/OpenCL/Matrix4f.clh index b2af2b127..0df3de3d1 100644 --- a/jme3-core/src/main/resources/Common/OpenCL/Matrix4f.clh +++ b/jme3-core/src/main/resources/Common/OpenCL/Matrix4f.clh @@ -9,4 +9,311 @@ typedef float16 mat4; //All matrix functions are prefixed with mat3 or mat4 +//Returns the zero matrix +inline mat4 mat4Zero() { + return (float16)(0); +} + +//Returns the identity matrix +inline mat4 mat4Identity() { + return (float16) + (1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1); +} + +inline mat4 mat4FromRows(float4 row1, float4 row2, float4 row3, float4 row4) { + return (float16) (row1, row2, row3, row4); +} + +inline mat4 mat4FromColumns(float4 col1, float4 col2, float4 col3, float4 col4) { + return (float16) + (col1.x, col2.x, col3.x, col4.x, + col1.y, col2.y, col3.y, col4.y, + col1.z, col2.z, col3.z, col4.z, + col1.w, col2.w, col3.w, col4.w); +} + +inline mat4 mat4FromDiagonal(float4 diag) { + return (float16) + (diag.x, 0, 0, 0, + 0, diag.y, 0, 0, + 0, 0, diag.z, 0, + 0, 0, 0, diag.w); +} + +//Returns the i-th row (0-based) +inline float4 mat4GetRow(mat4 mat, int i) { + if (i==0) return mat.s0123; + else if (i==1) return mat.s4567; + else if (i==2) return mat.s89ab; + else return mat.scdef; +} + +//Sets the i-th row (0-based) +inline mat4 mat4SetRow(mat4 mat, int i, float4 row) { + if (i==0) mat.s0123 = row; + else if (i==1) mat.s4567 = row; + else if (i==2) mat.s89ab = row; + else mat.scdef = row; + return mat; +} + +//Returns the i-th column (0-based) +inline float4 mat4GetColumn(mat4 mat, int i) { + if (i==0) return mat.s048c; + else if (i==1) return mat.s159d; + else if (i==2) return mat.s26ae; + else return mat.s37bf; +} + +//Sets the i-th column (0-based) +inline mat4 mat4SetColumn(mat4 mat, int i, float4 col) { + if (i==0) mat.s048c = col; + else if (i==1) mat.s159d = col; + else if (i==2) mat.s26ae = col; + else mat.s37bf = col; + return mat; +} + +//Returns the diagonal +inline float4 mat4GetDiagonal(mat4 mat) { + return mat.s05af; +} + +//Sets the diagonal +inline mat4 mat3SetDiagonal(mat4 mat, float4 diag) { + mat.s05af = diag; + return mat; +} + +mat4 mat4FromFrame(float3 location, float3 direction, float3 up, float3 left) +{ + float3 fwdVector = direction; + float3 leftVector = cross(fwdVector, up); + float3 upVector = cross(leftVector, fwdVector); + + return (float16) ( + leftVector.x, + leftVector.y, + leftVector.z, + dot(-leftVector, location), + + upVector.x, + upVector.y, + upVector.z, + dot(-upVector, location), + + -fwdVector.x, + -fwdVector.y, + -fwdVector.z, + dot(fwdVector, location), + + 0, 0, 0, 1 + ); +} + +inline mat4 mat4Transpose(mat4 mat) { + return mat.s048c159d26ae37bf; //magic +} + +mat4 mat4FromAngleNormalAxis(float angle, float3 axis) { + float fCos = cos(angle); + float fSin = sin(angle); + float fOneMinusCos = 1.0f - fCos; + float fX2 = axis.x * axis.x; + float fY2 = axis.y * axis.y; + float fZ2 = axis.z * axis.z; + float fXYM = axis.x * axis.y * fOneMinusCos; + float fXZM = axis.x * axis.z * fOneMinusCos; + float fYZM = axis.y * axis.z * fOneMinusCos; + float fXSin = axis.x * fSin; + float fYSin = axis.y * fSin; + float fZSin = axis.z * fSin; + + return (float16) ( + fX2 * fOneMinusCos + fCos, + fXYM - fZSin, + fXZM + fYSin, + 0, + fXYM + fZSin, + fY2 * fOneMinusCos + fCos, + fYZM - fXSin, + 0, + fXZM - fYSin, + fYZM + fXSin, + fZ2 * fOneMinusCos + fCos, + 0, + 0, 0, 0, 1 + ); +} + +mat4 mat4FromAngleAxis(float angle, float3 axis) { + return mat4FromAngleNormalAxis(angle, normalize(axis)); +} + +inline mat4 mat4Scale(mat4 mat, float s) { + return mat * s; +} + +inline mat4 mat4Add(mat4 A, mat4 B) { + return A + B; +} + +//Multiplies the two matrices A and B +inline mat4 mat4Mult(mat4 A, mat4 B) { + return (float16) ( + dot(A.s0123, B.s048c), + dot(A.s0123, B.s159d), + dot(A.s0123, B.s26ae), + dot(A.s0123, B.s37bf), + + dot(A.s4567, B.s048c), + dot(A.s4567, B.s159d), + dot(A.s4567, B.s26ae), + dot(A.s4567, B.s37bf), + + dot(A.s89ab, B.s048c), + dot(A.s89ab, B.s159d), + dot(A.s89ab, B.s26ae), + dot(A.s89ab, B.s37bf), + + dot(A.scdef, B.s048c), + dot(A.scdef, B.s159d), + dot(A.scdef, B.s26ae), + dot(A.scdef, B.s37bf) + ); +} + +//Computes Av (right multiply of a vector to a matrix) +inline float4 mat4VMult(mat4 A, float4 v) { + return (float4) ( + dot(A.s0123, v), + dot(A.s4567, v), + dot(A.s89ab, v), + dot(A.scdef, v)); +} + +//Computes vA (left multiply of a vector to a matrix) +inline float4 mat4VMult2(float4 v, mat4 A) { + return (float4) ( + dot(v, A.s048c), + dot(v, A.s159d), + dot(v, A.s26ae), + dot(v, A.s37bf)); +} + +inline float4 mat4MultAcross(mat4 mat, float4 v) { + return mat4VMult2(v, mat); +} + +inline float3 mat4MultNormal(mat4 mat, float3 v) { + return mat4VMult(mat, (float4)(v, 0)).xyz; +} + +inline float3 mat4MultNormalAcross(mat4 mat, float3 v) { + return mat4VMult2((float4)(v, 0), mat).xyz; +} + +mat4 mat4Invert(mat4 mat) { + float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4; + float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4; + float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4; + float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5; + float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5; + float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6; + float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc; + float fB1 = mat.s8 * mat.se - mat.sa * mat.sc; + float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc; + float fB3 = mat.s9 * mat.se - mat.sa * mat.sd; + float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd; + float fB5 = mat.sa * mat.sf - mat.sb * mat.se; + float fDet = fA0 * fB5 - fA1 * fB4 + fA2 * fB3 + fA3 * fB2 - fA4 * fB1 + fA5 * fB0; + + if (fabs(fDet) <= 0.000001f) { + return mat4Zero(); + } + + mat4 store; + store.s0 = +mat.s5 * fB5 - mat.s6 * fB4 + mat.s7 * fB3; + store.s4 = -mat.s4 * fB5 + mat.s6 * fB2 - mat.s7 * fB1; + store.s8 = +mat.s4 * fB4 - mat.s5 * fB2 + mat.s7 * fB0; + store.sc = -mat.s4 * fB3 + mat.s5 * fB1 - mat.s6 * fB0; + store.s1 = -mat.s1 * fB5 + mat.s2 * fB4 - mat.s3 * fB3; + store.s5 = +mat.s0 * fB5 - mat.s2 * fB2 + mat.s3 * fB1; + store.s9 = -mat.s0 * fB4 + mat.s1 * fB2 - mat.s3 * fB0; + store.sd = +mat.s0 * fB3 - mat.s1 * fB1 + mat.s2 * fB0; + store.s2 = +mat.sd * fA5 - mat.se * fA4 + mat.sf * fA3; + store.s6 = -mat.sc * fA5 + mat.se * fA2 - mat.sf * fA1; + store.sa = +mat.sc * fA4 - mat.sd * fA2 + mat.sf * fA0; + store.se = -mat.sc * fA3 + mat.sd * fA1 - mat.se * fA0; + store.s3 = -mat.s9 * fA5 + mat.sa * fA4 - mat.sb * fA3; + store.s7 = +mat.s8 * fA5 - mat.sa * fA2 + mat.sb * fA1; + store.sb = -mat.s8 * fA4 + mat.s9 * fA2 - mat.sb * fA0; + store.sf = +mat.s8 * fA3 - mat.s9 * fA1 + mat.sa * fA0; + + store /= fDet; + + return store; +} + +mat4 mat4Adjoint(mat4 mat) { + float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4; + float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4; + float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4; + float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5; + float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5; + float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6; + float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc; + float fB1 = mat.s8 * mat.se - mat.sa * mat.sc; + float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc; + float fB3 = mat.s9 * mat.se - mat.sa * mat.sd; + float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd; + float fB5 = mat.sa * mat.sf - mat.sb * mat.se; + + mat4 store; + + store.s0 = +mat.s5 * fB5 - mat.s6 * fB4 + mat.s7 * fB3; + store.s4 = -mat.s4 * fB5 + mat.s6 * fB2 - mat.s7 * fB1; + store.s8 = +mat.s4 * fB4 - mat.s5 * fB2 + mat.s7 * fB0; + store.sc = -mat.s4 * fB3 + mat.s5 * fB1 - mat.s6 * fB0; + store.s1 = -mat.s1 * fB5 + mat.s2 * fB4 - mat.s3 * fB3; + store.s5 = +mat.s0 * fB5 - mat.s2 * fB2 + mat.s3 * fB1; + store.s9 = -mat.s0 * fB4 + mat.s1 * fB2 - mat.s3 * fB0; + store.sd = +mat.s0 * fB3 - mat.s1 * fB1 + mat.s2 * fB0; + store.s2 = +mat.sd * fA5 - mat.se * fA4 + mat.sf * fA3; + store.s6 = -mat.sc * fA5 + mat.se * fA2 - mat.sf * fA1; + store.sa = +mat.sc * fA4 - mat.sd * fA2 + mat.sf * fA0; + store.se = -mat.sc * fA3 + mat.sd * fA1 - mat.se * fA0; + store.s3 = -mat.s9 * fA5 + mat.sa * fA4 - mat.sb * fA3; + store.s7 = +mat.s8 * fA5 - mat.sa * fA2 + mat.sb * fA1; + store.sb = -mat.s8 * fA4 + mat.s9 * fA2 - mat.sb * fA0; + store.sf = +mat.s8 * fA3 - mat.s9 * fA1 + mat.sa * fA0; + + return store; +} + +float mat4Determinant(mat4 mat) { + float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4; + float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4; + float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4; + float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5; + float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5; + float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6; + float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc; + float fB1 = mat.s8 * mat.se - mat.sa * mat.sc; + float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc; + float fB3 = mat.s9 * mat.se - mat.sa * mat.sd; + float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd; + float fB5 = mat.sa * mat.sf - mat.sb * mat.se; + float fDet = fA0 * fB5 - fA1 * fB4 + fA2 * fB3 + fA3 * fB2 - fA4 * fB1 + fA5 * fB0; + return fDet; +} + +inline bool mat4Equals(mat4 A, mat4 B, float epsilon) { + return all(isless(fabs(A - B), epsilon)); +} + + #endif \ No newline at end of file diff --git a/jme3-examples/src/main/java/jme3test/opencl/TestOpenCLLibraries.java b/jme3-examples/src/main/java/jme3test/opencl/TestOpenCLLibraries.java index a291ad3b2..766437c65 100644 --- a/jme3-examples/src/main/java/jme3test/opencl/TestOpenCLLibraries.java +++ b/jme3-examples/src/main/java/jme3test/opencl/TestOpenCLLibraries.java @@ -36,7 +36,9 @@ import com.jme3.app.SimpleApplication; import com.jme3.font.BitmapFont; import com.jme3.font.BitmapText; import com.jme3.math.ColorRGBA; +import com.jme3.math.FastMath; import com.jme3.math.Matrix3f; +import com.jme3.math.Matrix4f; import com.jme3.opencl.*; import com.jme3.system.AppSettings; import com.jme3.util.BufferUtils; @@ -84,6 +86,7 @@ public class TestOpenCLLibraries extends SimpleApplication { str.append("\nTests:"); str.append("\n Random numbers: ").append(testRandom(clContext, clQueue)); str.append("\n Matrix3f: ").append(testMatrix3f(clContext, clQueue)); + str.append("\n Matrix4f: ").append(testMatrix4f(clContext, clQueue)); clQueue.release(); @@ -279,7 +282,7 @@ public class TestOpenCLLibraries extends SimpleApplication { + "}\n"; Program program = clContext.createProgramFromSourceCodeWithDependencies(code, assetManager); program.build(); - com.jme3.opencl.Buffer buffer = clContext.createBuffer(4 * 1); + com.jme3.opencl.Buffer buffer = clContext.createBuffer(1); Kernel testMatrix3fKernel1 = program.createKernel("TestMatrix3f_1"); testMatrix3fKernel1.Run1NoEvent(clQueue, new Kernel.WorkSize(1), buffer); @@ -316,4 +319,85 @@ public class TestOpenCLLibraries extends SimpleApplication { return true; } + private boolean testMatrix4f(Context clContext, CommandQueue clQueue) { + try { + + String code = "" + + "#import \"Common/OpenCL/Matrix4f.clh\"\n" + + "\n" + + "__kernel void TestMatrix4f_1(mat4 m1, __global char* result)\n" + + "{\n" + + " mat4 id = mat4Identity();\n" + + " mat4 m1Inv = mat4Invert(m1);\n" + + " mat4 m1Res = mat4Mult(m1, m1Inv);\n" + + " result[0] = mat4Equals(id, m1Res, 0.0001f) ? 1 : 0;\n" + + "}\n" + + "\n" + + "__kernel void TestMatrix4f_2(mat4 m1, float d, mat4 m2, mat4 m3, __global char* result)\n" + + "{\n" + + " float d2 = mat4Determinant(m1);\n" + + " result[0] = fabs(d - d2) < 0.0001f ? 1 : 0;\n" + + " mat4 res = mat4Transpose(m1);\n" + + " result[1] = mat4Equals(res, m2, 0.0001f) ? 1 : 0;\n" + + " res = mat4Adjoint(m1);\n" + + " result[2] = mat4Equals(res, m3, 0.0001f) ? 1 : 0;\n" + + "}\n"; + Program program = clContext.createProgramFromSourceCodeWithDependencies(code, assetManager); + program.build(); + com.jme3.opencl.Buffer buffer = clContext.createBuffer(3); + + Random rand = new Random(1561); + + Kernel testMatrix4fKernel1 = program.createKernel("TestMatrix4f_1"); + Matrix4f m1 = new Matrix4f(); + do { + for (int i=0; i<4; ++i) { + for (int j=0; j<4; ++j) { + m1.set(i, j, rand.nextFloat()*20 - 10); + } + } + } while (FastMath.abs(m1.determinant()) < 0.00001f); + testMatrix4fKernel1.Run1NoEvent(clQueue, new Kernel.WorkSize(1), m1, buffer); + ByteBuffer bb = buffer.map(clQueue, MappingAccess.MAP_READ_ONLY); + if (bb.get() == 0) { + LOG.severe("Matrix inversion failed"); + return false; + } + buffer.unmap(clQueue, bb); + testMatrix4fKernel1.release(); + + Kernel testMatrix4fKernel2 = program.createKernel("TestMatrix4f_2"); + for (int i=0; i<4; ++i) { + for (int j=0; j<4; ++j) { + m1.set(i, j, rand.nextFloat()*20 - 10); + } + } + testMatrix4fKernel2.Run1NoEvent(clQueue, new Kernel.WorkSize(1), m1, m1.determinant(), m1.transpose(), m1.adjoint(), buffer); + bb = buffer.map(clQueue, MappingAccess.MAP_READ_ONLY); + if (bb.get() == 0) { + LOG.severe("Matrix determinant computation failed"); + return false; + } + if (bb.get() == 0) { + LOG.severe("Matrix transposing failed"); + return false; + } + if (bb.get() == 0) { + LOG.severe("Matrix adjoint computation failed"); + return false; + } + buffer.unmap(clQueue, bb); + testMatrix4fKernel2.release(); + + buffer.release(); + + } catch (AssertionError ex) { + LOG.log(Level.SEVERE, "kernel test failed with an assertion error"); + return false; + } catch (Exception ex) { + LOG.log(Level.SEVERE, "kernel test failed with:", ex); + return false; + } + return true; + } } \ No newline at end of file