/* OneLoneCoder.com - Augmenting Reality #1 - Optical Flow "My arms are tired now." - @Javidx9 Disclaimer ~~~~~~~~~~ I don't care what you use this for. It's intended to be educational, and perhaps to the oddly minded - a little bit of fun. Please hack this, change it and use it in any way you see fit. BUT, you acknowledge that I am not responsible for anything bad that happens as a result of your actions. However, if good stuff happens, I would appreciate a shout out, or at least give the blog some publicity for me. Cheers! Background ~~~~~~~~~~ Optical flow is the determination of motion in a video stream at the pixel level. Each pixel is associated with a motion vector that is used to create a map of velocity vectors which are then used to interact with a virtual object superimposed on the video stream. You will need to have watched my webcam video for this one to make sense! https://youtu.be/pk1Y_26j1Y4 Author ~~~~~~ Twitter: @javidx9 Blog: www.onelonecoder.com Video: ~~~~~~ https://youtu.be/aNtzgoEGC1Y Last Updated: 15/11/2017 */ #include #include #include using namespace std; #include "olcConsoleGameEngine.h" #include "escapi.h" class OneLoneCoder_AROpticFlow : public olcConsoleGameEngine { public: OneLoneCoder_AROpticFlow() { m_sAppName = L"Augmented Reality Part #1 - Optic Flow"; } private: union RGBint { int rgb; unsigned char c[4]; }; int nCameras = 0; SimpleCapParams capture; // 2D Maps for image processing float *fOldCamera = nullptr; // Previous raw frame from camera float *fNewCamera = nullptr; // Recent raw frame from camera float *fFilteredCamera = nullptr; // low-pass filtered image float *fOldFilteredCamera = nullptr; // low-pass filtered image float *fOldMotionImage = nullptr; // previous motion image float *fMotionImage = nullptr; // recent motion image float *fFlowX = nullptr; // x-component of flow field vector float *fFlowY = nullptr; // y-component of flow field vector // Object Physics Variables float fBallX = 0.0f; // Ball position 2D float fBallY = 0.0f; float fBallVX = 0.0f; // Ball Velocity 2D float fBallVY = 0.0f; protected: virtual bool OnUserCreate() { // Initialise webcam to console dimensions nCameras = setupESCAPI(); if (nCameras == 0) return false; capture.mWidth = ScreenWidth(); capture.mHeight = ScreenHeight(); capture.mTargetBuf = new int[ScreenWidth() * ScreenHeight()]; if (initCapture(0, &capture) == 0) return false; // Allocate memory for images fOldCamera = new float[ScreenWidth() * ScreenHeight()]; fNewCamera = new float[ScreenWidth() * ScreenHeight()]; fFilteredCamera = new float[ScreenWidth() * ScreenHeight()]; fOldFilteredCamera = new float[ScreenWidth() * ScreenHeight()]; fFlowX = new float[ScreenWidth() * ScreenHeight()]; fFlowY = new float[ScreenWidth() * ScreenHeight()]; fOldMotionImage = new float[ScreenWidth() * ScreenHeight()]; fMotionImage = new float[ScreenWidth() * ScreenHeight()]; // Initialise images to 0 memset(fOldCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fNewCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fOldFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fFlowX, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fFlowY, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fOldMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); memset(fMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight()); // Set ball position to middle of frame fBallX = ScreenWidth() / 2.0f; fBallY = ScreenHeight() / 2.0f; return true; } virtual bool OnUserUpdate(float fElapsedTime) { // Lambda function to draw "image" in greyscale auto draw_image = [&](float *image) { for (int x = 0; x < capture.mWidth; x++) { for (int y = 0; y < capture.mHeight; y++) { wchar_t sym = 0; short bg_col = 0; short fg_col = 0; int pixel_bw = (int)(image[y*ScreenWidth() + x] * 13.0f); switch (pixel_bw) { case 0: bg_col = BG_BLACK; fg_col = FG_BLACK; sym = PIXEL_SOLID; break; case 1: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_QUARTER; break; case 2: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_HALF; break; case 3: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_THREEQUARTERS; break; case 4: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_SOLID; break; case 5: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_QUARTER; break; case 6: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_HALF; break; case 7: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_THREEQUARTERS; break; case 8: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_SOLID; break; case 9: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_QUARTER; break; case 10: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_HALF; break; case 11: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_THREEQUARTERS; break; case 12: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_SOLID; break; } Draw(x, y, sym, bg_col | fg_col); } } }; // Lambda function to read from a 2D array without error auto get_pixel = [&](float* image, int x, int y) { if (x >= 0 && x < ScreenWidth() && y >= 0 && y < ScreenHeight()) return image[y*ScreenWidth() + x]; else return 0.0f; }; // === Capture & Filter New Input Image ========================================== // Get Image from webcam doCapture(0); while (isCaptureDone(0) == 0) {} // Do Temporal Filtering per pixel for (int y = 0; y < capture.mHeight; y++) for (int x = 0; x < capture.mWidth; x++) { RGBint col; int id = y * capture.mWidth + x; col.rgb = capture.mTargetBuf[id]; int r = col.c[2], g = col.c[1], b = col.c[0]; float fR = (float)r / 255.0f; float fG = (float)g / 255.0f; float fB = (float)b / 255.0f; // Store previous camera frame for temporal processing fOldCamera[y*ScreenWidth() + x] = fNewCamera[y*ScreenWidth() + x]; // Store previous camera frame for temporal processing fOldFilteredCamera[y*ScreenWidth() + x] = fFilteredCamera[y*ScreenWidth() + x]; // Store previous motion only frame fOldMotionImage[y*ScreenWidth() + x] = fMotionImage[y*ScreenWidth() + x]; // Calculate luminance (greyscale equivalent) of pixel float fLuminance = 0.2987f * fR + 0.5870f * fG + 0.1140f * fB; fNewCamera[y*ScreenWidth() + x] = fLuminance; // Low-Pass filter camera image, to remove pixel jitter fFilteredCamera[y*ScreenWidth() + x] += (fNewCamera[y*ScreenWidth() + x] - fFilteredCamera[y*ScreenWidth() + x]) * 0.8f; // Create motion image as difference between two successive camera frames float fDiff = fabs(get_pixel(fFilteredCamera, x, y) - get_pixel(fOldFilteredCamera, x, y)); // Threshold motion image to remove filter out camera noise fMotionImage[y*ScreenWidth() + x] = (fDiff >= 0.05f) ? fDiff : 0.0f; } // === Calculate Optic Flow Vector Map ========================================== // Brute Force Local Spatial Pattern Matching int nPatchSize = 9; int nSearchSize = 7; for (int x = 0; x < ScreenWidth(); x++) { for (int y = 0; y < ScreenHeight(); y++) { // Initialise serach variables float fPatchDifferenceMax = INFINITY; float fPatchDifferenceX = 0.0f; float fPatchDifferenceY = 0.0f; fFlowX[y*ScreenWidth() + x] = 0.0f; fFlowY[y*ScreenWidth() + x] = 0.0f; // Search over a given rectangular area for a "patch" of old image // that "resembles" a patch of the new image. for (int sx = 0; sx < nSearchSize; sx++) { for (int sy = 0; sy < nSearchSize; sy++) { // Search vector is centre of patch test int nSearchVectorX = x + (sx - nSearchSize / 2); int nSearchVectorY = y + (sy - nSearchSize / 2); float fAccumulatedDifference = 0.0f; // For each pixel in search patch, accumulate difference with base patch for (int px = 0; px < nPatchSize; px++) for (int py = 0; py < nPatchSize; py++) { // Work out search patch offset indices int nPatchPixelX = nSearchVectorX + (px - nPatchSize / 2); int nPatchPixelY = nSearchVectorY + (py - nPatchSize / 2); // Work out base patch indices int nBasePixelX = x + (px - nPatchSize / 2); int nBasePixelY = y + (py - nPatchSize / 2); // Get adjacent values for each patch float fPatchPixel = get_pixel(fNewCamera, nPatchPixelX, nPatchPixelY); float fBasePixel = get_pixel(fOldCamera, nBasePixelX, nBasePixelY); // Accumulate difference fAccumulatedDifference += fabs(fPatchPixel - fBasePixel); } // Record the vector offset for the search patch that is the // least different to the base patch if (fAccumulatedDifference <= fPatchDifferenceMax) { fPatchDifferenceMax = fAccumulatedDifference; fFlowX[y*ScreenWidth() + x] = (float)(nSearchVectorX - x); fFlowY[y*ScreenWidth() + x] = (float)(nSearchVectorY - y); } } } } } // Modulate Optic Flow Vector Map with motion map, to remove vectors that // errornously indicate large local motion for (int i = 0; i < ScreenWidth()*ScreenHeight(); i++) { fFlowX[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f; fFlowY[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f; } // === Update Ball Physics ======================================================== // Ball velocity is updated by optic flow vector field fBallVX += 100.0f * fFlowX[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime; fBallVY += 100.0f * fFlowY[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime; // Ball position is updated by velocity fBallX += 1.0f * fBallVX * fElapsedTime; fBallY += 1.0f * fBallVY * fElapsedTime; // Add "drag" effect to ball velocity fBallVX *= 0.85f; fBallVY *= 0.85f; // Wrap ball around screen if (fBallX >= ScreenWidth()) fBallX -= (float)ScreenWidth(); if (fBallY >= ScreenHeight()) fBallY -= (float)ScreenHeight(); if (fBallX < 0) fBallX += (float)ScreenWidth(); if (fBallY < 0) fBallY += (float)ScreenHeight(); // === Update Screen ================================================================= // Draw Camera Image draw_image(fNewCamera); // Draw "Ball" Fill(fBallX - 4, fBallY - 4, fBallX + 4, fBallY + 4, PIXEL_SOLID, FG_RED); return true; } }; int main() { OneLoneCoder_AROpticFlow game; game.ConstructConsole(80, 60, 16, 16); game.Start(); return 0; }