/*
	OneLoneCoder.com - Augmenting Reality #1 - Optical Flow
	"My arms are tired now." - @Javidx9

	License
	~~~~~~~
	One Lone Coder Console Game Engine  Copyright (C) 2018  Javidx9
	This program comes with ABSOLUTELY NO WARRANTY.
	This is free software, and you are welcome to redistribute it
	under certain conditions; See license for details.
	Original works located at:
	https://www.github.com/onelonecoder
	https://www.onelonecoder.com
	https://www.youtube.com/javidx9
	GNU GPLv3
	https://github.com/OneLoneCoder/videos/blob/master/LICENSE

	From Javidx9 :)
	~~~~~~~~~~~~~~~
	Hello! Ultimately I don't care what you use this for. It's intended to be
	educational, and perhaps to the oddly minded - a little bit of fun.
	Please hack this, change it and use it in any way you see fit. You acknowledge
	that I am not responsible for anything bad that happens as a result of
	your actions. However this code is protected by GNU GPLv3, see the license in the
	github repo. This means you must attribute me if you use it. You can view this
	license here: https://github.com/OneLoneCoder/videos/blob/master/LICENSE
	Cheers!

	Background
	~~~~~~~~~~
	Optical flow is the determination of motion in a video stream at the pixel level.
	Each pixel is associated with a motion vector that is used to create a map of
	velocity vectors which are then used to interact with a virtual object superimposed
	on the video stream.

	You will need to have watched my webcam video for this one to make sense!
	https://youtu.be/pk1Y_26j1Y4

	Author
	~~~~~~
	Twitter: @javidx9
	Blog: www.onelonecoder.com

	Video:
	~~~~~~
	https://youtu.be/aNtzgoEGC1Y

	Last Updated: 15/11/2017
*/
#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

#include "olcConsoleGameEngine.h"
#include "escapi.h"
class OneLoneCoder_AROpticFlow : public olcConsoleGameEngine
|
|
{
|
|
public:
|
|
OneLoneCoder_AROpticFlow()
|
|
{
|
|
m_sAppName = L"Augmented Reality Part #1 - Optic Flow";
|
|
}
|
|
|
|
private:
|
|
union RGBint
|
|
{
|
|
int rgb;
|
|
unsigned char c[4];
|
|
};
|
|
|
|
int nCameras = 0;
|
|
SimpleCapParams capture;
|
|
|
|
// 2D Maps for image processing
|
|
float *fOldCamera = nullptr; // Previous raw frame from camera
|
|
float *fNewCamera = nullptr; // Recent raw frame from camera
|
|
float *fFilteredCamera = nullptr; // low-pass filtered image
|
|
float *fOldFilteredCamera = nullptr; // low-pass filtered image
|
|
float *fOldMotionImage = nullptr; // previous motion image
|
|
float *fMotionImage = nullptr; // recent motion image
|
|
float *fFlowX = nullptr; // x-component of flow field vector
|
|
float *fFlowY = nullptr; // y-component of flow field vector
|
|
|
|
// Object Physics Variables
|
|
float fBallX = 0.0f; // Ball position 2D
|
|
float fBallY = 0.0f;
|
|
float fBallVX = 0.0f; // Ball Velocity 2D
|
|
float fBallVY = 0.0f;
|
|
|
|
protected:
|
|
virtual bool OnUserCreate()
|
|
{
|
|
// Initialise webcam to console dimensions
|
|
nCameras = setupESCAPI();
|
|
if (nCameras == 0) return false;
|
|
capture.mWidth = ScreenWidth();
|
|
capture.mHeight = ScreenHeight();
|
|
capture.mTargetBuf = new int[ScreenWidth() * ScreenHeight()];
|
|
if (initCapture(0, &capture) == 0) return false;
|
|
|
|
// Allocate memory for images
|
|
fOldCamera = new float[ScreenWidth() * ScreenHeight()];
|
|
fNewCamera = new float[ScreenWidth() * ScreenHeight()];
|
|
fFilteredCamera = new float[ScreenWidth() * ScreenHeight()];
|
|
fOldFilteredCamera = new float[ScreenWidth() * ScreenHeight()];
|
|
fFlowX = new float[ScreenWidth() * ScreenHeight()];
|
|
fFlowY = new float[ScreenWidth() * ScreenHeight()];
|
|
fOldMotionImage = new float[ScreenWidth() * ScreenHeight()];
|
|
fMotionImage = new float[ScreenWidth() * ScreenHeight()];
|
|
|
|
// Initialise images to 0
|
|
memset(fOldCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fNewCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fOldFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fFlowX, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fFlowY, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fOldMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
memset(fMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
|
|
|
|
// Set ball position to middle of frame
|
|
fBallX = ScreenWidth() / 2.0f;
|
|
fBallY = ScreenHeight() / 2.0f;
|
|
return true;
|
|
}
|
|
|
|
virtual bool OnUserUpdate(float fElapsedTime)
|
|
{
|
|
// Lambda function to draw "image" in greyscale
|
|
auto draw_image = [&](float *image)
|
|
{
|
|
for (int x = 0; x < capture.mWidth; x++)
|
|
{
|
|
for (int y = 0; y < capture.mHeight; y++)
|
|
{
|
|
wchar_t sym = 0;
|
|
short bg_col = 0;
|
|
short fg_col = 0;
|
|
int pixel_bw = (int)(image[y*ScreenWidth() + x] * 13.0f);
|
|
switch (pixel_bw)
|
|
{
|
|
case 0: bg_col = BG_BLACK; fg_col = FG_BLACK; sym = PIXEL_SOLID; break;
|
|
case 1: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_QUARTER; break;
|
|
case 2: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_HALF; break;
|
|
case 3: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_THREEQUARTERS; break;
|
|
case 4: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_SOLID; break;
|
|
case 5: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_QUARTER; break;
|
|
case 6: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_HALF; break;
|
|
case 7: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_THREEQUARTERS; break;
|
|
case 8: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_SOLID; break;
|
|
case 9: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_QUARTER; break;
|
|
case 10: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_HALF; break;
|
|
case 11: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_THREEQUARTERS; break;
|
|
case 12: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_SOLID; break;
|
|
}
|
|
Draw(x, y, sym, bg_col | fg_col);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Lambda function to read from a 2D array without error
|
|
auto get_pixel = [&](float* image, int x, int y)
|
|
{
|
|
if (x >= 0 && x < ScreenWidth() && y >= 0 && y < ScreenHeight())
|
|
return image[y*ScreenWidth() + x];
|
|
else
|
|
return 0.0f;
|
|
};
|
|
|
|
// === Capture & Filter New Input Image ==========================================
|
|
|
|
// Get Image from webcam
|
|
doCapture(0); while (isCaptureDone(0) == 0) {}
|
|
|
|
// Do Temporal Filtering per pixel
|
|
for (int y = 0; y < capture.mHeight; y++)
|
|
for (int x = 0; x < capture.mWidth; x++)
|
|
{
|
|
RGBint col;
|
|
int id = y * capture.mWidth + x;
|
|
col.rgb = capture.mTargetBuf[id];
|
|
int r = col.c[2], g = col.c[1], b = col.c[0];
|
|
float fR = (float)r / 255.0f;
|
|
float fG = (float)g / 255.0f;
|
|
float fB = (float)b / 255.0f;
|
|
|
|
// Store previous camera frame for temporal processing
|
|
fOldCamera[y*ScreenWidth() + x] = fNewCamera[y*ScreenWidth() + x];
|
|
|
|
// Store previous camera frame for temporal processing
|
|
fOldFilteredCamera[y*ScreenWidth() + x] = fFilteredCamera[y*ScreenWidth() + x];
|
|
|
|
// Store previous motion only frame
|
|
fOldMotionImage[y*ScreenWidth() + x] = fMotionImage[y*ScreenWidth() + x];
|
|
|
|
// Calculate luminance (greyscale equivalent) of pixel
|
|
float fLuminance = 0.2987f * fR + 0.5870f * fG + 0.1140f * fB;
|
|
fNewCamera[y*ScreenWidth() + x] = fLuminance;
|
|
|
|
// Low-Pass filter camera image, to remove pixel jitter
|
|
fFilteredCamera[y*ScreenWidth() + x] += (fNewCamera[y*ScreenWidth() + x] - fFilteredCamera[y*ScreenWidth() + x]) * 0.8f;
|
|
|
|
// Create motion image as difference between two successive camera frames
|
|
float fDiff = fabs(get_pixel(fFilteredCamera, x, y) - get_pixel(fOldFilteredCamera, x, y));
|
|
|
|
// Threshold motion image to remove filter out camera noise
|
|
fMotionImage[y*ScreenWidth() + x] = (fDiff >= 0.05f) ? fDiff : 0.0f;
|
|
}
|
|
|
|
// === Calculate Optic Flow Vector Map ==========================================
|
|
|
|
// Brute Force Local Spatial Pattern Matching
|
|
int nPatchSize = 9;
|
|
int nSearchSize = 7;
|
|
|
|
for (int x = 0; x < ScreenWidth(); x++)
|
|
{
|
|
for (int y = 0; y < ScreenHeight(); y++)
|
|
{
|
|
// Initialise serach variables
|
|
float fPatchDifferenceMax = INFINITY;
|
|
float fPatchDifferenceX = 0.0f;
|
|
float fPatchDifferenceY = 0.0f;
|
|
fFlowX[y*ScreenWidth() + x] = 0.0f;
|
|
fFlowY[y*ScreenWidth() + x] = 0.0f;
|
|
|
|
// Search over a given rectangular area for a "patch" of old image
|
|
// that "resembles" a patch of the new image.
|
|
for (int sx = 0; sx < nSearchSize; sx++)
|
|
{
|
|
for (int sy = 0; sy < nSearchSize; sy++)
|
|
{
|
|
// Search vector is centre of patch test
|
|
int nSearchVectorX = x + (sx - nSearchSize / 2);
|
|
int nSearchVectorY = y + (sy - nSearchSize / 2);
|
|
|
|
float fAccumulatedDifference = 0.0f;
|
|
|
|
// For each pixel in search patch, accumulate difference with base patch
|
|
for (int px = 0; px < nPatchSize; px++)
|
|
for (int py = 0; py < nPatchSize; py++)
|
|
{
|
|
// Work out search patch offset indices
|
|
int nPatchPixelX = nSearchVectorX + (px - nPatchSize / 2);
|
|
int nPatchPixelY = nSearchVectorY + (py - nPatchSize / 2);
|
|
|
|
// Work out base patch indices
|
|
int nBasePixelX = x + (px - nPatchSize / 2);
|
|
int nBasePixelY = y + (py - nPatchSize / 2);
|
|
|
|
// Get adjacent values for each patch
|
|
float fPatchPixel = get_pixel(fNewCamera, nPatchPixelX, nPatchPixelY);
|
|
float fBasePixel = get_pixel(fOldCamera, nBasePixelX, nBasePixelY);
|
|
|
|
// Accumulate difference
|
|
fAccumulatedDifference += fabs(fPatchPixel - fBasePixel);
|
|
}
|
|
|
|
// Record the vector offset for the search patch that is the
|
|
// least different to the base patch
|
|
if (fAccumulatedDifference <= fPatchDifferenceMax)
|
|
{
|
|
fPatchDifferenceMax = fAccumulatedDifference;
|
|
fFlowX[y*ScreenWidth() + x] = (float)(nSearchVectorX - x);
|
|
fFlowY[y*ScreenWidth() + x] = (float)(nSearchVectorY - y);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Modulate Optic Flow Vector Map with motion map, to remove vectors that
|
|
// errornously indicate large local motion
|
|
for (int i = 0; i < ScreenWidth()*ScreenHeight(); i++)
|
|
{
|
|
fFlowX[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f;
|
|
fFlowY[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f;
|
|
}
|
|
|
|
// === Update Ball Physics ========================================================
|
|
|
|
// Ball velocity is updated by optic flow vector field
|
|
fBallVX += 100.0f * fFlowX[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime;
|
|
fBallVY += 100.0f * fFlowY[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime;
|
|
|
|
// Ball position is updated by velocity
|
|
fBallX += 1.0f * fBallVX * fElapsedTime;
|
|
fBallY += 1.0f * fBallVY * fElapsedTime;
|
|
|
|
// Add "drag" effect to ball velocity
|
|
fBallVX *= 0.85f;
|
|
fBallVY *= 0.85f;
|
|
|
|
// Wrap ball around screen
|
|
if (fBallX >= ScreenWidth()) fBallX -= (float)ScreenWidth();
|
|
if (fBallY >= ScreenHeight()) fBallY -= (float)ScreenHeight();
|
|
if (fBallX < 0) fBallX += (float)ScreenWidth();
|
|
if (fBallY < 0) fBallY += (float)ScreenHeight();
|
|
|
|
// === Update Screen =================================================================
|
|
|
|
// Draw Camera Image
|
|
draw_image(fNewCamera);
|
|
|
|
// Draw "Ball"
|
|
Fill(fBallX - 4, fBallY - 4, fBallX + 4, fBallY + 4, PIXEL_SOLID, FG_RED);
|
|
return true;
|
|
}
|
|
};
int main()
|
|
{
|
|
OneLoneCoder_AROpticFlow game;
|
|
game.ConstructConsole(80, 60, 16, 16);
|
|
game.Start();
|
|
return 0;
|
|
}