/*
	OneLoneCoder.com - Augmenting Reality #1 - Optical Flow
	"My arms are tired now." - @Javidx9

	Disclaimer
	~~~~~~~~~~
	I don't care what you use this for. It's intended to be educational, and perhaps
	to the oddly minded - a little bit of fun. Please hack this, change it and use it
	in any way you see fit. BUT, you acknowledge that I am not responsible for anything
	bad that happens as a result of your actions. However, if good stuff happens, I
	would appreciate a shout out, or at least give the blog some publicity for me.
	Cheers!

	Background
	~~~~~~~~~~
	Optical flow is the determination of motion in a video stream at the pixel level.
	Each pixel is associated with a motion vector that is used to create a map of
	velocity vectors which are then used to interact with a virtual object superimposed
	on the video stream.
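
	The flow field in this demo is estimated by brute-force block matching: for each
	pixel, a small patch of the previous frame is compared against patches of the new
	frame over a local search window, and the offset with the smallest sum of absolute
	differences is taken as that pixel's motion vector.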

	You will need to have watched my webcam video for this one to make sense!
	https://youtu.be/pk1Y_26j1Y4

	Author
	~~~~~~
	Twitter: @javidx9
	Blog: www.onelonecoder.com

	Video:
	~~~~~~
	https://youtu.be/aNtzgoEGC1Y

	Last Updated: 15/11/2017
*/

#include <iostream>
#include <string>
#include <algorithm>
#include <cmath>    // fabs, INFINITY
#include <cstring>  // memset
using namespace std;

#include "olcConsoleGameEngine.h"
#include "escapi.h"

class OneLoneCoder_AROpticFlow : public olcConsoleGameEngine
{
public:
    OneLoneCoder_AROpticFlow()
    {
        m_sAppName = L"Augmented Reality Part #1 - Optic Flow";
    }

private:
    union RGBint
    {
        int rgb;
        unsigned char c[4];
    };
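    // The union lets the same 32-bit pixel from the ESCAPI capture buffer be viewed
    // either as a packed int or as its individual bytes; the capture loop below reads
    // c[2], c[1] and c[0] as the red, green and blue channels respectively.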

    int nCameras = 0;
    SimpleCapParams capture;

    // 2D Maps for image processing
    float *fOldCamera = nullptr;         // Previous raw frame from camera
    float *fNewCamera = nullptr;         // Recent raw frame from camera
    float *fFilteredCamera = nullptr;    // Low-pass filtered image
    float *fOldFilteredCamera = nullptr; // Previous low-pass filtered image
    float *fOldMotionImage = nullptr;    // Previous motion image
    float *fMotionImage = nullptr;       // Recent motion image
    float *fFlowX = nullptr;             // x-component of flow field vector
    float *fFlowY = nullptr;             // y-component of flow field vector

    // Object Physics Variables
    float fBallX = 0.0f;  // Ball position 2D
    float fBallY = 0.0f;
    float fBallVX = 0.0f; // Ball Velocity 2D
    float fBallVY = 0.0f;

protected:
    virtual bool OnUserCreate()
    {
        // Initialise webcam to console dimensions
        nCameras = setupESCAPI();
        if (nCameras == 0) return false;
        capture.mWidth = ScreenWidth();
        capture.mHeight = ScreenHeight();
        capture.mTargetBuf = new int[ScreenWidth() * ScreenHeight()];
        if (initCapture(0, &capture) == 0) return false;

        // Allocate memory for images
        fOldCamera = new float[ScreenWidth() * ScreenHeight()];
        fNewCamera = new float[ScreenWidth() * ScreenHeight()];
        fFilteredCamera = new float[ScreenWidth() * ScreenHeight()];
        fOldFilteredCamera = new float[ScreenWidth() * ScreenHeight()];
        fFlowX = new float[ScreenWidth() * ScreenHeight()];
        fFlowY = new float[ScreenWidth() * ScreenHeight()];
        fOldMotionImage = new float[ScreenWidth() * ScreenHeight()];
        fMotionImage = new float[ScreenWidth() * ScreenHeight()];

        // Initialise images to 0
        memset(fOldCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fNewCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fOldFilteredCamera, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fFlowX, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fFlowY, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fOldMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
        memset(fMotionImage, 0, sizeof(float) * ScreenWidth() * ScreenHeight());
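        // (Zero-filling the raw bytes is fine here: an all-zero bit pattern is 0.0f
        //  for IEEE-754 floats, so every map starts out as a black image.)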

        // Set ball position to middle of frame
        fBallX = ScreenWidth() / 2.0f;
        fBallY = ScreenHeight() / 2.0f;
        return true;
    }

    virtual bool OnUserUpdate(float fElapsedTime)
    {
        // Lambda function to draw "image" in greyscale
        auto draw_image = [&](float *image)
        {
            for (int x = 0; x < capture.mWidth; x++)
            {
                for (int y = 0; y < capture.mHeight; y++)
                {
                    wchar_t sym = 0;
                    short bg_col = 0;
                    short fg_col = 0;
                    int pixel_bw = (int)(image[y*ScreenWidth() + x] * 13.0f);
                    switch (pixel_bw)
                    {
                    case 0:  bg_col = BG_BLACK;     fg_col = FG_BLACK;     sym = PIXEL_SOLID;         break;
                    case 1:  bg_col = BG_BLACK;     fg_col = FG_DARK_GREY; sym = PIXEL_QUARTER;       break;
                    case 2:  bg_col = BG_BLACK;     fg_col = FG_DARK_GREY; sym = PIXEL_HALF;          break;
                    case 3:  bg_col = BG_BLACK;     fg_col = FG_DARK_GREY; sym = PIXEL_THREEQUARTERS; break;
                    case 4:  bg_col = BG_BLACK;     fg_col = FG_DARK_GREY; sym = PIXEL_SOLID;         break;
                    case 5:  bg_col = BG_DARK_GREY; fg_col = FG_GREY;      sym = PIXEL_QUARTER;       break;
                    case 6:  bg_col = BG_DARK_GREY; fg_col = FG_GREY;      sym = PIXEL_HALF;          break;
                    case 7:  bg_col = BG_DARK_GREY; fg_col = FG_GREY;      sym = PIXEL_THREEQUARTERS; break;
                    case 8:  bg_col = BG_DARK_GREY; fg_col = FG_GREY;      sym = PIXEL_SOLID;         break;
                    case 9:  bg_col = BG_GREY;      fg_col = FG_WHITE;     sym = PIXEL_QUARTER;       break;
                    case 10: bg_col = BG_GREY;      fg_col = FG_WHITE;     sym = PIXEL_HALF;          break;
                    case 11: bg_col = BG_GREY;      fg_col = FG_WHITE;     sym = PIXEL_THREEQUARTERS; break;
                    case 12: bg_col = BG_GREY;      fg_col = FG_WHITE;     sym = PIXEL_SOLID;         break;
                    }
                    Draw(x, y, sym, bg_col | fg_col);
                }
            }
        };
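
        // (Each of the 13 brightness levels above is produced by mixing a background
        //  colour, a foreground colour and a dithered block glyph, giving more apparent
        //  shades of grey than the console palette provides on its own.)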

        // Lambda function to read from a 2D array without error
        auto get_pixel = [&](float* image, int x, int y)
        {
            if (x >= 0 && x < ScreenWidth() && y >= 0 && y < ScreenHeight())
                return image[y*ScreenWidth() + x];
            else
                return 0.0f;
        };

        // === Capture & Filter New Input Image ==========================================

        // Get Image from webcam
        doCapture(0); while (isCaptureDone(0) == 0) {}
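        // (doCapture() requests a frame from ESCAPI; the empty while loop then polls
        //  isCaptureDone() until the frame is ready before capture.mTargetBuf is read.)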

        // Do Temporal Filtering per pixel
        for (int y = 0; y < capture.mHeight; y++)
            for (int x = 0; x < capture.mWidth; x++)
            {
                RGBint col;
                int id = y * capture.mWidth + x;
                col.rgb = capture.mTargetBuf[id];
                int r = col.c[2], g = col.c[1], b = col.c[0];
                float fR = (float)r / 255.0f;
                float fG = (float)g / 255.0f;
                float fB = (float)b / 255.0f;

                // Store previous camera frame for temporal processing
                fOldCamera[y*ScreenWidth() + x] = fNewCamera[y*ScreenWidth() + x];

                // Store previous filtered camera frame for temporal processing
                fOldFilteredCamera[y*ScreenWidth() + x] = fFilteredCamera[y*ScreenWidth() + x];

                // Store previous motion only frame
                fOldMotionImage[y*ScreenWidth() + x] = fMotionImage[y*ScreenWidth() + x];

                // Calculate luminance (greyscale equivalent) of pixel
                float fLuminance = 0.2987f * fR + 0.5870f * fG + 0.1140f * fB;
                fNewCamera[y*ScreenWidth() + x] = fLuminance;
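                // (These weights are essentially the Rec. 601 luma coefficients,
                //  0.299/0.587/0.114, reflecting the eye's differing sensitivity
                //  to red, green and blue.)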

                // Low-Pass filter camera image, to remove pixel jitter
                fFilteredCamera[y*ScreenWidth() + x] += (fNewCamera[y*ScreenWidth() + x] - fFilteredCamera[y*ScreenWidth() + x]) * 0.8f;
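                // (This is an exponential moving average, filtered += alpha * (new - filtered)
                //  with alpha = 0.8, so the filtered image follows the camera quickly while
                //  damping single-frame sensor noise.)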

                // Create motion image as difference between two successive camera frames
                float fDiff = fabs(get_pixel(fFilteredCamera, x, y) - get_pixel(fOldFilteredCamera, x, y));

                // Threshold motion image to filter out camera noise
                fMotionImage[y*ScreenWidth() + x] = (fDiff >= 0.05f) ? fDiff : 0.0f;
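                // (Only pixels whose filtered brightness changed by at least 5% between
                //  frames are treated as genuine motion; smaller changes are assumed to
                //  be noise and zeroed.)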
            }

        // === Calculate Optic Flow Vector Map ==========================================

        // Brute Force Local Spatial Pattern Matching
        int nPatchSize = 9;
        int nSearchSize = 7;
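        // (A 9x9 patch is compared at every offset of a 7x7 search window, i.e. up to
        //  +/-3 pixels of motion per frame, costing 7*7*9*9 = 3969 pixel comparisons per
        //  screen pixel - brute force that is only practical at console resolutions.)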

        for (int x = 0; x < ScreenWidth(); x++)
        {
            for (int y = 0; y < ScreenHeight(); y++)
            {
                // Initialise search variables (fPatchDifferenceMax tracks the smallest
                // accumulated difference found so far, despite its name)
                float fPatchDifferenceMax = INFINITY;
                float fPatchDifferenceX = 0.0f;
                float fPatchDifferenceY = 0.0f;
                fFlowX[y*ScreenWidth() + x] = 0.0f;
                fFlowY[y*ScreenWidth() + x] = 0.0f;

                // Search over a given rectangular area for a "patch" of old image
                // that "resembles" a patch of the new image.
                for (int sx = 0; sx < nSearchSize; sx++)
                {
                    for (int sy = 0; sy < nSearchSize; sy++)
                    {
                        // Search vector is centre of patch test
                        int nSearchVectorX = x + (sx - nSearchSize / 2);
                        int nSearchVectorY = y + (sy - nSearchSize / 2);

                        float fAccumulatedDifference = 0.0f;

                        // For each pixel in search patch, accumulate difference with base patch
                        for (int px = 0; px < nPatchSize; px++)
                            for (int py = 0; py < nPatchSize; py++)
                            {
                                // Work out search patch offset indices
                                int nPatchPixelX = nSearchVectorX + (px - nPatchSize / 2);
                                int nPatchPixelY = nSearchVectorY + (py - nPatchSize / 2);

                                // Work out base patch indices
                                int nBasePixelX = x + (px - nPatchSize / 2);
                                int nBasePixelY = y + (py - nPatchSize / 2);

                                // Get adjacent values for each patch
                                float fPatchPixel = get_pixel(fNewCamera, nPatchPixelX, nPatchPixelY);
                                float fBasePixel = get_pixel(fOldCamera, nBasePixelX, nBasePixelY);

                                // Accumulate difference
                                fAccumulatedDifference += fabs(fPatchPixel - fBasePixel);
                            }

                        // Record the vector offset for the search patch that is the
                        // least different to the base patch
                        if (fAccumulatedDifference <= fPatchDifferenceMax)
                        {
                            fPatchDifferenceMax = fAccumulatedDifference;
                            fFlowX[y*ScreenWidth() + x] = (float)(nSearchVectorX - x);
                            fFlowY[y*ScreenWidth() + x] = (float)(nSearchVectorY - y);
                        }
                    }
                }
            }
        }

        // Modulate Optic Flow Vector Map with motion map, to remove vectors that
        // erroneously indicate large local motion
        for (int i = 0; i < ScreenWidth()*ScreenHeight(); i++)
        {
            fFlowX[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f;
            fFlowY[i] *= fMotionImage[i] > 0 ? 1.0f : 0.0f;
        }
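        // (Any flow vector at a pixel the thresholded motion image considers static is
        //  zeroed, which suppresses spurious patch matches in unchanging regions.)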

        // === Update Ball Physics ========================================================

        // Ball velocity is updated by optic flow vector field
        fBallVX += 100.0f * fFlowX[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime;
        fBallVY += 100.0f * fFlowY[(int)fBallY * ScreenWidth() + (int)fBallX] * fElapsedTime;
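        // (The ball samples the flow vector at its own pixel; the factor of 100 scales
        //  the flow, measured in pixels per frame, into a usable acceleration.)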

        // Ball position is updated by velocity
        fBallX += 1.0f * fBallVX * fElapsedTime;
        fBallY += 1.0f * fBallVY * fElapsedTime;

        // Add "drag" effect to ball velocity
        fBallVX *= 0.85f;
        fBallVY *= 0.85f;
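        // (Note the drag is applied per frame rather than scaled by fElapsedTime, so the
        //  effective damping depends on frame rate - fine for a demo, but worth knowing.)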

        // Wrap ball around screen
        if (fBallX >= ScreenWidth()) fBallX -= (float)ScreenWidth();
        if (fBallY >= ScreenHeight()) fBallY -= (float)ScreenHeight();
        if (fBallX < 0) fBallX += (float)ScreenWidth();
        if (fBallY < 0) fBallY += (float)ScreenHeight();

        // === Update Screen =================================================================

        // Draw Camera Image
        draw_image(fNewCamera);

        // Draw "Ball"
        Fill(fBallX - 4, fBallY - 4, fBallX + 4, fBallY + 4, PIXEL_SOLID, FG_RED);
        return true;
    }
};


int main()
{
    OneLoneCoder_AROpticFlow game;
    game.ConstructConsole(80, 60, 16, 16);
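    // (An 80x60 character console with a 16x16 pixel font: the webcam is captured at
    //  the same 80x60 resolution in OnUserCreate, keeping the brute-force search cheap.)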
    game.Start();
    return 0;
}