Added Optic Flow, first video in Augmented Reality Series

7 years ago · 6f57a46f6a
parent 5ed77fbf79
commit 6f57a46f6a
1 changed files with 304 additions and 0 deletions
--- a/OneLoneCoder_AR_OpticFlow.cpp
+++ b/OneLoneCoder_AR_OpticFlow.cpp
@ -0,0 +1,304 @@
+/*
+OneLoneCoder.com - Augmenting Reality #1 - Optical Flow
+"My arms are tired now." - @Javidx9
+
+Disclaimer
+~~~~~~~~~~
+I don't care what you use this for. It's intended to be educational, and perhaps
+to the oddly minded - a little bit of fun. Please hack this, change it and use it
+in any way you see fit. BUT, you acknowledge that I am not responsible for anything
+bad that happens as a result of your actions. However, if good stuff happens, I
+would appreciate a shout out, or at least give the blog some publicity for me.
+Cheers!
+
+Background
+~~~~~~~~~~
+Optical flow is the determination of motion in a video stream at the pixel level.
+Each pixel is associated with a motion vector that is used to create a map of
+velocity vectors which are then used to interact with a virtual object superimposed
+on the video stream.
+
+You will need to have watched my webcam video for this one to make sense!
+https://youtu.be/pk1Y_26j1Y4
+
+Author
+~~~~~~
+Twitter: @javidx9
+Blog: www.onelonecoder.com
+
+Video:
+~~~~~~
+https://youtu.be/aNtzgoEGC1Y
+
+Last Updated: 15/11/2017
+*/
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <string>
+using namespace std;
+
+#include "olcConsoleGameEngine.h"
+#include "escapi.h"
+
+// Console application that grabs webcam frames, estimates a per-cell optic
+// flow field by brute-force patch matching between the previous and current
+// greyscale frame, and uses that field to push a "ball" drawn on top of the
+// camera image.
+class OneLoneCoder_AROpticFlow : public olcConsoleGameEngine
+{
+public:
+	OneLoneCoder_AROpticFlow()
+	{
+		m_sAppName = L"Augmented Reality Part #1 - Optic Flow";
+	}
+
+private:
+	int nCameras = 0;
+	SimpleCapParams capture;
+
+	// 2D Maps for image processing (one float per console cell, row-major).
+	// NOTE(review): these buffers are allocated in OnUserCreate() and are not
+	// released anywhere in this class.
+	float *fOldCamera = nullptr;		// Previous raw frame from camera
+	float *fNewCamera = nullptr;		// Recent raw frame from camera
+	float *fFilteredCamera = nullptr;	// Low-pass filtered image
+	float *fOldFilteredCamera = nullptr;	// Previous low-pass filtered image
+	float *fOldMotionImage = nullptr;	// Previous motion image
+	float *fMotionImage = nullptr;		// Recent motion image
+	float *fFlowX = nullptr;			// x-component of flow field vector
+	float *fFlowY = nullptr;			// y-component of flow field vector
+
+	// Object Physics Variables
+	float fBallX = 0.0f;				// Ball position 2D
+	float fBallY = 0.0f;
+	float fBallVX = 0.0f;				// Ball Velocity 2D
+	float fBallVY = 0.0f;
+
+	// Restrict nValue to the closed range [nLow, nHigh]. Written by hand rather
+	// than with std::min/std::max so it keeps compiling even if an included
+	// platform header defines min/max as macros.
+	static int ClampInt(int nValue, int nLow, int nHigh)
+	{
+		if (nValue < nLow) return nLow;
+		if (nValue > nHigh) return nHigh;
+		return nValue;
+	}
+
+protected:
+	// Sets up the capture device at console resolution and allocates the
+	// zero-filled working images. Returns false if no camera is reported or
+	// the capture cannot be initialised.
+	virtual bool OnUserCreate()
+	{
+		// Initialise webcam to console dimensions
+		nCameras = setupESCAPI();
+		if (nCameras == 0)	return false;
+
+		const int nCells = ScreenWidth() * ScreenHeight();
+		capture.mWidth = ScreenWidth();
+		capture.mHeight = ScreenHeight();
+		capture.mTargetBuf = new int[nCells];
+		if (initCapture(0, &capture) == 0)	return false;
+
+		// Allocate memory for images; the trailing () value-initialises
+		// every element to 0.0f, so no separate clearing pass is needed.
+		fOldCamera			= new float[nCells]();
+		fNewCamera			= new float[nCells]();
+		fFilteredCamera		= new float[nCells]();
+		fOldFilteredCamera	= new float[nCells]();
+		fFlowX				= new float[nCells]();
+		fFlowY				= new float[nCells]();
+		fOldMotionImage		= new float[nCells]();
+		fMotionImage		= new float[nCells]();
+
+		// Set ball position to middle of frame
+		fBallX = ScreenWidth() / 2.0f;
+		fBallY = ScreenHeight() / 2.0f;
+		return true;
+	}
+
+	// Per-frame work: capture a frame, update the filtered/motion images,
+	// compute the flow field, advance the ball and draw the result.
+	// fElapsedTime scales the velocity and position integration steps.
+	// Always returns true.
+	virtual bool OnUserUpdate(float fElapsedTime)
+	{
+		const int nWidth = ScreenWidth();
+		const int nHeight = ScreenHeight();
+
+		// Lambda function to draw "image" in greyscale
+		auto draw_image = [&](float *image)
+		{
+			for (int x = 0; x < capture.mWidth; x++)
+			{
+				for (int y = 0; y < capture.mHeight; y++)
+				{
+					wchar_t sym = 0;
+					short bg_col = 0;
+					short fg_col = 0;
+
+					// Quantise intensity into one of 13 shades. Clamp so that
+					// values outside [0, 1) still map to a real glyph instead
+					// of falling through the switch with sym == 0.
+					const int pixel_bw = ClampInt(static_cast<int>(image[y*nWidth + x] * 13.0f), 0, 12);
+					switch (pixel_bw)
+					{
+					case 0: bg_col = BG_BLACK; fg_col = FG_BLACK; sym = PIXEL_SOLID; break;
+					case 1: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_QUARTER; break;
+					case 2: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_HALF; break;
+					case 3: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_THREEQUARTERS; break;
+					case 4: bg_col = BG_BLACK; fg_col = FG_DARK_GREY; sym = PIXEL_SOLID; break;
+					case 5: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_QUARTER; break;
+					case 6: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_HALF; break;
+					case 7: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_THREEQUARTERS; break;
+					case 8: bg_col = BG_DARK_GREY; fg_col = FG_GREY; sym = PIXEL_SOLID; break;
+					case 9:  bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_QUARTER; break;
+					case 10: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_HALF; break;
+					case 11: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_THREEQUARTERS; break;
+					case 12: bg_col = BG_GREY; fg_col = FG_WHITE; sym = PIXEL_SOLID; break;
+					}
+					Draw(x, y, sym, bg_col | fg_col);
+				}
+			}
+		};
+
+		// Lambda function to read from a 2D array without error:
+		// coordinates outside the screen read as 0.0f
+		auto get_pixel = [&](float* image, int x, int y)
+		{
+			if (x >= 0 && x < nWidth && y >= 0 && y < nHeight)
+				return image[y*nWidth + x];
+			else
+				return 0.0f;
+		};
+
+		// === Capture & Filter New Input Image ==========================================
+
+		// Get Image from webcam; spin until the capture reports completion
+		doCapture(0); while (isCaptureDone(0) == 0) {}
+
+		// Do Temporal Filtering per pixel
+		for (int y = 0; y < capture.mHeight; y++)
+			for (int x = 0; x < capture.mWidth; x++)
+			{
+				const int nCamIndex = y * capture.mWidth + x;	// index into capture buffer
+				const int nCell = y * nWidth + x;				// index into float images
+
+				// Unpack the colour channels with shifts. This reads the same
+				// bytes as a byte-wise view of the int on a little-endian
+				// target (blue = lowest byte, then green, then red).
+				const unsigned int nPacked = static_cast<unsigned int>(capture.mTargetBuf[nCamIndex]);
+				const float fR = static_cast<float>((nPacked >> 16) & 0xFF) / 255.0f;
+				const float fG = static_cast<float>((nPacked >> 8) & 0xFF) / 255.0f;
+				const float fB = static_cast<float>(nPacked & 0xFF) / 255.0f;
+
+				// Store previous camera frame for temporal processing
+				fOldCamera[nCell] = fNewCamera[nCell];
+
+				// Store previous filtered frame for temporal processing
+				fOldFilteredCamera[nCell] = fFilteredCamera[nCell];
+
+				// Store previous motion only frame
+				fOldMotionImage[nCell] = fMotionImage[nCell];
+
+				// Calculate luminance (greyscale equivalent) of pixel
+				const float fLuminance = 0.2987f * fR + 0.5870f * fG + 0.1140f * fB;
+				fNewCamera[nCell] = fLuminance;
+
+				// Low-Pass filter camera image, to remove pixel jitter
+				fFilteredCamera[nCell] += (fNewCamera[nCell] - fFilteredCamera[nCell]) * 0.8f;
+
+				// Create motion image as difference between two successive filtered frames
+				const float fDiff = std::fabs(get_pixel(fFilteredCamera, x, y) - get_pixel(fOldFilteredCamera, x, y));
+
+				// Threshold motion image to filter out camera noise
+				fMotionImage[nCell] = (fDiff >= 0.05f) ? fDiff : 0.0f;
+			}
+
+		// === Calculate Optic Flow Vector Map ==========================================
+
+		// Brute Force Local Spatial Pattern Matching
+		const int nPatchSize = 9;
+		const int nSearchSize = 7;
+		const int nPatchHalf = nPatchSize / 2;
+		const int nSearchHalf = nSearchSize / 2;
+
+		// Every cell is computed independently of the others, so the cells are
+		// visited row by row to match the row-major layout of the images.
+		for (int y = 0; y < nHeight; y++)
+		{
+			for (int x = 0; x < nWidth; x++)
+			{
+				const int nCell = y * nWidth + x;
+
+				// Initialise search variables
+				float fBestDifference = INFINITY;
+				fFlowX[nCell] = 0.0f;
+				fFlowY[nCell] = 0.0f;
+
+				// Search over a given rectangular area for a "patch" of the new image
+				// that "resembles" the patch of the old image centred on this cell.
+				for (int sx = 0; sx < nSearchSize; sx++)
+				{
+					for (int sy = 0; sy < nSearchSize; sy++)
+					{
+						// Search vector is centre of patch test
+						const int nSearchVectorX = x + (sx - nSearchHalf);
+						const int nSearchVectorY = y + (sy - nSearchHalf);
+
+						float fAccumulatedDifference = 0.0f;
+
+						// For each pixel in search patch, accumulate difference with base patch
+						for (int px = 0; px < nPatchSize; px++)
+							for (int py = 0; py < nPatchSize; py++)
+							{
+								// Work out search patch offset indices
+								const int nPatchPixelX = nSearchVectorX + (px - nPatchHalf);
+								const int nPatchPixelY = nSearchVectorY + (py - nPatchHalf);
+
+								// Work out base patch indices
+								const int nBasePixelX = x + (px - nPatchHalf);
+								const int nBasePixelY = y + (py - nPatchHalf);
+
+								// Get adjacent values for each patch
+								const float fPatchPixel = get_pixel(fNewCamera, nPatchPixelX, nPatchPixelY);
+								const float fBasePixel = get_pixel(fOldCamera, nBasePixelX, nBasePixelY);
+
+								// Accumulate difference
+								fAccumulatedDifference += std::fabs(fPatchPixel - fBasePixel);
+							}
+
+						// Record the vector offset for the search patch that is the
+						// least different to the base patch. "<=" means that on a
+						// tie the candidate visited last wins.
+						if (fAccumulatedDifference <= fBestDifference)
+						{
+							fBestDifference = fAccumulatedDifference;
+							fFlowX[nCell] = static_cast<float>(nSearchVectorX - x);
+							fFlowY[nCell] = static_cast<float>(nSearchVectorY - y);
+						}
+					}
+				}
+			}
+		}
+
+		// Modulate Optic Flow Vector Map with motion map, to remove vectors that
+		// erroneously indicate large local motion
+		const int nCells = nWidth * nHeight;
+		for (int i = 0; i < nCells; i++)
+		{
+			const float fMask = fMotionImage[i] > 0 ? 1.0f : 0.0f;
+			fFlowX[i] *= fMask;
+			fFlowY[i] *= fMask;
+		}
+
+		// === Update Ball Physics ========================================================
+
+		// Ball velocity is updated by optic flow vector field. The sample cell is
+		// clamped to the screen because the single-step wrap below can leave the
+		// ball exactly on (or, after a large step, beyond) the screen edge, which
+		// would otherwise index outside the flow arrays.
+		const int nBallCellX = ClampInt(static_cast<int>(fBallX), 0, nWidth - 1);
+		const int nBallCellY = ClampInt(static_cast<int>(fBallY), 0, nHeight - 1);
+		const int nBallCell = nBallCellY * nWidth + nBallCellX;
+		fBallVX += 100.0f * fFlowX[nBallCell] * fElapsedTime;
+		fBallVY += 100.0f * fFlowY[nBallCell] * fElapsedTime;
+
+		// Ball position is updated by velocity
+		fBallX += 1.0f * fBallVX * fElapsedTime;
+		fBallY += 1.0f * fBallVY * fElapsedTime;
+
+		// Add "drag" effect to ball velocity
+		fBallVX *= 0.85f;
+		fBallVY *= 0.85f;
+
+		// Wrap ball around screen
+		if (fBallX >= nWidth) fBallX -= static_cast<float>(nWidth);
+		if (fBallY >= nHeight) fBallY -= static_cast<float>(nHeight);
+		if (fBallX < 0) fBallX += static_cast<float>(nWidth);
+		if (fBallY < 0) fBallY += static_cast<float>(nHeight);
+
+		// === Update Screen =================================================================
+
+		// Draw Camera Image
+		draw_image(fNewCamera);
+
+		// Draw "Ball" as a filled red rectangle around the ball position
+		Fill(static_cast<int>(fBallX - 4), static_cast<int>(fBallY - 4),
+			static_cast<int>(fBallX + 4), static_cast<int>(fBallY + 4), PIXEL_SOLID, FG_RED);
+		return true;
+	}
+};
+
+// Program entry point: builds the console for the demo and runs it.
+int main()
+{
+	// Console size in character cells; the camera capture is requested at
+	// the same size as the screen inside the class.
+	const int nConsoleColumns = 80;
+	const int nConsoleRows = 60;
+
+	// Remaining two ConstructConsole arguments -- presumably the font
+	// width/height in pixels; confirm against olcConsoleGameEngine.h.
+	const int nFontW = 16;
+	const int nFontH = 16;
+
+	OneLoneCoder_AROpticFlow demo;
+	demo.ConstructConsole(nConsoleColumns, nConsoleRows, nFontW, nFontH);
+	demo.Start();
+	return 0;
+}