diff --git a/.gitignore b/.gitignore
index bec4f9f..af03091 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,10 @@
 .DS_Store
 /Data/fsoco_segmentation_train
 /Segmentation/Yolact_minimal
+/DepthEstimation/TestData
+/DepthEstimation/Lite-Mono
+/DepthEstimation/output
+/DepthEstimation/output_best
+/DepthEstimation/weights
 /SampleData/driverless.mp4
 *.txt
diff --git a/DepthEstimation/README.md b/DepthEstimation/README.md
new file mode 100644
index 0000000..d01f7d4
--- /dev/null
+++ b/DepthEstimation/README.md
@@ -0,0 +1,17 @@
+# Lite Mono archive
+The model was deemed 'not good enough' to keep using: it is too slow, and its accuracy falls off at large depths.
+Clone
+```
+git clone git@github.com:noahzn/Lite-Mono.git
+```
+into this folder and run
+```
+python main.py
+```
+to run depth estimation.
+
+A good test video source for /TestData/ is
+```
+https://www.youtube.com/watch?v=o5vES5QaeiQ
+```
+
diff --git a/DepthEstimation/main.py b/DepthEstimation/main.py
new file mode 100644
index 0000000..53f7675
--- /dev/null
+++ b/DepthEstimation/main.py
@@ -0,0 +1,174 @@
+import os
+import sys
+import argparse
+import numpy as np
+import cv2
+import torch
+from torchvision import transforms
+import matplotlib.cm as cm
+import matplotlib.colors as colors
+from PIL import Image
+
+sys.path.append('./Lite-Mono')
+import networks
+from layers import disp_to_depth
+
+MODEL_CONFIGS = {
+    'tiny': {
+        'name': 'lite-mono-tiny',
+        'folder': 'lite-mono-tiny_640x192',
+        'description': 'Lightest/fastest (2.2M params, 640x192)'
+    },
+    'best': {
+        'name': 'lite-mono-8m',
+        'folder': 'lite-mono-8m_1024x320',
+        'description': 'Best quality/heaviest (8.7M params, 1024x320)'
+    }
+}
+
+
+class DepthEstimator:
+    def __init__(self, weightsFolder, model="lite-mono-tiny", useCuda=True):
+        self.device = torch.device("cuda" if torch.cuda.is_available() and useCuda else "cpu")
+
+        encoderPath = os.path.join(weightsFolder, "encoder.pth")
+        decoderPath = os.path.join(weightsFolder, "depth.pth")
+
+        encoderDict = torch.load(encoderPath, map_location=self.device)
+        decoderDict = torch.load(decoderPath, map_location=self.device)
+
+        self.feedHeight = encoderDict['height']
+        self.feedWidth = encoderDict['width']
+
+        print(f"Loading {model} model ({self.feedWidth}x{self.feedHeight}) on {self.device}")
+
+        self.encoder = networks.LiteMono(model=model, height=self.feedHeight, width=self.feedWidth)
+        modelDict = self.encoder.state_dict()
+        self.encoder.load_state_dict({k: v for k, v in encoderDict.items() if k in modelDict})
+        self.encoder.to(self.device)
+        self.encoder.eval()
+
+        self.depthDecoder = networks.DepthDecoder(self.encoder.num_ch_enc, scales=range(3))
+        depthModelDict = self.depthDecoder.state_dict()
+        self.depthDecoder.load_state_dict({k: v for k, v in decoderDict.items() if k in depthModelDict})
+        self.depthDecoder.to(self.device)
+        self.depthDecoder.eval()
+
+        self.toTensor = transforms.ToTensor()
+
+    def processFrame(self, frame):
+        originalHeight, originalWidth = frame.shape[:2]
+
+        frameRgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        framePil = Image.fromarray(frameRgb)
+        frameResized = framePil.resize((self.feedWidth, self.feedHeight), Image.LANCZOS)
+
+        inputTensor = self.toTensor(frameResized).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            features = self.encoder(inputTensor)
+            outputs = self.depthDecoder(features)
+
+        disp = outputs[("disp", 0)]
+        dispResized = torch.nn.functional.interpolate(
+            disp, (originalHeight, originalWidth), mode="bilinear", align_corners=False)
+
+        dispNp = dispResized.squeeze().cpu().numpy()
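+        # The colormap below normalizes each frame to its own 95th disparity
+        # percentile, so colors are not comparable across frames; use the raw
+        # dispNp array (also saved as .npy by processVideo) for measurements.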
+
+        vmax = np.percentile(dispNp, 95)
+        normalizer = colors.Normalize(vmin=dispNp.min(), vmax=vmax)
+        mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
+        colormapped = (mapper.to_rgba(dispNp)[:, :, :3] * 255).astype(np.uint8)
+
+        return colormapped, dispNp
+
+    def processVideo(self, videoPath, outputFolder="output", saveFrames=True, displayLive=False):
+        os.makedirs(outputFolder, exist_ok=True)
+
+        cap = cv2.VideoCapture(videoPath)
+        if not cap.isOpened():
+            raise ValueError(f"Cannot open video: {videoPath}")
+
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+        print(f"Processing video: {frameCount} frames at {fps:.2f} FPS ({width}x{height})")
+
+        outputVideoPath = os.path.join(outputFolder, "depth_output.mp4")
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        outVideo = cv2.VideoWriter(outputVideoPath, fourcc, fps, (width, height))
+
+        frameIdx = 0
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            depthColor, depthRaw = self.processFrame(frame)
+            depthBgr = cv2.cvtColor(depthColor, cv2.COLOR_RGB2BGR)
+
+            outVideo.write(depthBgr)
+
+            if saveFrames and frameIdx % 30 == 0:
+                cv2.imwrite(os.path.join(outputFolder, f"frame_{frameIdx:06d}_depth.png"), depthBgr)
+                np.save(os.path.join(outputFolder, f"frame_{frameIdx:06d}_depth.npy"), depthRaw)
+
+            if displayLive:
+                combined = np.hstack((frame, depthBgr))
+                cv2.imshow('Original | Depth', combined)
+                if cv2.waitKey(1) & 0xFF == ord('q'):
+                    break
+
+            frameIdx += 1
+            if frameIdx % 30 == 0:
+                print(f"Processed {frameIdx}/{frameCount} frames ({100*frameIdx/frameCount:.1f}%)")
+
+        cap.release()
+        outVideo.release()
+        if displayLive:
+            cv2.destroyAllWindows()
+
+        print(f"\nComplete! Processed {frameIdx} frames")
+        print(f"Output video: {outputVideoPath}")
+        return outputVideoPath
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Lite-Mono Depth Estimation')
+    parser.add_argument('--model', type=str, default='tiny', choices=['tiny', 'best'],
+                        help='Model to use: tiny (fastest) or best (highest quality)')
+    parser.add_argument('--video', type=str, default='./TestData/test1.mp4',
+                        help='Path to input video')
+    parser.add_argument('--output', type=str, default=None,
+                        help='Output folder (default: ./output_<model>)')
+    parser.add_argument('--cuda', action=argparse.BooleanOptionalAction, default=True,
+                        help='Use CUDA if available (disable with --no-cuda)')
+
+    args = parser.parse_args()
+
+    config = MODEL_CONFIGS[args.model]
+    weightsFolder = f"./weights/{config['folder']}"
+    outputFolder = args.output or f"./output_{args.model}"
+
+    print(f"Using model: {config['description']}")
+
+    if not os.path.exists(weightsFolder):
+        print(f"Error: Weights folder not found: {weightsFolder}")
+        print("Please download the model weights first")
+        return
+
+    if not os.path.exists(args.video):
+        print(f"Error: Video not found: {args.video}")
+        return
+
+    estimator = DepthEstimator(weightsFolder, model=config['name'], useCuda=args.cuda)
+    estimator.processVideo(args.video, outputFolder=outputFolder, saveFrames=True, displayLive=False)
+
+
+if __name__ == '__main__':
+    main()
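
Note on the saved `.npy` files: they hold the network's raw sigmoid disparity, not depth. Below is a minimal sketch of converting one to relative depth with the `disp_to_depth` helper that `main.py` already imports from Lite-Mono's `layers.py`; the `0.1`/`100.0` depth range is an assumption (the monodepth2-style defaults), and monocular depth is only defined up to an unknown scale.
```python
import sys
import numpy as np

sys.path.append('./Lite-Mono')
from layers import disp_to_depth  # same helper main.py imports

# Point this at a file processVideo actually wrote (frame 0 is always saved).
disp = np.load('./output_tiny/frame_000000_depth.npy')

# Map sigmoid disparity into an assumed 0.1-100 depth range, then invert.
# The result is relative depth: useful for ordering, not metric distance.
_, depth = disp_to_depth(disp, 0.1, 100.0)
print(depth.min(), depth.max())
```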