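Notes on preparing per-frame MongoDB collections (scene text, mmdetection objects, color), combining them into a single collection, and moving MongoDB data in and out of the Docker container.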
- Make an `_id` for each document of the form "videoNumber_startFrame"
// Build a string _id of the form "videoNumber_startFrame" for every document
// in scene_text and write the result to the scene_text_id collection.
// Collections that are joined with this one on _id (e.g. mmdetection_id,
// color_id) must build their _id with the same format.
db.scene_text.aggregate([
  {
    $addFields: {
      _id: {
        $concat: [
          { $toString: "$video" },
          // Separator keeps ids unambiguous (video 1 / frame 23 vs video 12 / frame 3).
          "_",
          { $toString: "$startFrame" },
        ],
      },
    },
  },
  // Write the pipeline output to the scene_text_id collection.
  { $out: "scene_text_id" },
]);
- Combine the documents from the per-feature `*_id` collections into one collection, joining on `_id`
// NOTE(review): this snippet appears to have lost its wrapper lines: the
// opening `db.<collection>.aggregate([` call, the stage operators around each
// group of fields, and the closing braces/brackets. Restore them before
// running; the base collection cannot be determined from what is shown here.

// Join with mmdetection table: match mmdetection_id documents on _id and
// store the matches in "my_mmdetection" (presumably a $lookup stage; confirm).
from: "mmdetection_id",
localField: "_id",
foreignField: "_id",
as: "my_mmdetection"
// Join with scene_text_id on _id; matches are stored in "my_scene_text".
from: "scene_text_id",
localField: "_id",
foreignField: "_id",
as: "my_scene_text"
// Join with color_id on _id; matches are stored in "my_color".
from: "color_id",
localField: "_id",
foreignField: "_id",
as: "my_color"
// Fields carried through unchanged (presumably inside a $project stage; confirm).
_id: 1,
video: 1,
keyFrame: 1,
startFrame: 1,
endFrame: 1,
startSecond: 1,
endSecond: 1,
// Flatten the "object" arrays of all joined mmdetection documents into a
// single array: start from [] and append each element of the input in turn.
object: {
$reduce: {
input: "$my_mmdetection.object",
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
// Same flattening for the "text" arrays of the joined scene_text documents.
text: {
$reduce: {
input: "$my_scene_text.text",
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
// Same flattening for the "color" arrays of the joined color documents.
color: {
$reduce: {
input: "$my_color.color",
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
// Write the combined documents to the "allFrames_combined" collection.
{ $out: "allFrames_combined" }
- Parse an `_id` of the form "video_startFrame-endFrame" into separate numeric fields
// Split each scene_text _id of the form "video_startFrame-endFrame" into
// numeric video / startFrame / endFrame fields and store them on the document.
db.scene_text.find().forEach(function (x) {
  const idParts = x._id.split('_');         // ["video", "startFrame-endFrame"]
  const frameRange = idParts[1].split('-'); // ["startFrame", "endFrame"]

  // Changes made to the cursor's copy of the document are not saved
  // automatically, so write the parsed values back explicitly.
  db.scene_text.updateOne(
    { _id: x._id },
    {
      $set: {
        video: parseInt(idParts[0], 10),
        startFrame: parseInt(frameRange[0], 10),
        endFrame: parseInt(frameRange[1], 10),
      },
    }
  );
});
- Access MongoDB with the mongo shell inside the Docker container
$ docker exec -it simpleflaskapp_mongo_1 mongo
- Export MongoDB data
$ mongodump --db database_name --collection collection_name
- Copy file from container to host
$ docker cp <container_id>:/source/file/path/in/container /destination/on/host
- Import MongoDB data
$ mongorestore --db database_name path_to_bson_file