lab4d-org · gengshan-y · Jul 24, 2023 · Jul 24, 2023 · Jul 24, 2023 · Jul 25, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,12 +1,17 @@
-projects
+submit.py
+scripts/run_all_exps.sh
+misc
 viewer
 run.sh
 run-long.sh
 /database/processed
 /database/configs
 /database/raw
-/logdir
+/database/ama
+/database/polycam
+/logdir*
 /tmp
+projects/csim
 
 lab4d.egg-info
 __pycache__/
@@ -16,3 +21,4 @@ __pycache__/
 preprocess/third_party/vcnplus/vcn_rob.pth
 preprocess/third_party/viewpoint/human.pth
 preprocess/third_party/viewpoint/quad.pth
+preprocess/third_party/omnivision/*.ckpt
diff --git a/.gitmodules b/.gitmodules
@@ -8,3 +8,9 @@
 [submodule "docs/pytorch_sphinx_theme"]
 	path = docs/pytorch_sphinx_theme
 	url = https://github.com/gengshan-y/pytorch_sphinx_theme
+[submodule "projects/ppr/ppr-diffphys"]
+	path = projects/ppr/ppr-diffphys
+	url = git@github.com:gengshan-y/ppr-diffphys.git
+[submodule "projects/ppr/eval/third_party/ChamferDistancePytorch"]
+	path = projects/ppr/eval/third_party/ChamferDistancePytorch
+	url = https://github.com/ThibaultGROUEIX/ChamferDistancePytorch
diff --git a/docs/source/tutorials/single_video_cat.rst b/docs/source/tutorials/single_video_cat.rst
@@ -123,7 +123,7 @@ To render novel views, run::
 
 To render a video of the proxy geometry and cameras over training iterations, run::
 
-  python scripts/render_intermediate.py --testdir logdir/$logname/
+  python lab4d/render_intermediate.py --testdir logdir/$logname/
 
 .. raw:: html
 

diff --git a/environment.yml b/environment.yml
@@ -7,8 +7,9 @@ dependencies:
   - python=3.9
   - setuptools=66.0.0
   - pip
-  - pytorch==2.0.0=py3.9_cuda11.7_cudnn8.5.0_0
-  - torchvision
+  - pytorch=2.0.0
+  - pytorch-cuda=11.7
+  - torchvision=0.15.2
   - cudatoolkit-dev=11.7
   - gcc_linux-64=10
   - gxx_linux-64=10
@@ -38,3 +39,5 @@ dependencies:
     - groundingdino @ git+https://github.com/IDEA-Research/GroundingDINO.git
     - openmim
     - pyrender
+    - open3d==0.17.0
+    - geomloss==0.2.6
diff --git a/lab4d/config.py b/lab4d/config.py
@@ -10,27 +10,28 @@ class TrainModelConfig:
     # weights of reconstruction terms
     flags.DEFINE_float("mask_wt", 0.1, "weight for silhouette loss")
     flags.DEFINE_float("rgb_wt", 0.1, "weight for color loss")
-    flags.DEFINE_float("depth_wt", 1e-4, "weight for depth loss")
+    flags.DEFINE_float("depth_wt", 0.0, "weight for depth loss")
+    flags.DEFINE_float("normal_wt", 0.0, "weight for normal loss")
     flags.DEFINE_float("flow_wt", 0.5, "weight for flow loss")
     flags.DEFINE_float("vis_wt", 1e-2, "weight for visibility loss")
     flags.DEFINE_float("feature_wt", 1e-2, "weight for feature reconstruction loss")
-    flags.DEFINE_float("feat_reproj_wt", 5e-2, "weight for feature reprojection loss")
+    flags.DEFINE_float("feat_reproj_wt", 0.05, "weight for feature reprojection loss")
 
     # weights of regularization terms
     flags.DEFINE_float(
         "reg_visibility_wt", 1e-4, "weight for visibility regularization"
     )
-    flags.DEFINE_float("reg_eikonal_wt", 1e-3, "weight for eikonal regularization")
+    flags.DEFINE_float("reg_eikonal_wt", 0.01, "weight for eikonal regularization")
+    flags.DEFINE_float("reg_eikonal_scale_max", 1, "max scaling for eikonal reg")
     flags.DEFINE_float(
-        "reg_deform_cyc_wt", 0.01, "weight for deform cyc regularization"
-    )
-    flags.DEFINE_float("reg_delta_skin_wt", 5e-3, "weight for delta skinning reg")
-    flags.DEFINE_float("reg_skin_entropy_wt", 5e-4, "weight for delta skinning reg")
-    flags.DEFINE_float(
-        "reg_gauss_skin_wt", 1e-3, "weight for gauss skinning consistency"
+        "reg_deform_cyc_wt", 0.05, "weight for deform cyc regularization"
     )
+    flags.DEFINE_float("reg_delta_skin_wt", 1e-3, "weight for delta skinning reg")
+    flags.DEFINE_float("reg_skin_entropy_wt", 0.0, "weight for skinning entropy reg")
+    flags.DEFINE_float("reg_gauss_skin_wt", 0.02, "weight for gauss density loss in 3D")
+    # flags.DEFINE_float("reg_gauss_skin_wt", 0.0, "weight for gauss density loss in 3D")
     flags.DEFINE_float("reg_cam_prior_wt", 0.1, "weight for camera regularization")
-    flags.DEFINE_float("reg_skel_prior_wt", 0.1, "weight for skeleton regularization")
+    flags.DEFINE_float("reg_skel_prior_wt", 0.01, "weight for skeleton regularization")
     flags.DEFINE_float(
         "reg_gauss_mask_wt", 0.01, "weight for gauss mask regularization"
     )
@@ -41,7 +42,8 @@ class TrainModelConfig:
     flags.DEFINE_string(
         "fg_motion", "rigid", "{rigid, dense, bob, skel-human, skel-quad}"
     )
-    flags.DEFINE_bool("single_inst", True, "assume the same morphology over objs")
+    flags.DEFINE_bool("single_inst", True, "assume the same morphology over videos")
+    flags.DEFINE_bool("single_scene", True, "assume the same scene over videos")
 
 
 class TrainOptConfig:
@@ -57,22 +59,25 @@ class TrainOptConfig:
     flags.DEFINE_string("feature_type", "dinov2", "{dinov2, cse}")
     flags.DEFINE_string("load_path", "", "path to load pretrained model")
 
-    # accuracy-related
+    # optimization-related
     flags.DEFINE_float("learning_rate", 5e-4, "learning rate")
     flags.DEFINE_integer("num_rounds", 20, "number of rounds to train")
+    flags.DEFINE_integer("num_rounds_cam_init", 10, "number of rounds for camera init")
     flags.DEFINE_integer("iters_per_round", 200, "number of iterations per round")
     flags.DEFINE_integer("imgs_per_gpu", 128, "images samples per iter, per gpu")
     flags.DEFINE_integer("pixels_per_image", 16, "pixel samples per image")
     # flags.DEFINE_integer("imgs_per_gpu", 1, "size of minibatches per iter")
     # flags.DEFINE_integer("pixels_per_image", 4096, "number of pixel samples per image")
-    flags.DEFINE_boolean(
-        "freeze_bone_len", False, "do not change bone length of skeleton"
-    )
+    flags.DEFINE_boolean("use_freq_anneal", True, "whether to use frequency annealing")
     flags.DEFINE_boolean(
         "reset_steps",
         True,
         "reset steps of loss scheduling, set to False if resuming training",
     )
+    flags.DEFINE_boolean("pose_correction", False, "whether to execute pose correction")
+    flags.DEFINE_boolean("alter_flow", False, "alternate between flow and all terms")
+    flags.DEFINE_boolean("freeze_intrinsics", False, "whether to freeze intrinsics")
+    flags.DEFINE_boolean("absorb_base", True, "whether to absorb se3 into base")
 
     # efficiency-related
     flags.DEFINE_integer("ngpu", 1, "number of gpus to use")

diff --git a/lab4d/config_omega.py b/lab4d/config_omega.py
@@ -26,6 +26,7 @@
                 "field_type": "bg",  # {bg, fg, comp}
                 "fg_motion": "rigid",  # {rigid, dense, bob, skel}
                 "single_inst": True,  # assume the same morphology over objs
+                "single_scene": True,  # assume the same scene over videos
             },
             "io": {
                 "seqname": "cat",  # name of the sequence

diff --git a/lab4d/dataloader/data_utils.py b/lab4d/dataloader/data_utils.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2023 Gengshan Yang, Carnegie Mellon University.
 import configparser
 import glob
+import os
 import random
 
 import numpy as np
@@ -234,6 +235,7 @@ def get_data_info(loader):
     intrinsics = []
     raw_size = []
     feature_pxs = []
+    motion_scales = []
 
     for dataset in dataset_list:
         frame_info = FrameInfo(dataset.dict_list["ref"])
@@ -253,11 +255,29 @@ def get_data_info(loader):
         num_skip = max(1, len(feature_array) // 1000)
         feature_pxs.append(feature_array[::num_skip])
 
+        # compute motion magnitude
+        mask = dataset.mmap_list["mask"][:-1, ..., 0].copy()
+        if dataset.field_type == "bg":
+            mask = np.logical_not(mask)
+        elif dataset.field_type == "fg":
+            pass
+        elif dataset.field_type == "comp":
+            mask[:] = True
+        else:
+            raise ValueError("Unknown field type: %s" % dataset.field_type)
+        flow = dataset.mmap_list["flowfw"][1][mask, :2]
+        motion_scale = np.linalg.norm(flow, 2, -1).mean()
+        motion_scales.append(motion_scale)
+
     # compute PCA on non-zero features
     feature_pxs = np.concatenate(feature_pxs, 0)
     feature_pxs = feature_pxs[np.linalg.norm(feature_pxs, 2, -1) > 0]
     data_info["apply_pca_fn"] = pca_numpy(feature_pxs, n_components=3)
 
+    # store motion magnitude
+    data_info["motion_scales"] = motion_scales
+    # print("motion scales: ", motion_scales)
+
     frame_info = {}
     frame_info["frame_offset"] = np.asarray(frame_offset).cumsum()
     frame_info["frame_offset_raw"] = np.asarray(frame_offset_raw).cumsum()
@@ -310,23 +330,43 @@ def load_small_files(data_path_dict):
     #     [np.load(path).astype(np.float32) for path in data_path_dict["crop2raw"]], 0
     # )  # N,4
 
-    rtmat_bg = np.concatenate(
-        [np.load(path).astype(np.float32) for path in data_path_dict["cambg"]], 0
-    )  # N,4,4
-    rtmat_fg = np.concatenate(
-        [np.load(path).astype(np.float32) for path in data_path_dict["camfg"]], 0
-    )  # N,4,4
+    # bg/fg camera: one (N,4,4) float32 array per video, identity if file is missing
+    rtmat_bg = []
+    for vid, path in enumerate(data_path_dict["cambg"]):
+        # number of frames in this video, used to size the identity fallback
+        num_frames = np.load(data_path_dict["is_detected"][vid]).shape[0]
+        if os.path.exists(path):
+            rtmat_bg.append(np.load(path).astype(np.float32))
+        else:
+            rtmat_bg.append(np.eye(4, dtype=np.float32)[None].repeat(num_frames, 0))
+            print("Warning: no bg camera found at %s" % path)
+    rtmat_bg = np.concatenate(rtmat_bg, 0)  # N,4,4
+
+    rtmat_fg = []
+    for vid, path in enumerate(data_path_dict["camfg"]):
+        # number of frames in this video, used to size the identity fallback
+        num_frames = np.load(data_path_dict["is_detected"][vid]).shape[0]
+        if os.path.exists(path):
+            rtmat_fg.append(np.load(path).astype(np.float32))
+        else:
+            rtmat_fg.append(np.eye(4, dtype=np.float32)[None].repeat(num_frames, 0))
+            print("Warning: no fg camera found at %s" % path)
+
+    rtmat_fg = np.concatenate(rtmat_fg, 0)
+
     # hard-code for now
     vis_info = {"bg": 0, "fg": 1}  # video instance segmentation info
     data_info["vis_info"] = vis_info
     data_info["rtmat"] = np.stack([rtmat_bg, rtmat_fg], 0)
 
     # path to centered mesh files
-    camera_prefix = data_path_dict["cambg"][0].rsplit("/", 1)[0]
-    data_info["geom_path"] = [
-        "%s/mesh-00-centered.obj" % camera_prefix,
-        "%s/mesh-01-centered.obj" % camera_prefix,
-    ]
+    geom_path_bg = []
+    geom_path_fg = []
+    for path in data_path_dict["cambg"]:
+        camera_prefix = path.rsplit("/", 1)[0]
+        geom_path_bg.append("%s/mesh-00-centered.obj" % camera_prefix)
+        geom_path_fg.append("%s/mesh-01-centered.obj" % camera_prefix)
+    data_info["geom_path"] = [geom_path_bg, geom_path_fg]
     return data_info