From ca915d88e439f25f870e7e6c02e7e8b31ebf05ae Mon Sep 17 00:00:00 2001 From: lucaw Date: Fri, 23 Feb 2024 12:03:01 -0800 Subject: [PATCH 1/6] Large refactor and revamping the way in which data is being stored (now using objathor) style. --- .github/workflows/ci.yaml | 37 + .github/workflows/python-publish.yml | 37 + Makefile | 17 + README.md | 21 +- connect_to_unity.py | 51 +- holodeck/__init__.py | 0 holodeck/constants.py | 20 + holodeck/generation/__init__.py | 0 .../generation}/ceiling_objects.py | 24 +- {modules => holodeck/generation}/doors.py | 26 +- .../generation}/empty_house.json | 0 .../generation}/floor_objects.py | 33 +- holodeck/generation/holodeck.py | 506 +++++++++++++ {modules => holodeck/generation}/layers.py | 0 {modules => holodeck/generation}/lights.py | 3 +- .../generation}/milp_utils.py | 0 holodeck/generation/objaverse_retriever.py | 144 ++++ .../generation}/object_selector.py | 665 +++++++++++++----- {modules => holodeck/generation}/prompts.py | 0 {modules => holodeck/generation}/rooms.py | 38 +- {modules => holodeck/generation}/skybox.py | 1 + .../generation}/small_objects.py | 42 +- {modules => holodeck/generation}/utils.py | 68 +- .../generation}/wall_objects.py | 24 +- {modules => holodeck/generation}/walls.py | 8 +- {modules => holodeck/generation}/windows.py | 17 +- holodeck/main.py | 198 ++++++ main.py | 101 --- modules/holodeck.py | 304 -------- modules/objaverse_retriever.py | 74 -- requirements.txt | 5 + 31 files changed, 1673 insertions(+), 791 deletions(-) create mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/python-publish.yml create mode 100644 Makefile create mode 100644 holodeck/__init__.py create mode 100644 holodeck/constants.py create mode 100644 holodeck/generation/__init__.py rename {modules => holodeck/generation}/ceiling_objects.py (87%) rename {modules => holodeck/generation}/doors.py (96%) rename {modules => holodeck/generation}/empty_house.json (100%) rename {modules => 
holodeck/generation}/floor_objects.py (98%) create mode 100644 holodeck/generation/holodeck.py rename {modules => holodeck/generation}/layers.py (100%) rename {modules => holodeck/generation}/lights.py (99%) rename {modules => holodeck/generation}/milp_utils.py (100%) create mode 100644 holodeck/generation/objaverse_retriever.py rename {modules => holodeck/generation}/object_selector.py (52%) rename {modules => holodeck/generation}/prompts.py (100%) rename {modules => holodeck/generation}/rooms.py (93%) rename {modules => holodeck/generation}/skybox.py (99%) rename {modules => holodeck/generation}/small_objects.py (92%) rename {modules => holodeck/generation}/utils.py (87%) rename {modules => holodeck/generation}/wall_objects.py (97%) rename {modules => holodeck/generation}/walls.py (98%) rename {modules => holodeck/generation}/windows.py (97%) create mode 100644 holodeck/main.py delete mode 100644 main.py delete mode 100644 modules/holodeck.py delete mode 100644 modules/objaverse_retriever.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..020ab0f --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,37 @@ +name: Continuous integration + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python3 -m venv .env + source .env/bin/activate + make install + - name: Unit tests + run: | + source .env/bin/activate + make test diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..c8e4e55 --- /dev/null +++ 
b/.github/workflows/python-publish.yml @@ -0,0 +1,37 @@ +name: Release + +on: + push: + branches: + - main +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions-ecosystem/action-regex-match@v2 + id: regex-match + with: + text: ${{ github.event.head_commit.message }} + regex: '^Release ([^ ]+)' + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Release + if: ${{ steps.regex-match.outputs.match != '' }} + uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ steps.regex-match.outputs.group1 }} + - name: Build and publish + if: ${{ steps.regex-match.outputs.match != '' }} + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..39b9f67 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +install: ## [Local development] Upgrade pip, install requirements, install package. + python -m pip install -U pip + python -m pip install -e . + +install-dev: ## [Local development] Install requirements + python -m pip install -r requirements.txt + +black: ## [Local development] Auto-format python code using black + python -m black . + +test: ## [Local development] Run unit tests + python -m pytest -x -s -v tests + +.PHONY: help + +help: # Run `make help` to get help on the make commands + @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' \ No newline at end of file diff --git a/README.md b/README.md index 896f01c..bfbb76c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@

- Paper | Project Page + Paper | Project Page

## Requirements @@ -19,25 +19,26 @@ Holodeck is based on [AI2-THOR](https://ai2thor.allenai.org/ithor/documentation/ ## Installation After cloning the repo, you can install the required dependencies using the following commands: ``` -conda create --name holodeck python=3.9.16 +conda create --name holodeck python=3.10 conda activate holodeck pip install -r requirements.txt -pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+6f165fdaf3cf2d03728f931f39261d14a67414d0 +pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+e24aa88d86d460f1f5352e28b8e754c3f5966865 ``` ## Data -Download the data from [here](https://drive.google.com/file/d/1MQbFbNfTz94x8Pxfkgbohz4l46O5e3G1/view?usp=sharing) and extract it to the `data/` folder, or use the following command: -``` -FILE_ID=1MQbFbNfTz94x8Pxfkgbohz4l46O5e3G1 -CONFIRM=$(curl -sc /tmp/gcookie "https://drive.google.com/uc?export=download&id=${FILE_ID}" | grep -o 'confirm=[^&]*' | sed 's/confirm=//') -wget --load-cookies /tmp/gcookie "https://drive.google.com/uc?export=download&confirm=${CONFIRM}&id=${FILE_ID}" -O data.zip && rm -rf /tmp/gcookie -unzip data.zip +Download the data by running the following commands: +```bash +python -m objathor.dataset.download_holodeck_metadata --version 2023_09_23 +python -m objathor.dataset.download_assets --version 2023_09_23 +python -m objathor.dataset.download_annotations --version 2023_09_23 +python -m objathor.dataset.download_features --version 2023_09_23 ``` +By default these will save to `~/.objathor-assets/...`; you can change this directory by specifying the `--path` argument. ## Usage You can use the following command to generate a new environment. ``` -python main.py --query "a living room" --openai_api_key +python holodeck/main.py --query "a living room" --openai_api_key ``` To be noticed, our system uses `gpt-4-1106-preview`, so please ensure you have access to it. 
diff --git a/connect_to_unity.py b/connect_to_unity.py index e9c8e2b..c159378 100644 --- a/connect_to_unity.py +++ b/connect_to_unity.py @@ -1,33 +1,48 @@ -import json +import os +from argparse import ArgumentParser + import ai2thor +import compress_json from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from argparse import ArgumentParser + +from holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID, OBJATHOR_ASSETS_DIR parser = ArgumentParser() -parser.add_argument("--scene", help = "the directory of the scene to be generated", default = "./data/scenes/a_living_room/a_living_room.json") -parser.add_argument("--asset_dir", help = "the directory of the assets to be used", default = "./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale") +parser.add_argument( + "--scene", + help="the directory of the scene to be generated", + default=os.path.join( + HOLODECK_BASE_DATA_DIR, "/scenes/a_living_room/a_living_room.json" + ), +) +parser.add_argument( + "--asset_dir", + help="the directory of the assets to be used", + default=OBJATHOR_ASSETS_DIR, +) args = parser.parse_args() -scene = json.load(open(args.scene, "r")) +scene = compress_json.load(args.scene) controller = Controller( - start_unity=False, - port=8200, - scene="Procedural", - gridSize=0.25, - width=300, - height=300, - server_class=ai2thor.wsgi_server.WsgiServer, - makeAgentsVisible=False, - visibilityScheme='Distance', - action_hook_runner=ProceduralAssetHookRunner( + commit_id=THOR_COMMIT_ID, + start_unity=False, + port=8200, + scene="Procedural", + gridSize=0.25, + width=300, + height=300, + server_class=ai2thor.wsgi_server.WsgiServer, + makeAgentsVisible=False, + visibilityScheme="Distance", + action_hook_runner=ProceduralAssetHookRunner( asset_directory=args.asset_dir, asset_symlink=True, verbose=True, - ) - ) + ), +) controller.step(action="CreateHouse", house=scene) -print("controller reset") \ No 
newline at end of file +print("controller reset") diff --git a/holodeck/__init__.py b/holodeck/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/holodeck/constants.py b/holodeck/constants.py new file mode 100644 index 0000000..ce7fd8a --- /dev/null +++ b/holodeck/constants.py @@ -0,0 +1,20 @@ +import os +from pathlib import Path + +ABS_PATH_OF_HOLODECK = os.path.abspath(os.path.dirname(Path(__file__))) + +VERSION = "2023_09_23" + +OBJATHOR_VERSIONED_DIR = os.path.expanduser(f"~/.objathor-assets/{VERSION}") +OBJATHOR_ASSETS_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "assets") +OBJATHOR_FEATURES_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "features") +OBJATHOR_ANNOTATIONS_PATH = os.path.join(OBJATHOR_VERSIONED_DIR, "annotations.json.gz") + +HOLODECK_BASE_DATA_DIR = os.path.expanduser(f"~/.objathor-assets/holodeck/{VERSION}") + +HOLODECK_THOR_FEATURES_DIR = os.path.join(HOLODECK_BASE_DATA_DIR, "thor_object_data") +HOLODECK_THOR_ANNOTATIONS_PATH = os.path.join( + HOLODECK_BASE_DATA_DIR, "thor_object_data", "annotations.json.gz" +) + +THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" \ No newline at end of file diff --git a/holodeck/generation/__init__.py b/holodeck/generation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modules/ceiling_objects.py b/holodeck/generation/ceiling_objects.py similarity index 87% rename from modules/ceiling_objects.py rename to holodeck/generation/ceiling_objects.py index 3703d5a..e3908ce 100644 --- a/modules/ceiling_objects.py +++ b/holodeck/generation/ceiling_objects.py @@ -1,15 +1,19 @@ -import re import copy +import re + import torch -from colorama import Fore import torch.nn.functional as F -import modules.prompts as prompts -from langchain import PromptTemplate +from colorama import Fore +from langchain import PromptTemplate, OpenAI from shapely.geometry import Polygon +import holodeck.generation.prompts as prompts +from holodeck.generation.objaverse_retriever import ObjathorRetriever 
+from holodeck.generation.utils import get_bbox_dims, get_annotations + class CeilingObjectGenerator(): - def __init__(self, llm, object_retriever): + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.json_template = {"assetId": None, "id": None, "kinematic": True, "position": {}, "rotation": {}, "material": None, "roomId": None} self.llm = llm @@ -38,14 +42,14 @@ def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A") room = self.get_room_by_type(scene["rooms"], room_type) if room is None: - print("Room type {} not found in scene.".format(room_type)) + print(f"Room type {room_type} not found in scene.") continue ceiling_object_id = self.select_ceiling_object(ceiling_object_description) if ceiling_object_id is None: continue # Temporary solution: place at the center of the room - dimension = self.database[ceiling_object_id]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[ceiling_object_id]) floor_polygon = Polygon(room["vertices"]) x = floor_polygon.centroid.x @@ -58,7 +62,7 @@ def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A") ceiling_object["position"] = {"x": x, "y": y, "z": z} ceiling_object["rotation"] = {"x": 0, "y": 0, "z": 0} ceiling_object["roomId"] = room["id"] - ceiling_object["object_name"] = self.database[ceiling_object_id]["annotations"]["category"] + ceiling_object["object_name"] = get_annotations(self.database[ceiling_object_id])["category"] ceiling_objects.append(ceiling_object) return raw_ceiling_plan, ceiling_objects @@ -90,11 +94,11 @@ def get_room_by_type(self, rooms, room_type): def select_ceiling_object(self, description): candidates = self.object_retriever.retrieve([f"a 3D model of {description}"], threshold=29) - ceiling_candiates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onCeiling"] == True] + ceiling_candiates = [candidate for candidate in candidates if 
get_annotations(self.database[candidate[0]])["onCeiling"] == True] valid_ceiling_candiates = [] for candidate in ceiling_candiates: - dimension = self.database[candidate[0]]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[candidate[0]]) if dimension["y"] <= 1.0: valid_ceiling_candiates.append(candidate) if len(valid_ceiling_candiates) == 0: diff --git a/modules/doors.py b/holodeck/generation/doors.py similarity index 96% rename from modules/doors.py rename to holodeck/generation/doors.py index 00ddcc8..c5f122b 100644 --- a/modules/doors.py +++ b/holodeck/generation/doors.py @@ -1,24 +1,28 @@ import copy -import json -import torch -import pickle +import os import random + +import compress_json +import compress_pickle import numpy as np +import torch from PIL import Image -from tqdm import tqdm from colorama import Fore -import modules.prompts as prompts -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI +from tqdm import tqdm + +import holodeck.generation.prompts as prompts +from holodeck.constants import HOLODECK_BASE_DATA_DIR class DoorGenerator(): - def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm): + def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm: OpenAI): self.json_template = {"assetId": None, "id": None, "openable": False, "openness": 0, "room0": None, "room1": None, "wall0": None, "wall1": None, "holePolygon": [], "assetPosition": {}} - self.door_data = json.load(open("data/doors/door-database.json", "r")) + self.door_data = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door-database.json")) self.door_ids = list(self.door_data.keys()) self.used_assets = [] @@ -34,18 +38,18 @@ def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm): def load_features(self): try: - self.door_feature_clip = pickle.load(open("data/doors/door_feature_clip.p", "rb")) + self.door_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, 
"doors/door_feature_clip.pkl")) except: print("Precompute image features for doors...") self.door_feature_clip = [] for door_id in tqdm(self.door_ids): - image = self.preprocess(Image.open(f"data/doors/images/{door_id}.png")).unsqueeze(0) + image = self.preprocess(Image.open(os.path.join(HOLODECK_BASE_DATA_DIR, f"doors/images/{door_id}.png"))).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, keepdim=True) self.door_feature_clip.append(image_features) self.door_feature_clip = torch.vstack(self.door_feature_clip) - pickle.dump(self.door_feature_clip, open("data/doors/door_feature_clip.p", "wb")) + compress_pickle.dump(self.door_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl")) def generate_doors(self, scene, additional_requirements_door): diff --git a/modules/empty_house.json b/holodeck/generation/empty_house.json similarity index 100% rename from modules/empty_house.json rename to holodeck/generation/empty_house.json diff --git a/modules/floor_objects.py b/holodeck/generation/floor_objects.py similarity index 98% rename from modules/floor_objects.py rename to holodeck/generation/floor_objects.py index 22acf07..f29b88d 100644 --- a/modules/floor_objects.py +++ b/holodeck/generation/floor_objects.py @@ -1,24 +1,27 @@ +import copy +import datetime import json import math +import multiprocessing +import random import re import time -import copy -import cvxpy as cp -import random -import datetime + +import matplotlib.pyplot as plt import numpy as np -import multiprocessing +from langchain import PromptTemplate, OpenAI from rtree import index -import matplotlib.pyplot as plt -import modules.prompts as prompts -from langchain import PromptTemplate from scipy.interpolate import interp1d from shapely.geometry import Polygon, Point, box, LineString -from modules.milp_utils import * + +import holodeck.generation.prompts as prompts +from 
holodeck.generation.milp_utils import * +from holodeck.generation.objaverse_retriever import ObjathorRetriever +from holodeck.generation.utils import get_bbox_dims class FloorObjectGenerator(): - def __init__(self, llm, object_retriever): + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.json_template = {"assetId": None, "id": None, "kinematic": True, "position": {}, "rotation": {}, "material": None, "roomId": None} self.llm = llm @@ -94,7 +97,7 @@ def generate_objects_per_room(self, args): constraints = self.parse_constraints(constraint_plan, object_names) # get objects list - object2dimension = {object_name: self.database[object_id]['assetMetadata']['boundingBox'] + object2dimension = {object_name: get_bbox_dims(self.database[object_id]) for object_name, object_id in object_name2id.items()} objects_list = [(object_name, (object2dimension[object_name]['x'] * 100 + self.size_buffer, object2dimension[object_name]['z'] * 100 + self.size_buffer)) for object_name in constraints] @@ -112,7 +115,7 @@ def generate_objects_per_room(self, args): object_information = "" for object_name in object_names: object_id = object_name2id[object_name] - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[object_name2id[object_name]]) size_x = int(dimension["x"] * 100) size_z = int(dimension["z"] * 100) object_information += f"{object_name}: {size_x} cm x {size_z} cm\n" @@ -141,7 +144,7 @@ def generate_objects_per_room(self, args): all_is_placed = False break - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[object_name2id[object_name]]) placement = self.json_template.copy() placement["id"] = f"{object_name} ({room_id})" placement["object_name"] = object_name @@ -205,7 +208,7 @@ def solution2placement(self, solutions, object_name2id, room_id): placements = [] for object_name, solution in 
solutions.items(): if "door" in object_name or "window" in object_name or "open" in object_name: continue - dimension = self.database[object_name2id[object_name]]['assetMetadata']['boundingBox'] + dimension = get_bbox_dims(self.database[object_name2id[object_name]]) placement = self.json_template.copy() placement["assetId"] = object_name2id[object_name] placement["id"] = f"{object_name} ({room_id})" @@ -303,7 +306,7 @@ def parse_constraints(self, constraint_text, object_names): def order_objects_by_size(self, selected_floor_objects): ordered_floor_objects = [] for object_name, asset_id in selected_floor_objects: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] + dimensions = get_bbox_dims(self.database[asset_id]) size = dimensions["x"] * dimensions["z"] ordered_floor_objects.append([object_name, asset_id, size]) ordered_floor_objects.sort(key=lambda x: x[2], reverse=True) diff --git a/holodeck/generation/holodeck.py b/holodeck/generation/holodeck.py new file mode 100644 index 0000000..d35fd72 --- /dev/null +++ b/holodeck/generation/holodeck.py @@ -0,0 +1,506 @@ +import datetime +import os +from typing import Optional, Dict, Any, Tuple + +import compress_json +import open_clip +from langchain.llms import OpenAI +from sentence_transformers import SentenceTransformer +from tqdm import tqdm + +from holodeck.constants import ( + HOLODECK_BASE_DATA_DIR, + OBJATHOR_VERSIONED_DIR, + OBJATHOR_ASSETS_DIR, + OBJATHOR_FEATURES_DIR, + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_THOR_FEATURES_DIR, + HOLODECK_THOR_ANNOTATIONS_PATH, +) +from holodeck.generation.ceiling_objects import CeilingObjectGenerator +from holodeck.generation.doors import DoorGenerator +from holodeck.generation.floor_objects import FloorObjectGenerator +from holodeck.generation.layers import map_asset2layer +from holodeck.generation.lights import generate_lights +from holodeck.generation.objaverse_retriever import ObjathorRetriever +from holodeck.generation.object_selector import 
ObjectSelector +from holodeck.generation.rooms import FloorPlanGenerator +from holodeck.generation.skybox import getSkybox +from holodeck.generation.small_objects import SmallObjectGenerator +from holodeck.generation.utils import get_top_down_frame, room_video +from holodeck.generation.wall_objects import WallObjectGenerator +from holodeck.generation.walls import WallGenerator +from holodeck.generation.windows import WindowGenerator + + +def confirm_paths_exist(): + for p in [ + OBJATHOR_VERSIONED_DIR, + OBJATHOR_ASSETS_DIR, + OBJATHOR_FEATURES_DIR, + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_BASE_DATA_DIR, + HOLODECK_THOR_FEATURES_DIR, + HOLODECK_THOR_ANNOTATIONS_PATH, + ]: + if not os.path.exists(p): + raise FileNotFoundError( + f"Path {p} does not exist, this must exist for Holodeck generation to succeed." + f" Please see the Holodeck README file at https://github.com/allenai/Holodeck/blob/main/README.md" + f" for instruction on how to set up the required data directories." + ) + + +class Holodeck: + def __init__( + self, + openai_api_key: str, + openai_org: Optional[str], + objaverse_asset_dir: str, + single_room, + ): + confirm_paths_exist() + + if openai_org is not None: + os.environ["OPENAI_ORG"] = openai_org + + # initialize llm + self.llm = OpenAI( + model_name="gpt-4-1106-preview", + max_tokens=2048, + openai_api_key=openai_api_key, + ) + self.llm_fast = OpenAI( + model_name="gpt-3.5-turbo", + max_tokens=2048, + openai_api_key=openai_api_key, + ) + + # initialize CLIP + ( + self.clip_model, + _, + self.clip_preprocess, + ) = open_clip.create_model_and_transforms( + "ViT-L-14", pretrained="laion2b_s32b_b82k" + ) + self.clip_tokenizer = open_clip.get_tokenizer("ViT-L-14") + + # initialize sentence transformer + self.sbert_model = SentenceTransformer("all-mpnet-base-v2", device="cpu") + + # objaverse version and asset dir + self.objaverse_asset_dir = objaverse_asset_dir + + # initialize generation + self.retrieval_threshold = 28 + self.object_retriever = 
ObjathorRetriever( + clip_model=self.clip_model, + clip_preprocess=self.clip_preprocess, + clip_tokenizer=self.clip_tokenizer, + sbert_model=self.sbert_model, + retrieval_threshold=self.retrieval_threshold, + ) + self.floor_generator = FloorPlanGenerator( + self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm + ) + self.wall_generator = WallGenerator(self.llm) + self.door_generator = DoorGenerator( + self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm + ) + self.window_generator = WindowGenerator(self.llm) + self.object_selector = ObjectSelector(object_retriever=self.object_retriever, llm=self.llm) + self.floor_object_generator = FloorObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + self.wall_object_generator = WallObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + self.ceiling_generator = CeilingObjectGenerator(object_retriever=self.object_retriever, llm=self.llm) + self.small_object_generator = SmallObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) + + # additional requirements + single_room_requirements = "I only need one room" + + if single_room: + self.additional_requirements_room = single_room_requirements + else: + self.additional_requirements_room = "N/A" + + self.additional_requirements_door = "N/A" + self.additional_requirements_window = ( + "Only one wall of each room should have windows" + ) + self.additional_requirements_object = "N/A" + self.additional_requirements_ceiling = "N/A" + + def get_empty_scene(self): + return compress_json.load("generation/empty_house.json") + + def empty_house(self, scene): + scene["rooms"] = [] + scene["walls"] = [] + scene["doors"] = [] + scene["windows"] = [] + scene["objects"] = [] + scene["proceduralParameters"]["lights"] = [] + return scene + + def generate_rooms(self, scene, additional_requirements_room, used_assets=[]): + self.floor_generator.used_assets = used_assets + rooms = 
self.floor_generator.generate_rooms(scene, additional_requirements_room) + scene["rooms"] = rooms + return scene + + def generate_walls(self, scene): + wall_height, walls = self.wall_generator.generate_walls(scene) + scene["wall_height"] = wall_height + scene["walls"] = walls + return scene + + def generate_doors(self, scene, additional_requirements_door="N/A", used_assets=[]): + self.door_generator.used_assets = used_assets + + # generate doors + ( + raw_doorway_plan, + doors, + room_pairs, + open_room_pairs, + ) = self.door_generator.generate_doors(scene, additional_requirements_door) + scene["raw_doorway_plan"] = raw_doorway_plan + scene["doors"] = doors + scene["room_pairs"] = room_pairs + scene["open_room_pairs"] = open_room_pairs + + # update walls + updated_walls, open_walls = self.wall_generator.update_walls( + scene["walls"], open_room_pairs + ) + scene["walls"] = updated_walls + scene["open_walls"] = open_walls + return scene + + def generate_windows( + self, + scene, + additional_requirements_window="I want to install windows to only one wall of each room", + used_assets=[], + ): + self.window_generator.used_assets = used_assets + raw_window_plan, walls, windows = self.window_generator.generate_windows( + scene, additional_requirements_window + ) + scene["raw_window_plan"] = raw_window_plan + scene["windows"] = windows + scene["walls"] = walls + return scene + + def select_objects(self, scene, additional_requirements_object, used_assets=[]): + self.object_selector.used_assets = used_assets + object_selection_plan, selected_objects = self.object_selector.select_objects( + scene, additional_requirements_object + ) + scene["object_selection_plan"] = object_selection_plan + scene["selected_objects"] = selected_objects + return scene + + def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): + ( + raw_ceiling_plan, + ceiling_objects, + ) = self.ceiling_generator.generate_ceiling_objects( + scene, additional_requirements_ceiling + ) 
+ scene["ceiling_objects"] = ceiling_objects + scene["raw_ceiling_plan"] = raw_ceiling_plan + return scene + + def generate_small_objects(self, scene, used_assets=[]): + self.small_object_generator.used_assets = used_assets + controller = self.small_object_generator.start_controller( + scene, self.objaverse_asset_dir + ) + event = controller.reset() + receptacle_ids = [ + obj["objectId"] + for obj in event.metadata["objects"] + if obj["receptacle"] and "___" not in obj["objectId"] + ] + if "Floor" in receptacle_ids: + receptacle_ids.remove("Floor") + + try: + ( + small_objects, + receptacle2small_objects, + ) = self.small_object_generator.generate_small_objects( + scene, controller, receptacle_ids + ) + scene["small_objects"] = small_objects + scene["receptacle2small_objects"] = receptacle2small_objects + except: + scene["small_objects"] = [] + print("Failed to generate small objects") + + controller.stop() # stop controller to avoid memory leak + return scene + + def change_ceiling_material(self, scene): + first_wall_material = scene["rooms"][0]["wallMaterial"] + scene["proceduralParameters"]["ceilingMaterial"] = first_wall_material + return scene + + def generate_scene( + self, + scene, + query: str, + save_dir: str, + used_assets=[], + add_ceiling=False, + generate_image=True, + generate_video=False, + add_time=True, + use_constraint=True, + random_selection=False, + use_milp=False, + ) -> Tuple[Dict[str, Any], str]: + # initialize scene + query = query.replace("_", " ") + scene["query"] = query + + # empty house + scene = self.empty_house(scene) + + # generate rooms + scene = self.generate_rooms( + scene, + additional_requirements_room=self.additional_requirements_room, + used_assets=used_assets, + ) + + # generate walls + scene = self.generate_walls(scene) + + # generate doors + scene = self.generate_doors( + scene, + additional_requirements_door=self.additional_requirements_door, + used_assets=used_assets, + ) + + # generate windows + scene = 
self.generate_windows( + scene, + additional_requirements_window=self.additional_requirements_window, + used_assets=used_assets, + ) + + # select objects + self.object_selector.random_selection = random_selection + scene = self.select_objects( + scene, + additional_requirements_object=self.additional_requirements_object, + used_assets=used_assets, + ) + + # generate floor objects + self.floor_object_generator.use_milp = use_milp + scene["floor_objects"] = self.floor_object_generator.generate_objects( + scene, use_constraint=use_constraint + ) + + # generate wall objects + scene["wall_objects"] = self.wall_object_generator.generate_wall_objects( + scene, use_constraint=use_constraint + ) + + # combine floor and wall objects + scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + + # generate small objects + scene = self.generate_small_objects(scene, used_assets=used_assets) + scene["objects"] += scene["small_objects"] + + # generate ceiling objects + if add_ceiling: + scene = self.generate_ceiling_objects( + scene, + additional_requirements_ceiling=self.additional_requirements_ceiling, + ) + scene["objects"] += scene["ceiling_objects"] + + # generate lights + lights = generate_lights(scene) + scene["proceduralParameters"]["lights"] = lights + + # assign layers + scene = map_asset2layer(scene) + + # assign skybox + scene = getSkybox(scene) + + # change ceiling material + scene = self.change_ceiling_material(scene) + + # create folder + query_name = query.replace(" ", "_").replace("'", "")[:30] + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + + if add_time: + folder_name = f"{query_name}-{create_time}" # query name + time + else: + folder_name = query_name # query name only + + save_dir = os.path.abspath(os.path.join(save_dir, folder_name)) + os.makedirs(save_dir, exist_ok=True) + compress_json.dump( + scene, + os.path.join(save_dir, f"{query_name}.json"), + json_kwargs=dict(indent=4), 
+ ) + + # save top down image + if generate_image: + top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) + top_image.show() + top_image.save(os.path.join(save_dir, f"{query_name}.png")) + + # save video + if generate_video: + scene["objects"] = ( + scene["floor_objects"] + scene["wall_objects"] + scene["small_objects"] + ) + final_video = room_video(scene, self.objaverse_asset_dir, 1024, 1024) + final_video.write_videofile( + os.path.join(save_dir, f"{query_name}.mp4"), fps=30 + ) + + return scene, save_dir + + def generate_variants( + self, + query, + original_scene, + save_dir=os.path.join(HOLODECK_BASE_DATA_DIR, "scenes"), + number_of_variants=5, + used_assets=[], + ): + self.object_selector.reuse_selection = ( + False # force the selector to retrieve different assets + ) + + # create the list of used assets + used_assets += [ + obj["assetId"] + for obj in original_scene["objects"] + + original_scene["windows"] + + original_scene["doors"] + ] + used_assets += [ + room["floorMaterial"]["name"] for room in original_scene["rooms"] + ] + used_assets += [wall["material"]["name"] for wall in original_scene["walls"]] + used_assets = list(set(used_assets)) + + variant_scenes = [] + for i in tqdm(range(number_of_variants)): + variant_scene, _ = self.generate_scene( + original_scene.copy(), + query, + save_dir, + used_assets, + generate_image=True, + generate_video=False, + add_time=True, + ) + variant_scenes.append(variant_scene) + used_assets += [ + obj["assetId"] + for obj in variant_scene["objects"] + + variant_scene["windows"] + + variant_scene["doors"] + ] + used_assets += [ + room["floorMaterial"]["name"] for room in variant_scene["rooms"] + ] + used_assets += [wall["material"]["name"] for wall in variant_scene["walls"]] + used_assets = list(set(used_assets)) + return variant_scenes + + def ablate_placement( + self, + scene, + query, + save_dir, + used_assets=[], + add_ceiling=False, + generate_image=True, + generate_video=False, + 
add_time=True, + use_constraint=False, + constraint_type="llm", + ): + # place floor objects + if use_constraint: + self.floor_object_generator.constraint_type = ( + constraint_type # ablate the constraint types + ) + scene["floor_objects"] = self.floor_object_generator.generate_objects( + scene, use_constraint=use_constraint + ) + if len(scene["floor_objects"]) == 0: + print("No object is placed, skip this scene") + return None # if no object is placed, return None + # place wall objects + if use_constraint: + self.wall_object_generator.constraint_type = constraint_type + scene["wall_objects"] = self.wall_object_generator.generate_wall_objects( + scene, use_constraint=use_constraint + ) + + # combine floor and wall objects + scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + + # generate small objects + scene = self.generate_small_objects(scene, used_assets=used_assets) + scene["objects"] += scene["small_objects"] + + # assign layers + scene = map_asset2layer(scene) + + # take the first 30 characters of the query as the folder name + query_name = query.replace(" ", "_").replace("'", "")[:30] + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + + if add_time: + folder_name = f"{query_name}-{create_time}" # query name + time + else: + folder_name = query_name # query name only + + os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) + compress_json.dump( + scene, + f"{save_dir}/{folder_name}/{query_name}.json", + json_kwargs=dict(indent=4), + ) + + # save top down image + if generate_image: + top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) + top_image.show() + top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") + + return scene diff --git a/modules/layers.py b/holodeck/generation/layers.py similarity index 100% rename from modules/layers.py rename to holodeck/generation/layers.py diff --git a/modules/lights.py b/holodeck/generation/lights.py 
similarity index 99% rename from modules/lights.py rename to holodeck/generation/lights.py index 2cad73c..c0bd79d 100644 --- a/modules/lights.py +++ b/holodeck/generation/lights.py @@ -1,5 +1,6 @@ -from shapely import Polygon from procthor.utils.types import RGB, Light, LightShadow, Vector3 +from shapely import Polygon + def generate_lights(scene): lights = [ diff --git a/modules/milp_utils.py b/holodeck/generation/milp_utils.py similarity index 100% rename from modules/milp_utils.py rename to holodeck/generation/milp_utils.py diff --git a/holodeck/generation/objaverse_retriever.py b/holodeck/generation/objaverse_retriever.py new file mode 100644 index 0000000..7975b0e --- /dev/null +++ b/holodeck/generation/objaverse_retriever.py @@ -0,0 +1,144 @@ +import os + +import compress_json +import compress_pickle +import numpy as np +import torch +import torch.nn.functional as F + +from holodeck.constants import ( + OBJATHOR_ANNOTATIONS_PATH, + HOLODECK_THOR_ANNOTATIONS_PATH, + OBJATHOR_FEATURES_DIR, + HOLODECK_THOR_FEATURES_DIR, +) +from holodeck.generation.utils import get_bbox_dims + + +class ObjathorRetriever: + def __init__( + self, + clip_model, + clip_preprocess, + clip_tokenizer, + sbert_model, + retrieval_threshold, + ): + objathor_annotations = compress_json.load(OBJATHOR_ANNOTATIONS_PATH) + thor_annotations = compress_json.load(HOLODECK_THOR_ANNOTATIONS_PATH) + self.database = {**objathor_annotations, **thor_annotations} + + objathor_clip_features_dict = compress_pickle.load( + os.path.join(OBJATHOR_FEATURES_DIR, f"clip_features.pkl") + ) # clip features + objathor_sbert_features_dict = compress_pickle.load( + os.path.join(OBJATHOR_FEATURES_DIR, f"sbert_features.pkl") + ) # sbert features + assert ( + objathor_clip_features_dict["uids"] == objathor_sbert_features_dict["uids"] + ) + + objathor_uids = objathor_clip_features_dict["uids"] + objathor_clip_features = objathor_clip_features_dict["img_features"].astype( + np.float32 + ) + objathor_sbert_features = 
objathor_sbert_features_dict["text_features"].astype( + np.float32 + ) + + thor_clip_features_dict = compress_pickle.load( + os.path.join(HOLODECK_THOR_FEATURES_DIR, "clip_features.pkl") + ) # clip features + thor_sbert_features_dict = compress_pickle.load( + os.path.join(HOLODECK_THOR_FEATURES_DIR, "sbert_features.pkl") + ) # clip features + assert thor_clip_features_dict["uids"] == thor_sbert_features_dict["uids"] + + thor_uids = thor_clip_features_dict["uids"] + thor_clip_features = thor_clip_features_dict["img_features"].astype(np.float32) + thor_sbert_features = thor_sbert_features_dict["text_features"].astype( + np.float32 + ) + + self.clip_features = torch.from_numpy( + np.concatenate([objathor_clip_features, thor_clip_features], axis=0) + ) + self.clip_features = F.normalize(self.clip_features, p=2, dim=-1) + + self.sbert_features = torch.from_numpy( + np.concatenate([objathor_sbert_features, thor_sbert_features], axis=0) + ) + + self.asset_ids = objathor_uids + thor_uids + + self.clip_model = clip_model + self.clip_preprocess = clip_preprocess + self.clip_tokenizer = clip_tokenizer + self.sbert_model = sbert_model + + self.retrieval_threshold = retrieval_threshold + + self.use_text = True + + def retrieve(self, queries, threshold=28): + with torch.no_grad(): + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) + + query_feature_clip = F.normalize(query_feature_clip, p=2, dim=-1) + + clip_similarities = 100 * torch.einsum('ij, lkj -> ilk', query_feature_clip, self.clip_features) + clip_similarities = torch.max(clip_similarities, dim=-1).values + + query_feature_sbert = self.sbert_model.encode( + queries, convert_to_tensor=True, show_progress_bar=False + ) + sbert_similarities = query_feature_sbert @ self.sbert_features.T + + if self.use_text: + similarities = clip_similarities + sbert_similarities + else: + similarities = clip_similarities + + threshold_indices = torch.where(clip_similarities > threshold) + + 
unsorted_results = [] + for query_index, asset_index in zip(*threshold_indices): + score = similarities[query_index, asset_index].item() + unsorted_results.append((self.asset_ids[asset_index], score)) + + # Sorting the results in descending order by score + results = sorted(unsorted_results, key=lambda x: x[1], reverse=True) + + return results + + def compute_size_difference(self, target_size, candidates): + candidate_sizes = [] + for uid, _ in candidates: + size = get_bbox_dims(self.database[uid]) + size_list = [size["x"] * 100, size["y"] * 100, size["z"] * 100] + size_list.sort() + candidate_sizes.append(size_list) + + candidate_sizes = torch.tensor(candidate_sizes) + + target_size_list = list(target_size) + target_size_list.sort() + target_size = torch.tensor(target_size_list) + + size_difference = abs(candidate_sizes - target_size).mean(axis=1) / 100 + size_difference = size_difference.tolist() + + candidates_with_size_difference = [] + for i, (uid, score) in enumerate(candidates): + candidates_with_size_difference.append( + (uid, score - size_difference[i] * 10) + ) + + # sort the candidates by score + candidates_with_size_difference = sorted( + candidates_with_size_difference, key=lambda x: x[1], reverse=True + ) + + return candidates_with_size_difference diff --git a/modules/object_selector.py b/holodeck/generation/object_selector.py similarity index 52% rename from modules/object_selector.py rename to holodeck/generation/object_selector.py index c3f3f25..e673870 100644 --- a/modules/object_selector.py +++ b/holodeck/generation/object_selector.py @@ -1,21 +1,25 @@ -import re import copy import json -import torch -import random import multiprocessing +import random +import re from typing import Dict + +import torch +import torch.nn.functional as F from colorama import Fore +from langchain import PromptTemplate, OpenAI from shapely import Polygon -import torch.nn.functional as F -import modules.prompts as prompts -from langchain import PromptTemplate -from 
modules.floor_objects import DFS_Solver_Floor -from modules.wall_objects import DFS_Solver_Wall + +import holodeck.generation.prompts as prompts +from holodeck.generation.floor_objects import DFS_Solver_Floor +from holodeck.generation.objaverse_retriever import ObjathorRetriever +from holodeck.generation.utils import get_bbox_dims, get_annotations +from holodeck.generation.wall_objects import DFS_Solver_Wall class ObjectSelector: - def __init__(self, object_retriever, llm): + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): # object retriever self.object_retriever = object_retriever self.database = object_retriever.database @@ -23,14 +27,21 @@ def __init__(self, object_retriever, llm): # language model and prompt templates self.llm = llm self.object_selection_template_1 = prompts.object_selection_prompt_new_1 - self.object_selection_template_2 = PromptTemplate(input_variables=["object_selection_prompt_new_1", "object_selection_1", "room"], template=prompts.object_selection_prompt_new_2) + self.object_selection_template_2 = PromptTemplate( + input_variables=[ + "object_selection_prompt_new_1", + "object_selection_1", + "room", + ], + template=prompts.object_selection_prompt_new_2, + ) # hyperparameters self.floor_capacity_ratio = 0.4 self.wall_capacity_ratio = 0.5 self.object_size_tolerance = 0.8 - self.similarity_threshold_floor = 31 # need to be tuned - self.similarity_threshold_wall = 31 # need to be tuned + self.similarity_threshold_floor = 31 # need to be tuned + self.similarity_threshold_wall = 31 # need to be tuned self.thin_threshold = 3 self.used_assets = [] self.consider_size = True @@ -40,18 +51,35 @@ def __init__(self, object_retriever, llm): self.reuse_selection = False self.multiprocessing = True - def select_objects(self, scene, additional_requirements="N/A"): rooms_types = [room["roomType"] for room in scene["rooms"]] - room2area = {room["roomType"]: self.get_room_area(room) for room in scene["rooms"]} - room2size = 
{room["roomType"]: self.get_room_size(room, scene["wall_height"]) for room in scene["rooms"]} - room2perimeter = {room["roomType"]: self.get_room_perimeter(room) for room in scene["rooms"]} - room2vertices = {room["roomType"]: [(x * 100, y * 100) for (x, y) in room["vertices"]] for room in scene["rooms"]} - - room2floor_capacity = {room_type: [room_area * self.floor_capacity_ratio, 0] for room_type, room_area in room2area.items()} + room2area = { + room["roomType"]: self.get_room_area(room) for room in scene["rooms"] + } + room2size = { + room["roomType"]: self.get_room_size(room, scene["wall_height"]) + for room in scene["rooms"] + } + room2perimeter = { + room["roomType"]: self.get_room_perimeter(room) for room in scene["rooms"] + } + room2vertices = { + room["roomType"]: [(x * 100, y * 100) for (x, y) in room["vertices"]] + for room in scene["rooms"] + } + + room2floor_capacity = { + room_type: [room_area * self.floor_capacity_ratio, 0] + for room_type, room_area in room2area.items() + } room2floor_capacity = self.update_floor_capacity(room2floor_capacity, scene) - room2wall_capacity = {room_type: [room_perimeter * self.wall_capacity_ratio, 0] for room_type, room_perimeter in room2perimeter.items()} - selected_objects = {room["roomType"]: {"floor": [], "wall": []} for room in scene["rooms"]} + room2wall_capacity = { + room_type: [room_perimeter * self.wall_capacity_ratio, 0] + for room_type, room_perimeter in room2perimeter.items() + } + selected_objects = { + room["roomType"]: {"floor": [], "wall": []} for room in scene["rooms"] + } if "object_selection_plan" in scene: object_selection_plan = scene["object_selection_plan"] @@ -59,12 +87,30 @@ def select_objects(self, scene, additional_requirements="N/A"): selected_objects = scene["selected_objects"] else: for room_type in rooms_types: - floor_objects, _, wall_objects, _ = self.get_objects_by_room(object_selection_plan[room_type], scene, room2size[room_type], room2floor_capacity[room_type], 
room2wall_capacity[room_type], room2vertices[room_type]) + floor_objects, _, wall_objects, _ = self.get_objects_by_room( + object_selection_plan[room_type], + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) selected_objects[room_type]["floor"] = floor_objects selected_objects[room_type]["wall"] = wall_objects else: object_selection_plan = {room["roomType"]: [] for room in scene["rooms"]} - packed_args = [(room_type, scene, additional_requirements, room2size, room2floor_capacity, room2wall_capacity, room2vertices) for room_type in rooms_types] + packed_args = [ + ( + room_type, + scene, + additional_requirements, + room2size, + room2floor_capacity, + room2wall_capacity, + room2vertices, + ) + for room_type in rooms_types + ] if self.multiprocessing: pool = multiprocessing.Pool(processes=4) @@ -78,21 +124,35 @@ def select_objects(self, scene, additional_requirements="N/A"): selected_objects[room_type]["floor"] = result["floor"] selected_objects[room_type]["wall"] = result["wall"] object_selection_plan[room_type] = result["plan"] - - print(f"\n{Fore.GREEN}AI: Here is the object selection plan:\n{object_selection_plan}{Fore.RESET}") + + print( + f"\n{Fore.GREEN}AI: Here is the object selection plan:\n{object_selection_plan}{Fore.RESET}" + ) return object_selection_plan, selected_objects - def plan_room(self, args): - room_type, scene, additional_requirements, room2size, room2floor_capacity, room2wall_capacity, room2vertices = args + ( + room_type, + scene, + additional_requirements, + room2size, + room2floor_capacity, + room2wall_capacity, + room2vertices, + ) = args print(f"\n{Fore.GREEN}AI: Selecting objects for {room_type}...{Fore.RESET}\n") - + result = {} room_size_str = f"{int(room2size[room_type][0])*100}cm in length, {int(room2size[room_type][1])*100}cm in width, {int(room2size[room_type][2])*100}cm in height" - - prompt_1 = self.object_selection_template_1.replace("INPUT", 
scene["query"]).replace("ROOM_TYPE", room_type).replace("ROOM_SIZE", room_size_str).replace("REQUIREMENTS", additional_requirements) + + prompt_1 = ( + self.object_selection_template_1.replace("INPUT", scene["query"]) + .replace("ROOM_TYPE", room_type) + .replace("ROOM_SIZE", room_size_str) + .replace("REQUIREMENTS", additional_requirements) + ) # print(f"\nUser: {prompt_1}\n") - + output_1 = self.llm(prompt_1).lower() plan_1 = self.extract_json(output_1) @@ -100,32 +160,57 @@ def plan_room(self, args): print(f"Error while extracting the JSON for {room_type}.") return result - floor_objects, floor_capacity, wall_objects, wall_capacity = self.get_objects_by_room(plan_1, scene, room2size[room_type], room2floor_capacity[room_type], room2wall_capacity[room_type], room2vertices[room_type]) + ( + floor_objects, + floor_capacity, + wall_objects, + wall_capacity, + ) = self.get_objects_by_room( + plan_1, + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) if floor_capacity[1] / floor_capacity[0] >= 0.8: result["floor"] = floor_objects result["wall"] = wall_objects result["plan"] = plan_1 else: - print(f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]}m^2, which is less than 70% of the total floor capacity {floor_capacity[0]}m^2.{Fore.RESET}") - prompt_2 = self.object_selection_template_2.format(object_selection_prompt_new_1=prompt_1, object_selection_1=output_1, room=room_type) + print( + f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2, which is less than 70% of the total floor capacity {floor_capacity[0]:.2g}m^2.{Fore.RESET}" + ) + prompt_2 = self.object_selection_template_2.format( + object_selection_prompt_new_1=prompt_1, + object_selection_1=output_1, + room=room_type, + ) output_2 = self.llm(prompt_2).lower() plan_2 = self.extract_json(output_2) new_plan = copy.deepcopy(plan_1) - for object in plan_2: new_plan[object] = plan_2[object] - 
- floor_objects, _, wall_objects, _ = self.get_objects_by_room(new_plan, scene, room2size[room_type], room2floor_capacity[room_type], room2wall_capacity[room_type], room2vertices[room_type]) + for object in plan_2: + new_plan[object] = plan_2[object] + + floor_objects, _, wall_objects, _ = self.get_objects_by_room( + new_plan, + scene, + room2size[room_type], + room2floor_capacity[room_type], + room2wall_capacity[room_type], + room2vertices[room_type], + ) result["floor"] = floor_objects result["wall"] = wall_objects result["plan"] = new_plan return room_type, result - - + def extract_json(self, input_string): # Using regex to identify the JSON structure in the string - json_match = re.search(r'{.*}', input_string, re.DOTALL) + json_match = re.search(r"{.*}", input_string, re.DOTALL) if json_match: extracted_json = json_match.group(0) try: @@ -140,86 +225,130 @@ def extract_json(self, input_string): else: print("No valid JSON found.") return None - def check_dict(self, dict): valid = True - attributes = ["description", "location", "size", "quantity", "variance_type", "objects_on_top"] + attributes = [ + "description", + "location", + "size", + "quantity", + "variance_type", + "objects_on_top", + ] for key, value in dict.items(): - if not isinstance(key, str): valid = False; break + if not isinstance(key, str): + valid = False + break - if not isinstance(value, Dict): valid = False; break + if not isinstance(value, Dict): + valid = False + break for attribute in attributes: - if attribute not in value: valid = False; break - - if not isinstance(value["description"], str): valid = False; break + if attribute not in value: + valid = False + break + + if not isinstance(value["description"], str): + valid = False + break - if value["location"] not in ["floor", "wall"]: dict[key]["location"] = "floor" + if value["location"] not in ["floor", "wall"]: + dict[key]["location"] = "floor" - if not isinstance(value["size"], list) or len(value["size"]) != 3 or not 
all(isinstance(i, int) for i in value["size"]): dict[key]["size"] = None + if ( + not isinstance(value["size"], list) + or len(value["size"]) != 3 + or not all(isinstance(i, int) for i in value["size"]) + ): + dict[key]["size"] = None - if not isinstance(value["quantity"], int): dict[key]["quantity"] = 1 + if not isinstance(value["quantity"], int): + dict[key]["quantity"] = 1 - if not isinstance(value["variance_type"], str) or value["variance_type"] not in ["same", "varied"]: dict[key]["variance_type"] = "same" + if not isinstance(value["variance_type"], str) or value[ + "variance_type" + ] not in ["same", "varied"]: + dict[key]["variance_type"] = "same" - if not isinstance(value["objects_on_top"], list): dict[key]["objects_on_top"] = [] + if not isinstance(value["objects_on_top"], list): + dict[key]["objects_on_top"] = [] for i, child in enumerate(value["objects_on_top"]): - if not isinstance(child, Dict): valid = False; break - + if not isinstance(child, Dict): + valid = False + break + for attribute in ["object_name", "quantity", "variance_type"]: - if attribute not in child: valid = False; break - - if not isinstance(child["object_name"], str): valid = False; break + if attribute not in child: + valid = False + break + + if not isinstance(child["object_name"], str): + valid = False + break - if not isinstance(child["quantity"], int): dict[key]["objects_on_top"][i]["quantity"] = 1 + if not isinstance(child["quantity"], int): + dict[key]["objects_on_top"][i]["quantity"] = 1 - if not isinstance(child["variance_type"], str) or child["variance_type"] not in ["same", "varied"]: dict[key]["objects_on_top"][i]["variance_type"] = "same" + if not isinstance(child["variance_type"], str) or child[ + "variance_type" + ] not in ["same", "varied"]: + dict[key]["objects_on_top"][i]["variance_type"] = "same" - if not valid: return None - else: return dict - + if not valid: + return None + else: + return dict - def get_objects_by_room(self, parsed_plan, scene, room_size, 
floor_capacity, wall_capacity, vertices): + def get_objects_by_room( + self, parsed_plan, scene, room_size, floor_capacity, wall_capacity, vertices + ): # get the floor and wall objects floor_object_list = [] wall_object_list = [] for object_name, object_info in parsed_plan.items(): object_info["object_name"] = object_name - if object_info["location"] == "floor": floor_object_list.append(object_info) - else: wall_object_list.append(object_info) - - floor_objects, floor_capacity = self.get_floor_objects(floor_object_list, floor_capacity, room_size, vertices, scene) - wall_objects, wall_capacity = self.get_wall_objects(wall_object_list, wall_capacity, room_size, vertices, scene) + if object_info["location"] == "floor": + floor_object_list.append(object_info) + else: + wall_object_list.append(object_info) + + floor_objects, floor_capacity = self.get_floor_objects( + floor_object_list, floor_capacity, room_size, vertices, scene + ) + wall_objects, wall_capacity = self.get_wall_objects( + wall_object_list, wall_capacity, room_size, vertices, scene + ) return floor_objects, floor_capacity, wall_objects, wall_capacity - def get_room_size(self, room, wall_height): floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = [point['z'] for point in floor_polygon] + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] x_dim = max(x_values) - min(x_values) z_dim = max(z_values) - min(z_values) - if x_dim > z_dim: return (x_dim, wall_height, z_dim) - else: return (z_dim, wall_height, x_dim) - + if x_dim > z_dim: + return (x_dim, wall_height, z_dim) + else: + return (z_dim, wall_height, x_dim) def get_room_area(self, room): room_vertices = room["vertices"] room_polygon = Polygon(room_vertices) return room_polygon.area - def get_room_perimeter(self, room): room_vertices = room["vertices"] room_polygon = Polygon(room_vertices) return room_polygon.length - - def get_floor_objects(self, 
floor_object_list, floor_capacity, room_size, room_vertices, scene): + def get_floor_objects( + self, floor_object_list, floor_capacity, room_size, room_vertices, scene + ): selected_floor_objects_all = [] for floor_object in floor_object_list: object_type = floor_object["object_name"] @@ -228,36 +357,65 @@ def get_floor_objects(self, floor_object_list, floor_capacity, room_size, room_v quantity = min(floor_object["quantity"], 10) variance_type = floor_object["variance_type"] - candidates = self.object_retriever.retrieve([f"a 3D model of {object_type}, {object_description}"], self.similarity_threshold_floor) - - # check on floor objects - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onFloor"] == True] # only select objects on the floor - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onCeiling"] == False] # only select objects not on the ceiling - - # ignore doors and windows and frames - candidates = [candidate for candidate in candidates if "door" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "window" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "frame" not in self.database[candidate[0]]["annotations"]["category"].lower()] + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_type}, {object_description}"], + self.similarity_threshold_floor, + ) + + candidates = [ + candidate + for candidate, annotation in zip( + candidates, + [ + get_annotations(self.database[candidate[0]]) + for candidate in candidates + ], + ) + if annotation["onFloor"] # only select objects on the floor + and ( + not annotation["onCeiling"] + ) # only select objects not on the ceiling + and all( # ignore doors and windows and frames + k not in annotation["category"].lower() + for k in ["door", "window", 
"frame"] + ) + ] # check if the object is too big candidates = self.check_object_size(candidates, room_size) # check if object can be placed on the floor - candidates = self.check_floor_placement(candidates[:20], room_vertices, scene) + candidates = self.check_floor_placement( + candidates[:20], room_vertices, scene + ) # No candidates found - if len(candidates) == 0: print("No candidates found for {} {}".format(object_type, object_description)); continue + if len(candidates) == 0: + print( + "No candidates found for {} {}".format( + object_type, object_description + ) + ) + continue # remove used assets top_one_candidate = candidates[0] - if len(candidates) > 1: candidates = [candidate for candidate in candidates if candidate[0] not in self.used_assets] - if len(candidates) == 0: candidates = [top_one_candidate] - + if len(candidates) > 1: + candidates = [ + candidate + for candidate in candidates + if candidate[0] not in self.used_assets + ] + if len(candidates) == 0: + candidates = [top_one_candidate] + # consider object size difference if object_size is not None and self.consider_size: - candidates = self.object_retriever.compute_size_difference(object_size, candidates) + candidates = self.object_retriever.compute_size_difference( + object_size, candidates + ) - candidates = candidates[:10] # only select top 10 candidates + candidates = candidates[:10] # only select top 10 candidates selected_asset_ids = [] if variance_type == "same": @@ -270,7 +428,8 @@ def get_floor_objects(self, floor_object_list, floor_capacity, room_size, room_v selected_candidate = self.random_select(candidates) selected_asset_id = selected_candidate[0] selected_asset_ids.append(selected_asset_id) - if len(candidates) > 1: candidates.remove(selected_candidate) + if len(candidates) > 1: + candidates.remove(selected_candidate) for i in range(quantity): selected_asset_id = selected_asset_ids[i] @@ -280,37 +439,56 @@ def get_floor_objects(self, floor_object_list, floor_capacity, room_size, 
room_v # reselect objects if they exceed floor capacity, consider the diversity of objects selected_floor_objects = [] while True: - if len(selected_floor_objects_all) == 0: break + if len(selected_floor_objects_all) == 0: + break current_selected_asset_ids = [] current_number_of_objects = len(selected_floor_objects) for object_name, selected_asset_id in selected_floor_objects_all: if selected_asset_id not in current_selected_asset_ids: - selected_asset_size = self.database[selected_asset_id]["assetMetadata"]["boundingBox"] - selected_asset_capacity = selected_asset_size["x"] * selected_asset_size["z"] - if floor_capacity[1] + selected_asset_capacity > floor_capacity[0] and len(selected_floor_objects) > 0: - print(f"{object_type} {object_description} exceeds floor capacity") + selected_asset_size = get_bbox_dims( + self.database[selected_asset_id] + ) + selected_asset_capacity = ( + selected_asset_size["x"] * selected_asset_size["z"] + ) + if ( + floor_capacity[1] + selected_asset_capacity > floor_capacity[0] + and len(selected_floor_objects) > 0 + ): + print( + f"{object_type} {object_description} exceeds floor capacity" + ) else: current_selected_asset_ids.append(selected_asset_id) selected_floor_objects.append((object_name, selected_asset_id)) - selected_floor_objects_all.remove((object_name, selected_asset_id)) - floor_capacity = (floor_capacity[0], floor_capacity[1] + selected_asset_capacity) - if len(selected_floor_objects) == current_number_of_objects: print("No more objects can be added"); break - + selected_floor_objects_all.remove( + (object_name, selected_asset_id) + ) + floor_capacity = ( + floor_capacity[0], + floor_capacity[1] + selected_asset_capacity, + ) + if len(selected_floor_objects) == current_number_of_objects: + print("No more objects can be added") + break + # sort objects by object type object_type2objects = {} for object_name, selected_asset_id in selected_floor_objects: object_type = object_name.split("-")[0] - if object_type not in 
object_type2objects: object_type2objects[object_type] = [] + if object_type not in object_type2objects: + object_type2objects[object_type] = [] object_type2objects[object_type].append((object_name, selected_asset_id)) - + selected_floor_objects_ordered = [] for object_type in object_type2objects: selected_floor_objects_ordered += sorted(object_type2objects[object_type]) return selected_floor_objects_ordered, floor_capacity - - def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vertices, scene): + def get_wall_objects( + self, wall_object_list, wall_capacity, room_size, room_vertices, scene + ): selected_wall_objects_all = [] for wall_object in wall_object_list: object_type = wall_object["object_name"] @@ -319,14 +497,31 @@ def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vert quantity = min(wall_object["quantity"], 10) variance_type = wall_object["variance_type"] - candidates = self.object_retriever.retrieve([f"a 3D model of {object_type}, {object_description}"], self.similarity_threshold_wall) + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_type}, {object_description}"], + self.similarity_threshold_wall, + ) # check on wall objects - candidates = [candidate for candidate in candidates if self.database[candidate[0]]["annotations"]["onWall"] == True] # only select objects on the wall + candidates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onWall"] == True + ] # only select objects on the wall # ignore doors and windows - candidates = [candidate for candidate in candidates if "door" not in self.database[candidate[0]]["annotations"]["category"].lower()] - candidates = [candidate for candidate in candidates if "window" not in self.database[candidate[0]]["annotations"]["category"].lower()] + candidates = [ + candidate + for candidate in candidates + if "door" + not in get_annotations(self.database[candidate[0]])["category"].lower() + ] + 
candidates = [ + candidate + for candidate in candidates + if "window" + not in get_annotations(self.database[candidate[0]])["category"].lower() + ] # check if the object is too big candidates = self.check_object_size(candidates, room_size) @@ -335,20 +530,36 @@ def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vert candidates = self.check_thin_object(candidates) # check if object can be placed on the wall - candidates = self.check_wall_placement(candidates[:20], room_vertices, scene) - - if len(candidates) == 0: print("No candidates found for {} {}".format(object_type, object_description)); continue + candidates = self.check_wall_placement( + candidates[:20], room_vertices, scene + ) + + if len(candidates) == 0: + print( + "No candidates found for {} {}".format( + object_type, object_description + ) + ) + continue # remove used assets top_one_candidate = candidates[0] - if len(candidates) > 1: candidates = [candidate for candidate in candidates if candidate[0] not in self.used_assets] - if len(candidates) == 0: candidates = [top_one_candidate] + if len(candidates) > 1: + candidates = [ + candidate + for candidate in candidates + if candidate[0] not in self.used_assets + ] + if len(candidates) == 0: + candidates = [top_one_candidate] # consider object size difference if object_size is not None and self.consider_size: - candidates = self.object_retriever.compute_size_difference(object_size, candidates) + candidates = self.object_retriever.compute_size_difference( + object_size, candidates + ) - candidates = candidates[:10] # only select top 10 candidates + candidates = candidates[:10] # only select top 10 candidates selected_asset_ids = [] if variance_type == "same": @@ -361,7 +572,8 @@ def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vert selected_candidate = self.random_select(candidates) selected_asset_id = selected_candidate[0] selected_asset_ids.append(selected_asset_id) - if len(candidates) > 1: 
candidates.remove(selected_candidate) + if len(candidates) > 1: + candidates.remove(selected_candidate) for i in range(quantity): selected_asset_id = selected_asset_ids[i] @@ -371,75 +583,95 @@ def get_wall_objects(self, wall_object_list, wall_capacity, room_size, room_vert # reselect objects if they exceed wall capacity, consider the diversity of objects selected_wall_objects = [] while True: - if len(selected_wall_objects_all) == 0: break + if len(selected_wall_objects_all) == 0: + break current_selected_asset_ids = [] current_number_of_objects = len(selected_wall_objects) for object_name, selected_asset_id in selected_wall_objects_all: if selected_asset_id not in current_selected_asset_ids: - selected_asset_size = self.database[selected_asset_id]["assetMetadata"]["boundingBox"] + selected_asset_size = get_bbox_dims( + self.database[selected_asset_id] + ) selected_asset_capacity = selected_asset_size["x"] - if wall_capacity[1] + selected_asset_capacity > wall_capacity[0] and len(selected_wall_objects) > 0: - print(f"{object_type} {object_description} exceeds wall capacity") + if ( + wall_capacity[1] + selected_asset_capacity > wall_capacity[0] + and len(selected_wall_objects) > 0 + ): + print( + f"{object_type} {object_description} exceeds wall capacity" + ) else: current_selected_asset_ids.append(selected_asset_id) selected_wall_objects.append((object_name, selected_asset_id)) - selected_wall_objects_all.remove((object_name, selected_asset_id)) - wall_capacity = (wall_capacity[0], wall_capacity[1] + selected_asset_capacity) - if len(selected_wall_objects) == current_number_of_objects: print("No more objects can be added"); break - + selected_wall_objects_all.remove( + (object_name, selected_asset_id) + ) + wall_capacity = ( + wall_capacity[0], + wall_capacity[1] + selected_asset_capacity, + ) + if len(selected_wall_objects) == current_number_of_objects: + print("No more objects can be added") + break + # sort objects by object type object_type2objects = {} for 
object_name, selected_asset_id in selected_wall_objects: object_type = object_name.split("-")[0] - if object_type not in object_type2objects: object_type2objects[object_type] = [] + if object_type not in object_type2objects: + object_type2objects[object_type] = [] object_type2objects[object_type].append((object_name, selected_asset_id)) - + selected_wall_objects_ordered = [] for object_type in object_type2objects: selected_wall_objects_ordered += sorted(object_type2objects[object_type]) - - return selected_wall_objects_ordered, wall_capacity + return selected_wall_objects_ordered, wall_capacity def check_object_size(self, candidates, room_size): valid_candidates = [] for candidate in candidates: - dimension = self.database[candidate[0]]["assetMetadata"]["boundingBox"] + dimension = get_bbox_dims(self.database[candidate[0]]) size = [dimension["x"], dimension["y"], dimension["z"]] - if size[2] > size[0]: size = [size[2], size[1], size[0]] # make sure that x > z - - if size[0] > room_size[0] * self.object_size_tolerance: continue - if size[1] > room_size[1] * self.object_size_tolerance: continue - if size[2] > room_size[2] * self.object_size_tolerance: continue - if size[0] * size[2] > room_size[0] * room_size[2] * 0.5: continue # TODO: consider using the floor area instead of the room area + if size[2] > size[0]: + size = [size[2], size[1], size[0]] # make sure that x > z + + if size[0] > room_size[0] * self.object_size_tolerance: + continue + if size[1] > room_size[1] * self.object_size_tolerance: + continue + if size[2] > room_size[2] * self.object_size_tolerance: + continue + if size[0] * size[2] > room_size[0] * room_size[2] * 0.5: + continue # TODO: consider using the floor area instead of the room area valid_candidates.append(candidate) return valid_candidates - def check_thin_object(self, candidates): valid_candidates = [] for candidate in candidates: - dimension = self.database[candidate[0]]["assetMetadata"]["boundingBox"] + dimension = 
get_bbox_dims(self.database[candidate[0]]) size = [dimension["x"], dimension["y"], dimension["z"]] - if size[2] > min(size[0], size[1]) * self.thin_threshold: continue + if size[2] > min(size[0], size[1]) * self.thin_threshold: + continue valid_candidates.append(candidate) return valid_candidates - def random_select(self, candidates): if self.random_selection: selected_candidate = random.choice(candidates) else: scores = [candidate[1] for candidate in candidates] scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores selected_index = torch.multinomial(probas, 1).item() selected_candidate = candidates[selected_index] return selected_candidate - def update_floor_capacity(self, room2floor_capacity, scene): for room in scene["rooms"]: room_vertices = room["vertices"] @@ -451,16 +683,15 @@ def update_floor_capacity(self, room2floor_capacity, scene): door_area = door_poly.area if room_poly.contains(door_center): room2floor_capacity[room["id"]][1] += door_area * 0.6 - + if scene["open_walls"] != []: for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: open_wall_poly = Polygon(open_wall_vertices) open_wall_center = open_wall_poly.centroid if room_poly.contains(open_wall_center): room2floor_capacity[room["id"]][1] += open_wall_poly.area * 0.6 - + return room2floor_capacity - def update_wall_capacity(self, room2wall_capacity, scene): for room in scene["rooms"]: @@ -475,7 +706,7 @@ def update_wall_capacity(self, room2wall_capacity, scene): window_width = max(window_x, window_y) if room_poly.contains(window_center): room2wall_capacity[room["id"]][1] += window_width * 0.6 - + if scene["open_walls"] != []: for open_wall_vertices in scene["open_walls"]["openWallBoxes"]: open_wall_poly = Polygon(open_wall_vertices) @@ -485,65 +716,95 @@ def update_wall_capacity(self, room2wall_capacity, scene): open_wall_width = 
max(open_wall_x, open_wall_y) if room_poly.contains(open_wall_center): room2wall_capacity[room["id"]][1] += open_wall_width * 0.6 - + return room2wall_capacity - def check_floor_placement(self, candidates, room_vertices, scene): - room_x = max([vertex[0] for vertex in room_vertices]) - min([vertex[0] for vertex in room_vertices]) - room_z = max([vertex[1] for vertex in room_vertices]) - min([vertex[1] for vertex in room_vertices]) + room_x = max([vertex[0] for vertex in room_vertices]) - min( + [vertex[0] for vertex in room_vertices] + ) + room_z = max([vertex[1] for vertex in room_vertices]) - min( + [vertex[1] for vertex in room_vertices] + ) grid_size = int(max(room_x // 20, room_z // 20)) - + solver = DFS_Solver_Floor(grid_size=grid_size) room_poly = Polygon(room_vertices) - initial_state = self.get_initial_state_floor(room_vertices, scene, add_window=False) + initial_state = self.get_initial_state_floor( + room_vertices, scene, add_window=False + ) grid_points = solver.create_grids(room_poly) grid_points = solver.remove_points(grid_points, initial_state) valid_candidates = [] for candidate in candidates: - object_size = self.database[candidate[0]]["assetMetadata"]["boundingBox"] - object_dim = (object_size["x"]*100 + self.size_buffer, object_size["z"]*100 + self.size_buffer) + object_size = get_bbox_dims(self.database[candidate[0]]) + object_dim = ( + object_size["x"] * 100 + self.size_buffer, + object_size["z"] * 100 + self.size_buffer, + ) solutions = solver.get_all_solutions(room_poly, grid_points, object_dim) solutions = solver.filter_collision(initial_state, solutions) solutions = solver.place_edge(room_poly, solutions, object_dim) - if solutions != []: valid_candidates.append(candidate) - else: print(f"Floor Object {candidate[0]} (size: {object_dim}) cannot be placed in room"); continue + if solutions != []: + valid_candidates.append(candidate) + else: + print( + f"Floor Object {candidate[0]} (size: {object_dim}) cannot be placed in room" + ) + continue 
return valid_candidates - def check_wall_placement(self, candidates, room_vertices, scene): - room_x = max([vertex[0] for vertex in room_vertices]) - min([vertex[0] for vertex in room_vertices]) - room_z = max([vertex[1] for vertex in room_vertices]) - min([vertex[1] for vertex in room_vertices]) + room_x = max([vertex[0] for vertex in room_vertices]) - min( + [vertex[0] for vertex in room_vertices] + ) + room_z = max([vertex[1] for vertex in room_vertices]) - min( + [vertex[1] for vertex in room_vertices] + ) grid_size = int(max(room_x // 20, room_z // 20)) solver = DFS_Solver_Wall(grid_size=grid_size) - + room_poly = Polygon(room_vertices) initial_state = self.get_initial_state_wall(room_vertices, scene) grid_points = solver.create_grids(room_poly) valid_candidates = [] for candidate in candidates: - object_size = self.database[candidate[0]]["assetMetadata"]["boundingBox"] - object_dim = (object_size["x"]*100, object_size["y"]*100, object_size["z"]*100) - - solutions = solver.get_all_solutions(room_poly, grid_points, object_dim, height=0) + object_size = get_bbox_dims(self.database[candidate[0]]) + object_dim = ( + object_size["x"] * 100, + object_size["y"] * 100, + object_size["z"] * 100, + ) + + solutions = solver.get_all_solutions( + room_poly, grid_points, object_dim, height=0 + ) solutions = solver.filter_collision(initial_state, solutions) - if solutions != []: valid_candidates.append(candidate) - else: print(f"Wall Object {candidate[0]} (size: {object_dim}) cannot be placed in room"); continue - - return valid_candidates + if solutions != []: + valid_candidates.append(candidate) + else: + print( + f"Wall Object {candidate[0]} (size: {object_dim}) cannot be placed in room" + ) + continue + return valid_candidates def get_initial_state_floor(self, room_vertices, scene, add_window=True): - doors, windows, open_walls = scene["doors"], scene["windows"], scene["open_walls"] + doors, windows, open_walls = ( + scene["doors"], + scene["windows"], + 
scene["open_walls"], + ) room_poly = Polygon(room_vertices) initial_state = {} @@ -555,9 +816,14 @@ def get_initial_state_floor(self, room_vertices, scene, add_window=True): door_poly = Polygon(door_vertices) door_center = door_poly.centroid if room_poly.contains(door_center): - initial_state[f"door-{i}"] = ((door_center.x, door_center.y), 0, door_vertices, 1) + initial_state[f"door-{i}"] = ( + (door_center.x, door_center.y), + 0, + door_vertices, + 1, + ) i += 1 - + if add_window: for window in windows: window_boxes = window["windowBoxes"] @@ -566,7 +832,12 @@ def get_initial_state_floor(self, room_vertices, scene, add_window=True): window_poly = Polygon(window_vertices) window_center = window_poly.centroid if room_poly.contains(window_center): - initial_state[f"window-{i}"] = ((window_center.x, window_center.y), 0, window_vertices, 1) + initial_state[f"window-{i}"] = ( + (window_center.x, window_center.y), + 0, + window_vertices, + 1, + ) i += 1 if open_walls != []: @@ -575,14 +846,22 @@ def get_initial_state_floor(self, room_vertices, scene, add_window=True): open_wall_poly = Polygon(open_wall_vertices) open_wall_center = open_wall_poly.centroid if room_poly.contains(open_wall_center): - initial_state[f"open-{i}"] = ((open_wall_center.x, open_wall_center.y), 0, open_wall_vertices, 1) + initial_state[f"open-{i}"] = ( + (open_wall_center.x, open_wall_center.y), + 0, + open_wall_vertices, + 1, + ) i += 1 return initial_state - def get_initial_state_wall(self, room_vertices, scene): - doors, windows, open_walls = scene["doors"], scene["windows"], scene["open_walls"] + doors, windows, open_walls = ( + scene["doors"], + scene["windows"], + scene["open_walls"], + ) room_poly = Polygon(room_vertices) initial_state = {} i = 0 @@ -595,9 +874,15 @@ def get_initial_state_wall(self, room_vertices, scene): if room_poly.contains(door_center): door_height = door["assetPosition"]["y"] * 100 * 2 x_min, z_min, x_max, z_max = door_poly.bounds - initial_state[f"door-{i}"] = ((x_min, 
0, z_min), (x_max, door_height, z_max), 0, door_vertices, 1) + initial_state[f"door-{i}"] = ( + (x_min, 0, z_min), + (x_max, door_height, z_max), + 0, + door_vertices, + 1, + ) i += 1 - + for window in windows: window_boxes = window["windowBoxes"] for window_box in window_boxes: @@ -608,9 +893,15 @@ def get_initial_state_wall(self, room_vertices, scene): y_min = window["holePolygon"][0]["y"] * 100 y_max = window["holePolygon"][1]["y"] * 100 x_min, z_min, x_max, z_max = window_poly.bounds - initial_state[f"window-{i}"] = ((x_min, y_min, z_min), (x_max, y_max, z_max), 0, window_vertices, 1) + initial_state[f"window-{i}"] = ( + (x_min, y_min, z_min), + (x_max, y_max, z_max), + 0, + window_vertices, + 1, + ) i += 1 - + if len(open_walls) != 0: open_wall_boxes = open_walls["openWallBoxes"] for open_wall_box in open_wall_boxes: @@ -619,7 +910,13 @@ def get_initial_state_wall(self, room_vertices, scene): open_wall_center = open_wall_poly.centroid if room_poly.contains(open_wall_center): x_min, z_min, x_max, z_max = open_wall_poly.bounds - initial_state[f"open-{i}"] = ((x_min, 0, z_min), (x_max, scene["wall_height"] * 100, z_max), 0, open_wall_vertices, 1) + initial_state[f"open-{i}"] = ( + (x_min, 0, z_min), + (x_max, scene["wall_height"] * 100, z_max), + 0, + open_wall_vertices, + 1, + ) i += 1 - return initial_state \ No newline at end of file + return initial_state diff --git a/modules/prompts.py b/holodeck/generation/prompts.py similarity index 100% rename from modules/prompts.py rename to holodeck/generation/prompts.py diff --git a/modules/rooms.py b/holodeck/generation/rooms.py similarity index 93% rename from modules/rooms.py rename to holodeck/generation/rooms.py index dc1b492..388d1fd 100644 --- a/modules/rooms.py +++ b/holodeck/generation/rooms.py @@ -1,24 +1,27 @@ import ast import copy import math -import json +import os +from difflib import SequenceMatcher + +import compress_json +import compress_pickle +import matplotlib.colors as mcolors +import 
matplotlib.patches as patches +import matplotlib.pyplot as plt import torch -import pickle -import numpy as np from PIL import Image -from tqdm import tqdm from colorama import Fore -import matplotlib.pyplot as plt -import matplotlib.patches as patches -import modules.prompts as prompts -import matplotlib.colors as mcolors -from difflib import SequenceMatcher -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI from shapely.geometry import LineString, Point, Polygon +from tqdm import tqdm + +import holodeck.generation.prompts as prompts +from holodeck.constants import HOLODECK_BASE_DATA_DIR class FloorPlanGenerator(): - def __init__(self, clip_model, clip_process, clip_tokenizer, llm): + def __init__(self, clip_model, clip_process, clip_tokenizer, llm: OpenAI): self.json_template = {"ceilings": [], "children": [], "vertices": None, "floorMaterial": {"name": None, "color": None}, "floorPolygon": [], "id": None, "roomType": None} @@ -277,7 +280,7 @@ def midpoint(p1, p2): class MaterialSelector(): def __init__(self, clip_model, clip_preprocess, clip_tokenizer): - materials = json.load(open("data/materials/material-database.json", "r")) + materials = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR,"materials/material-database.json")) self.selected_materials = materials["Wall"] + materials["Wood"] + materials["Fabric"] self.colors = list(mcolors.CSS4_COLORS.keys()) @@ -290,27 +293,28 @@ def __init__(self, clip_model, clip_preprocess, clip_tokenizer): def load_features(self): try: - self.material_feature_clip = pickle.load(open("data/materials/material_feature_clip.p", "rb")) + self.material_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl")) except: print("Precompute image features for materials...") self.material_feature_clip = [] for material in tqdm(self.selected_materials): - image = self.preprocess(Image.open(f"data/materials/images/{material}.png")).unsqueeze(0) + 
image = self.preprocess(Image.open(os.path.join(HOLODECK_BASE_DATA_DIR, f"materials/images/{material}.png"))).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, keepdim=True) self.material_feature_clip.append(image_features) self.material_feature_clip = torch.vstack(self.material_feature_clip) - pickle.dump(self.material_feature_clip, open("data/materials/material_feature_clip.p", "wb")) + compress_pickle.dump(self.material_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl")) try: - self.color_feature_clip = pickle.load(open("data/materials/color_feature_clip.p", "rb")) + self.color_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl")) except: print("Precompute text features for colors...") with torch.no_grad(): self.color_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(self.colors)) self.color_feature_clip /= self.color_feature_clip.norm(dim=-1, keepdim=True) - pickle.dump(self.color_feature_clip, open("data/materials/color_feature_clip.p", "wb")) + + compress_pickle.dump(self.color_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl")) def match_material(self, queries, topk=5): diff --git a/modules/skybox.py b/holodeck/generation/skybox.py similarity index 99% rename from modules/skybox.py rename to holodeck/generation/skybox.py index 6d00667..8643724 100644 --- a/modules/skybox.py +++ b/holodeck/generation/skybox.py @@ -1,4 +1,5 @@ import random + from procthor.utils.types import RGB, Vector3 skyboxes = ["Sky1", "Sky2", "SkyAlbany", "SkyAlbanyHill", "SkyDalyCity", "SkyEmeryville", "SkyGarden", "SkyTropical", diff --git a/modules/small_objects.py b/holodeck/generation/small_objects.py similarity index 92% rename from modules/small_objects.py rename to holodeck/generation/small_objects.py index b7783a7..2d25779 100644 --- 
a/modules/small_objects.py +++ b/holodeck/generation/small_objects.py @@ -1,20 +1,25 @@ import copy -import torch -import random import multiprocessing +import random + +import torch import torch.nn.functional as F from ai2thor.controller import Controller +from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner +from langchain import OpenAI from procthor.constants import FLOOR_Y from procthor.utils.types import Vector3 -from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner + +from holodeck.constants import THOR_COMMIT_ID +from holodeck.generation.objaverse_retriever import ObjathorRetriever +from holodeck.generation.utils import get_bbox_dims, get_annotations, get_secondary_properties class SmallObjectGenerator(): - def __init__(self, llm, object_retriever, objaverse_version): + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.objaverse_version = objaverse_version # set kinematic to false for small objects self.json_template = {"assetId": None, "id": None, "kinematic": False, @@ -50,7 +55,7 @@ def generate_small_objects(self, scene, controller, receptacle_ids): placement["assetId"] = asset_id placement["id"] = f"{object_name}|{receptacle}" placement["position"] = obj["position"] - asset_height = self.database[asset_id]['assetMetadata']['boundingBox']["y"] + asset_height = get_bbox_dims(self.database[asset_id])["y"] if obj["position"]["y"] + asset_height > scene["wall_height"]: continue # if the object is too high, skip it @@ -66,8 +71,8 @@ def generate_small_objects(self, scene, controller, receptacle_ids): if not small and not thin: placement["kinematic"] = True # set kinematic to true for non-small objects - if "breakable" in self.database[asset_id]["objectMetadata"].keys(): - if self.database[asset_id]["objectMetadata"]["breakable"] == True: placement["kinematic"] = True + if "CanBreak" in 
get_secondary_properties(self.database[asset_id]): + placement["kinematic"] = True placements.append(placement) @@ -147,7 +152,7 @@ def select_small_objects_per_receptacle(self, args): receptacle, small_objects, receptacle2asset_id = args results = [] - receptacle_dimensions = self.database[receptacle2asset_id[receptacle]]['assetMetadata']['boundingBox'] + receptacle_dimensions = get_bbox_dims(self.database[receptacle2asset_id[receptacle]]) receptacle_size = [receptacle_dimensions["x"], receptacle_dimensions["z"]] receptacle_area = receptacle_size[0] * receptacle_size[1] capacity = 0 @@ -160,12 +165,12 @@ def select_small_objects_per_receptacle(self, args): # Select the object candidates = self.object_retriever.retrieve([f"a 3D model of {object_name}"], self.clip_threshold) candidates = [candidate for candidate in candidates - if self.database[candidate[0]]["annotations"]["onObject"] == True] # Only select objects that can be placed on other objects + if get_annotations(self.database[candidate[0]])["onObject"] == True] # Only select objects that can be placed on other objects valid_candidates = [] # Only select objects with high confidence for candidate in candidates: - candidate_dimensions = self.database[candidate[0]]['assetMetadata']['boundingBox'] + candidate_dimensions = get_bbox_dims(self.database[candidate[0]]) candidate_size = [candidate_dimensions["x"], candidate_dimensions["z"]] sorted(candidate_size) if candidate_size[0] < receptacle_size[0] * 0.9 and candidate_size[1] < receptacle_size[1] * 0.9: # if the object is smaller than the receptacle, threshold is 90% @@ -194,7 +199,7 @@ def select_small_objects_per_receptacle(self, args): if len(valid_candidates) > 1: valid_candidates.remove(selected_candidate) for i in range(quantity): - small_object_dimensions = self.database[selected_asset_ids[i]]['assetMetadata']['boundingBox'] + small_object_dimensions = get_bbox_dims(self.database[selected_asset_ids[i]]) small_object_sizes = [small_object_dimensions["x"], 
small_object_dimensions["y"], small_object_dimensions["z"]] sorted(small_object_sizes) # small_object_area = small_object_dimensions["x"] * small_object_dimensions["z"] @@ -208,7 +213,7 @@ def select_small_objects_per_receptacle(self, args): ordered_small_objects = [] for object_name, asset_id in results: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] + dimensions = get_bbox_dims(self.database[asset_id]) size = max(dimensions["x"], dimensions["z"]) ordered_small_objects.append((object_name, asset_id, size)) ordered_small_objects.sort(key=lambda x: x[2], reverse=True) @@ -218,6 +223,7 @@ def select_small_objects_per_receptacle(self, args): def start_controller(self, scene, objaverse_dir): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -275,7 +281,7 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) def check_thin_asset(self, asset_id): - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] + dimensions = get_bbox_dims(self.database[asset_id]) twod_size = (dimensions["x"]*100, dimensions["z"]*100) threshold = 5 # 3cm is the threshold for thin objects # TODO: need a better way to determine thin threshold @@ -294,7 +300,7 @@ def check_thin_asset(self, asset_id): def fix_placement_for_thin_assets(self, placement): asset_id = placement["assetId"] - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] + dimensions = get_bbox_dims(self.database[asset_id]) threshold = 0.03 # 0.03 meter is the threshold for thin objects orginal_rotation = placement["rotation"] @@ -325,7 +331,7 @@ def fix_placement_for_thin_assets(self, placement): def check_small_asset(self, asset_id): - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] + dimensions = get_bbox_dims(self.database[asset_id]) size = (dimensions["x"]*100, dimensions["y"]*100, dimensions["z"]*100) threshold = 25 * 25 # 25cm * 25cm is the 
threshold for small objects @@ -362,7 +368,7 @@ def check_collision(self, placements): remove_ids = [] colliding_ids = list(set([pair[0] for pair in colliding_pairs] + [pair[1] for pair in colliding_pairs])) # order by size from small to large - colliding_ids = sorted(colliding_ids, key=lambda x: self.database[id2assetId[x]]["assetMetadata"]["boundingBox"]["x"] * self.database[id2assetId[x]]["assetMetadata"]["boundingBox"]["z"]) + colliding_ids = sorted(colliding_ids, key=lambda x: get_bbox_dims(self.database[id2assetId[x]])["x"] * get_bbox_dims(self.database[id2assetId[x]])["z"]) for object_id in colliding_ids: remove_ids.append(object_id) colliding_pairs = [pair for pair in colliding_pairs if object_id not in pair] @@ -375,7 +381,7 @@ def check_collision(self, placements): def get_bounding_box(self, placement): asset_id = placement["assetId"] - dimensions = self.database[asset_id]["assetMetadata"]["boundingBox"] + dimensions = get_bbox_dims(self.database[asset_id]) size = (dimensions["x"]*100, dimensions["y"]*100, dimensions["z"]*100) position = placement["position"] box = {"min": [position["x"]*100 - size[0]/2, position["y"]*100 - size[1]/2, position["z"]*100 - size[2]/2], diff --git a/modules/utils.py b/holodeck/generation/utils.py similarity index 87% rename from modules/utils.py rename to holodeck/generation/utils.py index 106d1ad..8fc0912 100644 --- a/modules/utils.py +++ b/holodeck/generation/utils.py @@ -1,12 +1,17 @@ import copy -import json +import os +from argparse import ArgumentParser +from typing import Dict, Any + +import compress_json import numpy as np from PIL import Image -from tqdm import tqdm -from argparse import ArgumentParser from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner from moviepy.editor import TextClip, CompositeVideoClip, concatenate_videoclips, ImageSequenceClip +from tqdm import tqdm + +from holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID def 
all_edges_white(img): @@ -32,6 +37,7 @@ def all_edges_white(img): def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -87,6 +93,7 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -121,15 +128,15 @@ def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024 def main(save_path): - scene = json.load(open(save_path + f"scene.json", "r")) + scene = compress_json.load(save_path + f"scene.json", "r") image = get_top_down_frame(scene) image.save(f"test1.png") - with open(save_path + f"scene.json", "w") as f: - json.dump(scene, f, indent=4) + + compress_json.dump(scene, save_path + f"scene.json", json_kwargs=dict(indent=4)) def visualize_asset(asset_id, version): - empty_house = json.load(open("empty_house.json", "r")) + empty_house = compress_json.load("empty_house.json") empty_house["objects"] = [{ "assetId": asset_id, "id": "test_asset", @@ -152,6 +159,7 @@ def visualize_asset(asset_id, version): def get_room_images(scene, objaverse_asset_dir, width=1024, height=1024): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=1.5, @@ -209,6 +217,7 @@ def get_room_images(scene, objaverse_asset_dir, width=1024, height=1024): def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", makeAgentsVisible=False, visibilityDistance=2, @@ -281,6 +290,7 @@ def add_line_breaks(text, max_line_length): """Saves a top-down video of the house.""" controller = Controller( + commit_id=THOR_COMMIT_ID, agentMode="default", 
makeAgentsVisible=False, visibilityDistance=2, @@ -356,14 +366,54 @@ def add_line_breaks(text, max_line_length): return final_video +def get_asset_metadata(obj_data: Dict[str, Any]): + if "assetMetadata" in obj_data: + return obj_data["assetMetadata"] + elif "thor_metadata" in obj_data: + return obj_data["thor_metadata"]["assetMetadata"] + else: + raise ValueError("Can not find assetMetadata in obj_data") + + +def get_annotations(obj_data: Dict[str, Any]): + if "annotations" in obj_data: + return obj_data["annotations"] + else: + # The assert here is just double-checking that a field that should exist does. + assert "onFloor" in obj_data, f"Can not find annotations in obj_data {obj_data}" + + return obj_data + +def get_bbox_dims(obj_data: Dict[str, Any]): + am = get_asset_metadata(obj_data) + + bbox_info = am["boundingBox"] + + if "x" in bbox_info: + return bbox_info + + if "size" in bbox_info: + return bbox_info["size"] + + mins = bbox_info["min"] + maxs = bbox_info["max"] + + return { + k: maxs[k] - mins[k] for k in ["x", "y", "z"] + } + +def get_secondary_properties(obj_data: Dict[str, Any]): + am = get_asset_metadata(obj_data) + return am["secondaryProperties"] + if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--mode", help = "Mode to run (top_down_frame, top_down_video, room_image).", default = "top_down_frame") parser.add_argument("--objaverse_asset_dir", help = "Directory to load assets from.", default = "./objaverse/processed_2023_09_23_combine_scale") - parser.add_argument("--scene", help = "Scene to load.", default = "data/scenes/a_living_room/a_living_room.json") + parser.add_argument("--scene", help = "Scene to load.", default = os.path.join(HOLODECK_BASE_DATA_DIR, "scenes/a_living_room/a_living_room.json")) args = parser.parse_args() - scene = json.load(open(args.scene, "r")) + scene = compress_json.load(args.scene) if "query" not in scene: scene["query"] = args.scene.split("/")[-1].split(".")[0] diff --git 
a/modules/wall_objects.py b/holodeck/generation/wall_objects.py similarity index 97% rename from modules/wall_objects.py rename to holodeck/generation/wall_objects.py index 0e15448..ccbbf58 100644 --- a/modules/wall_objects.py +++ b/holodeck/generation/wall_objects.py @@ -1,18 +1,22 @@ -import re -import time import copy -import random -import numpy as np import multiprocessing +import random +import re +import time + import matplotlib.pyplot as plt -import modules.prompts as prompts -from langchain import PromptTemplate -from shapely.ops import substring +import numpy as np +from langchain import PromptTemplate, OpenAI from shapely.geometry import Polygon, box, Point, LineString +from shapely.ops import substring + +import holodeck.generation.prompts as prompts +from holodeck.generation.objaverse_retriever import ObjathorRetriever +from holodeck.generation.utils import get_bbox_dims class WallObjectGenerator(): - def __init__(self, llm, object_retriever): + def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.json_template = {"assetId": None, "id": None, "kinematic": True, "position": {}, "rotation": {}, "material": None, "roomId": None} self.llm = llm @@ -78,7 +82,7 @@ def generate_wall_objects_per_room(self, args): constraints = self.parse_wall_object_constraints(constraint_plan, wall_object_names, floor_object_names) # get wall objects - wall_object2dimension = {object_name: self.database[object_id]['assetMetadata']['boundingBox'] for object_name, object_id in wall_object_name2id.items()} + wall_object2dimension = {object_name: get_bbox_dims(self.database[object_id]) for object_name, object_id in wall_object_name2id.items()} wall_objects_list = [(object_name, (wall_object2dimension[object_name]['x'] * 100, wall_object2dimension[object_name]['y'] * 100, wall_object2dimension[object_name]['z'] * 100)) for object_name in constraints] # update constraints with max height @@ -235,7 +239,7 @@ def solution2placement(self, solutions, 
wall_object_name2id, room_id): def order_objects_by_size(self, selected_wall_objects): ordered_wall_objects = [] for object_name, asset_id in selected_wall_objects: - dimensions = self.database[asset_id]['assetMetadata']['boundingBox'] + dimensions = get_bbox_dims(self.database[asset_id]) size = dimensions["x"] ordered_wall_objects.append([object_name, asset_id, size]) ordered_wall_objects.sort(key=lambda x: x[2], reverse=True) diff --git a/modules/walls.py b/holodeck/generation/walls.py similarity index 98% rename from modules/walls.py rename to holodeck/generation/walls.py index 2119538..7b1210f 100644 --- a/modules/walls.py +++ b/holodeck/generation/walls.py @@ -1,14 +1,16 @@ import copy import random + import numpy as np from colorama import Fore -import modules.prompts as prompts -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI from shapely.geometry import LineString, Polygon, Point +import holodeck.generation.prompts as prompts + class WallGenerator(): - def __init__(self, llm): + def __init__(self, llm: OpenAI): self.json_template = {"id": None, "roomId": None, "material": {"name": None, "color": None}, "polygon": []} diff --git a/modules/windows.py b/holodeck/generation/windows.py similarity index 97% rename from modules/windows.py rename to holodeck/generation/windows.py index 7c346b9..a4d44e2 100644 --- a/modules/windows.py +++ b/holodeck/generation/windows.py @@ -1,20 +1,25 @@ -import re import ast import copy -import json +import os import random +import re + +import compress_json import numpy as np from colorama import Fore -import modules.prompts as prompts -from langchain import PromptTemplate +from langchain import PromptTemplate, OpenAI + +import holodeck.generation.prompts as prompts +from holodeck.constants import HOLODECK_BASE_DATA_DIR + class WindowGenerator(): - def __init__(self, llm): + def __init__(self, llm: OpenAI): self.json_template = {"assetId": None, "id": None, "room0": None, "room1": None, "wall0": 
None, "wall1": None, "holePolygon": [], "assetPosition": {}, "roomId": None} - self.window_data = json.load(open("data/windows/window-database.json", "r")) + self.window_data = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR, "windows/window-database.json")) self.window_ids = list(self.window_data.keys()) self.hole_offset = 0.05 # make the hole smaller than windows self.llm = llm diff --git a/holodeck/main.py b/holodeck/main.py new file mode 100644 index 0000000..a6c56ce --- /dev/null +++ b/holodeck/main.py @@ -0,0 +1,198 @@ +import ast +import os +import traceback +from argparse import ArgumentParser + +import compress_json +from tqdm import tqdm + +from holodeck.constants import HOLODECK_BASE_DATA_DIR, OBJATHOR_ASSETS_DIR +from holodeck.generation.holodeck import Holodeck + + +def str2bool(v: str): + v = v.lower().strip() + if v in ("yes", "true", "t", "y", "1"): + return True + elif v in ("no", "false", "f", "n", "0"): + return False + else: + raise ValueError(f"{v} cannot be converted to a bool") + + +def generate_single_scene(args): + folder_name = args.query.replace(" ", "_").replace("'", "") + try: + if args.original_scene is not None: + scene = compress_json.load(args.original_scene) + print(f"Loading exist scene from {args.original_scene}.") + else: + path = os.path.join( + HOLODECK_BASE_DATA_DIR, f"scenes/{folder_name}/{folder_name}.json" + ) + print(f"Loading exist scene from {path}.") + scene = compress_json.load(path) + except: + scene = args.model.get_empty_scene() + print("Generating from an empty scene.") + + try: + _, save_dir = args.model.generate_scene( + scene=scene, + query=args.query, + save_dir=args.save_dir, + used_assets=args.used_assets, + generate_image=ast.literal_eval(args.generate_image), + generate_video=ast.literal_eval(args.generate_video), + add_ceiling=ast.literal_eval(args.add_ceiling), + add_time=ast.literal_eval(args.add_time), + use_constraint=ast.literal_eval(args.use_constraint), + 
use_milp=ast.literal_eval(args.use_milp), + random_selection=ast.literal_eval(args.random_selection), + ) + except: + print( + f"[ERROR] Could not generate scene from {args.query}. Traceback:\n{traceback.format_exc()}" + ) + return + + print( + f"Generation complete for {args.query}. Scene saved and any other data saved to {save_dir}." + ) + + +def generate_multi_scenes(args): + with open(args.query_file, "r") as f: + queries = f.readlines() + queries = [query.strip() for query in queries] + + for query in tqdm(queries): + args.query = query + generate_single_scene(args) + + +def generate_variants(args): + try: + original_scene = compress_json.load(args.original_scene) + except: + raise Exception(f"Could not load original scene from {args.original_scene}.") + + try: + args.model.generate_variants( + query=args.query, + original_scene=original_scene, + save_dir=args.save_dir, + number_of_variants=int(args.number_of_variants), + used_assets=args.used_assets, + ) + except: + print(f"Could not generate variants from {args.query}.") + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument( + "--mode", + help="Mode to run in (generate_single_scene, generate_multi_scenes or generate_variants).", + default="generate_single_scene", + ) + parser.add_argument( + "--query", help="Query to generate scene from.", default="a living room" + ) + parser.add_argument( + "--query_file", help="File to load queries from.", default="./data/queries.txt" + ) + parser.add_argument( + "--number_of_variants", help="Number of variants to generate.", default=5 + ) + parser.add_argument( + "--original_scene", + help="Original scene to generate variants from.", + default=None, + ) + parser.add_argument( + "--openai_api_key", + help="OpenAI API key. If none given, will attempt to read this from the OPENAI_API_KEY env variable.", + default=None, + ) + parser.add_argument( + "--openai_org", + help="OpenAI ORG string. 
If none given, will attempt to read this from the OPENAI_ORG env variable.", + default=None, + ) + parser.add_argument( + "--save_dir", help="Directory to save scene to.", default="./data/scenes" + ) + parser.add_argument( + "--generate_image", + help="Whether to generate an image of the scene.", + default="True", + ) + parser.add_argument( + "--generate_video", + help="Whether to generate a video of the scene.", + default="False", + ) + parser.add_argument( + "--add_ceiling", help="Whether to add a ceiling to the scene.", default="False" + ) + parser.add_argument( + "--add_time", help="Whether to add the time to the scene name.", default="True" + ) + parser.add_argument( + "--use_constraint", help="Whether to use constraints.", default="True" + ) + parser.add_argument( + "--use_milp", + help="Whether to use mixed integer linear programming for the constraint satisfaction solver.", + default="False", + ) + parser.add_argument( + "--random_selection", + help="Whether to more random object selection, set to False will be more precise, True will be more diverse", + default="False", + ) + parser.add_argument( + "--used_assets", + help="a list of assets which we want to exclude from the scene", + default=[], + ) + parser.add_argument( + "--single_room", + help="Whether to generate a single room scene.", + default="False", + ) + + args = parser.parse_args() + + if args.openai_api_key is None: + args.openai_api_key = os.environ.get("OPENAI_API_KEY") + + if args.openai_org is None: + args.openai_org = os.environ.get("OPENAI_ORG") + + args.model = Holodeck( + openai_api_key=args.openai_api_key, + openai_org=args.openai_org, + objaverse_asset_dir=OBJATHOR_ASSETS_DIR, + single_room=ast.literal_eval(args.single_room), + ) + + if args.used_assets != [] and args.used_assets.endswith(".txt"): + with open(args.used_assets, "r") as f: + args.used_assets = f.readlines() + args.used_assets = [asset.strip() for asset in args.used_assets] + else: + args.used_assets = [] + + if 
args.mode == "generate_single_scene": + generate_single_scene(args) + + elif args.mode == "generate_multi_scenes": + generate_multi_scenes(args) + + elif args.mode == "generate_variants": + generate_variants(args) + + else: + raise Exception(f"Mode {args.mode} not supported.") diff --git a/main.py b/main.py deleted file mode 100644 index 5819e06..0000000 --- a/main.py +++ /dev/null @@ -1,101 +0,0 @@ -import ast -import json -from tqdm import tqdm -from argparse import ArgumentParser -from modules.holodeck import Holodeck - - -def generate_single_scene(args): - folder_name = args.query.replace(" ", "_").replace("'", "") - try: - if args.original_scene is not None: - scene = json.load(open(args.original_scene, "r")) - print(f"Loading exist scene from {args.original_scene}.") - else: - scene = json.load(open(f"data/scenes/{folder_name}/{folder_name}.json", "r")) - print(f"Loading exist scene from data/scenes/{folder_name}/{folder_name}.json.") - except: - scene = args.model.get_empty_scene() - print("Generating from an empty scene.") - - args.model.generate_scene( - scene=scene, - query=args.query, - save_dir=args.save_dir, - used_assets=args.used_assets, - generate_image=ast.literal_eval(args.generate_image), - generate_video=ast.literal_eval(args.generate_video), - add_ceiling=ast.literal_eval(args.add_ceiling), - add_time=ast.literal_eval(args.add_time), - use_constraint=ast.literal_eval(args.use_constraint), - use_milp=ast.literal_eval(args.use_milp), - random_selection=ast.literal_eval(args.random_selection) - ) - print(f"Could not generate scene from {args.query}.") - - -def generate_multi_scenes(args): - with open(args.query_file, "r") as f: - queries = f.readlines() - queries = [query.strip() for query in queries] - - for query in tqdm(queries): - args.query = query - generate_single_scene(args) - - -def generate_variants(args): - try: original_scene = json.load(open(args.original_scene, "r")) - except: raise Exception(f"Could not load original scene from 
{args.original_scene}.") - try: - args.model.generate_variants( - query=args.query, - original_scene=original_scene, - save_dir=args.save_dir, - number_of_variants=int(args.number_of_variants), - used_assets=args.used_assets, - ) - except: - print(f"Could not generate variants from {args.query}.") - - -if __name__ == "__main__": - parser = ArgumentParser() - parser.add_argument("--mode", help = "Mode to run in (generate_single_scene, generate_multi_scenes or generate_variants).", default = "generate_single_scene") - parser.add_argument("--query", help = "Query to generate scene from.", default = "a living room") - parser.add_argument("--query_file", help = "File to load queries from.", default = "./data/queries.txt") - parser.add_argument("--number_of_variants", help = "Number of variants to generate.", default = 5) - parser.add_argument("--original_scene", help = "Original scene to generate variants from.", default = None) - parser.add_argument("--openai_api_key", help = "OpenAI API key.", default = None) - parser.add_argument("--objaverse_version", help = "Version of objaverse to use.", default = "09_23_combine_scale") - parser.add_argument("--asset_dir", help = "Directory to load assets from.", default = "./data/objaverse_holodeck/09_23_combine_scale/processed_2023_09_23_combine_scale") - parser.add_argument("--save_dir", help = "Directory to save scene to.", default = "./data/scenes") - parser.add_argument("--generate_image", help = "Whether to generate an image of the scene.", default = "True") - parser.add_argument("--generate_video", help = "Whether to generate a video of the scene.", default = "False") - parser.add_argument("--add_ceiling", help = "Whether to add a ceiling to the scene.", default = "False") - parser.add_argument("--add_time", help = "Whether to add the time to the scene name.", default = "True") - parser.add_argument("--use_constraint", help = "Whether to use constraints.", default = "True") - parser.add_argument("--use_milp", help = 
"Whether to use mixed integer linear programming for the constraint satisfaction solver.", default = "False") - parser.add_argument("--random_selection", help = "Whether to more random object selection, set to False will be more precise, True will be more diverse", default = "False") - parser.add_argument("--used_assets", help = "a list of assets which we want to exclude from the scene", default = []) - parser.add_argument("--single_room", help = "Whether to generate a single room scene.", default = "False") - - args = parser.parse_args() - - args.model = Holodeck(args.openai_api_key, args.objaverse_version, args.asset_dir, ast.literal_eval(args.single_room)) - - if args.used_assets != [] and args.used_assets.endswith(".txt"): - with open(args.used_assets, "r") as f: - args.used_assets = f.readlines() - args.used_assets = [asset.strip() for asset in args.used_assets] - else: - args.used_assets = [] - - if args.mode == "generate_single_scene": - generate_single_scene(args) - - elif args.mode == "generate_multi_scenes": - generate_multi_scenes(args) - - elif args.mode == "generate_variants": - generate_variants(args) diff --git a/modules/holodeck.py b/modules/holodeck.py deleted file mode 100644 index 5cc8833..0000000 --- a/modules/holodeck.py +++ /dev/null @@ -1,304 +0,0 @@ -import os -import json -import datetime -import open_clip -from tqdm import tqdm -from langchain.llms import OpenAI -from sentence_transformers import SentenceTransformer -from modules.rooms import FloorPlanGenerator -from modules.walls import WallGenerator -from modules.doors import DoorGenerator -from modules.windows import WindowGenerator -from modules.object_selector import ObjectSelector -from modules.floor_objects import FloorObjectGenerator -from modules.wall_objects import WallObjectGenerator -from modules.ceiling_objects import CeilingObjectGenerator -from modules.small_objects import SmallObjectGenerator -from modules.lights import generate_lights -from modules.skybox import getSkybox 
-from modules.layers import map_asset2layer -from modules.objaverse_retriever import ObjaverseRetriever -from modules.utils import get_top_down_frame, room_video - - -class Holodeck(): - def __init__(self, openai_api_key, objaverse_version, objaverse_asset_dir, single_room): - os.environ["OPENAI_API_KEY"] = openai_api_key - - # initialize llm - self.llm = OpenAI(model_name="gpt-4-1106-preview", max_tokens=2048) - self.llm_fast = OpenAI(model_name="gpt-3.5-turbo", max_tokens=2048) - - # initialize CLIP - self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k') - self.clip_tokenizer = open_clip.get_tokenizer('ViT-L-14') - - # initialize sentence transformer - self.sbert_model = SentenceTransformer('all-mpnet-base-v2') - - # objaverse version and asset dir - self.objaverse_version = objaverse_version - self.objaverse_asset_dir = objaverse_asset_dir - - # initialize modules - self.retrieval_threshold = 28 - self.object_retriever = ObjaverseRetriever(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.sbert_model, self.objaverse_version, self.retrieval_threshold) - self.floor_generator = FloorPlanGenerator(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm) - self.wall_generator = WallGenerator(self.llm) - self.door_generator = DoorGenerator(self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm) - self.window_generator = WindowGenerator(self.llm) - self.object_selector = ObjectSelector(self.object_retriever, self.llm) - self.floor_object_generator = FloorObjectGenerator(self.llm, self.object_retriever) - self.wall_object_generator = WallObjectGenerator(self.llm, self.object_retriever) - self.ceiling_generator = CeilingObjectGenerator(self.llm, self.object_retriever) - self.small_object_generator = SmallObjectGenerator(self.llm, self.object_retriever, self.objaverse_version) - - # additional requirements - single_room_requirements = "I only need one room" - - 
if single_room: self.additional_requirements_room = single_room_requirements - else: self.additional_requirements_room = "N/A" - - self.additional_requirements_door = "N/A" - self.additional_requirements_window = "Only one wall of each room should have windows" - self.additional_requirements_object = "N/A" - self.additional_requirements_ceiling = "N/A" - - - def get_empty_scene(self): - with open("modules/empty_house.json", "r") as f: - scene = json.load(f) - return scene - - - def empty_house(self, scene): - scene["rooms"] = [] - scene["walls"] = [] - scene["doors"] = [] - scene["windows"] = [] - scene["objects"] = [] - scene["proceduralParameters"]["lights"] = [] - return scene - - - def generate_rooms(self, scene, additional_requirements_room, used_assets=[]): - self.floor_generator.used_assets = used_assets - rooms = self.floor_generator.generate_rooms(scene, additional_requirements_room) - scene["rooms"] = rooms - return scene - - - def generate_walls(self, scene): - wall_height, walls = self.wall_generator.generate_walls(scene) - scene["wall_height"] = wall_height - scene["walls"] = walls - return scene - - - def generate_doors(self, scene, additional_requirements_door="N/A", used_assets=[]): - self.door_generator.used_assets = used_assets - - # generate doors - raw_doorway_plan, doors, room_pairs, open_room_pairs = self.door_generator.generate_doors(scene, additional_requirements_door) - scene["raw_doorway_plan"] = raw_doorway_plan - scene["doors"] = doors - scene["room_pairs"] = room_pairs - scene["open_room_pairs"] = open_room_pairs - - # update walls - updated_walls, open_walls = self.wall_generator.update_walls(scene["walls"], open_room_pairs) - scene["walls"] = updated_walls - scene["open_walls"] = open_walls - return scene - - - def generate_windows(self, scene, additional_requirements_window="I want to install windows to only one wall of each room", used_assets=[]): - self.window_generator.used_assets = used_assets - raw_window_plan, walls, windows = 
self.window_generator.generate_windows(scene, additional_requirements_window) - scene["raw_window_plan"] = raw_window_plan - scene["windows"] = windows - scene["walls"] = walls - return scene - - - def select_objects(self, scene, additional_requirements_object, used_assets=[]): - self.object_selector.used_assets = used_assets - object_selection_plan, selected_objects = self.object_selector.select_objects(scene, additional_requirements_object) - scene["object_selection_plan"] = object_selection_plan - scene["selected_objects"] = selected_objects - return scene - - - def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): - raw_ceiling_plan, ceiling_objects = self.ceiling_generator.generate_ceiling_objects(scene, additional_requirements_ceiling) - scene["ceiling_objects"] = ceiling_objects - scene["raw_ceiling_plan"] = raw_ceiling_plan - return scene - - - def generate_small_objects(self, scene, used_assets=[]): - self.small_object_generator.used_assets = used_assets - controller = self.small_object_generator.start_controller(scene, self.objaverse_asset_dir) - event = controller.reset() - receptacle_ids = [obj["objectId"] for obj in event.metadata["objects"] if obj["receptacle"] and "___" not in obj["objectId"]] - if "Floor" in receptacle_ids: receptacle_ids.remove("Floor") - - try: - small_objects, receptacle2small_objects = self.small_object_generator.generate_small_objects(scene, controller, receptacle_ids) - scene["small_objects"] = small_objects - scene["receptacle2small_objects"] = receptacle2small_objects - except: - scene["small_objects"] = [] - print("Failed to generate small objects") - - controller.stop() # stop controller to avoid memory leak - return scene - - - def change_ceiling_material(self, scene): - first_wall_material = scene["rooms"][0]["wallMaterial"] - scene["proceduralParameters"]["ceilingMaterial"] = first_wall_material - return scene - - - def generate_scene(self, scene, query, save_dir, used_assets=[], 
add_ceiling=False, generate_image=True, generate_video=False, add_time=True, use_constraint=True, random_selection=False, use_milp=False): - # initialize scene - query = query.replace("_", " ") - scene["query"] = query - - # empty house - scene = self.empty_house(scene) - - # generate rooms - scene = self.generate_rooms(scene, additional_requirements_room=self.additional_requirements_room, used_assets=used_assets) - - # generate walls - scene = self.generate_walls(scene) - - # generate doors - scene = self.generate_doors(scene, additional_requirements_door=self.additional_requirements_door, used_assets=used_assets) - - # generate windows - scene = self.generate_windows(scene, additional_requirements_window=self.additional_requirements_window, used_assets=used_assets) - - # select objects - self.object_selector.random_selection = random_selection - scene = self.select_objects(scene, additional_requirements_object=self.additional_requirements_object, used_assets=used_assets) - - # generate floor objects - self.floor_object_generator.use_milp = use_milp - scene["floor_objects"] = self.floor_object_generator.generate_objects(scene, use_constraint=use_constraint) - - # generate wall objects - scene["wall_objects"] = self.wall_object_generator.generate_wall_objects(scene, use_constraint=use_constraint) - - # combine floor and wall objects - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] - - # generate small objects - scene = self.generate_small_objects(scene, used_assets=used_assets) - scene["objects"] += scene["small_objects"] - - # generate ceiling objects - if add_ceiling: - scene = self.generate_ceiling_objects(scene, additional_requirements_ceiling=self.additional_requirements_ceiling) - scene["objects"] += scene["ceiling_objects"] - - # generate lights - lights = generate_lights(scene) - scene["proceduralParameters"]["lights"] = lights - - # assign layers - scene = map_asset2layer(scene) - - # assign skybox - scene = getSkybox(scene) - - # change 
ceiling material - scene = self.change_ceiling_material(scene) - - # create folder - query_name = query.replace(" ", "_").replace("'", "")[:30] - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - - if add_time: folder_name = f"{query_name}-{create_time}" # query name + time - else: folder_name = query_name # query name only - - os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) - with open(f"{save_dir}/{folder_name}/{query_name}.json", "w") as f: - json.dump(scene, f, indent=4) - - # save top down image - if generate_image: - top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) - top_image.show() - top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") - - # save video - if generate_video: - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] + scene["small_objects"] - final_video = room_video(scene, self.objaverse_asset_dir, 1024, 1024) - final_video.write_videofile(f"{save_dir}/{folder_name}/{query_name}.mp4", fps=30) - - return scene - - - def generate_variants(self, query, original_scene, save_dir="data/scenes", number_of_variants=5, used_assets=[]): - self.object_selector.reuse_selection = False # force the selector to retrieve different assets - - # create the list of used assets - used_assets += [obj["assetId"] for obj in original_scene["objects"] + original_scene["windows"] + original_scene["doors"]] - used_assets += [room["floorMaterial"]["name"] for room in original_scene["rooms"]] - used_assets += [wall["material"]["name"] for wall in original_scene["walls"]] - used_assets = list(set(used_assets)) - - variant_scenes = [] - for i in tqdm(range(number_of_variants)): - variant_scene = self.generate_scene(original_scene.copy(), query, save_dir, used_assets, generate_image=True, generate_video=False, add_time=True) - variant_scenes.append(variant_scene) - used_assets += [obj["assetId"] for obj in variant_scene["objects"] + variant_scene["windows"] + 
variant_scene["doors"]] - used_assets += [room["floorMaterial"]["name"] for room in variant_scene["rooms"]] - used_assets += [wall["material"]["name"] for wall in variant_scene["walls"]] - used_assets = list(set(used_assets)) - return variant_scenes - - - def ablate_placement(self, scene, query, save_dir, used_assets=[], add_ceiling=False, generate_image=True, generate_video=False, add_time=True, use_constraint=False, constraint_type="llm"): - # place floor objects - if use_constraint: self.floor_object_generator.constraint_type = constraint_type # ablate the constraint types - scene["floor_objects"] = self.floor_object_generator.generate_objects(scene, use_constraint=use_constraint) - if len(scene["floor_objects"]) == 0: - print("No object is placed, skip this scene") - return None # if no object is placed, return None - # place wall objects - if use_constraint: self.wall_object_generator.constraint_type = constraint_type - scene["wall_objects"] = self.wall_object_generator.generate_wall_objects(scene, use_constraint=use_constraint) - - # combine floor and wall objects - scene["objects"] = scene["floor_objects"] + scene["wall_objects"] - - # generate small objects - scene = self.generate_small_objects(scene, used_assets=used_assets) - scene["objects"] += scene["small_objects"] - - # assign layers - scene = map_asset2layer(scene) - - # take the first 30 characters of the query as the folder name - query_name = query.replace(" ", "_").replace("'", "")[:30] - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - - if add_time: folder_name = f"{query_name}-{create_time}" # query name + time - else: folder_name = query_name # query name only - - os.makedirs(f"{save_dir}/{folder_name}", exist_ok=True) - with open(f"{save_dir}/{folder_name}/{query_name}.json", "w") as f: - json.dump(scene, f, indent=4) - - # save top down image - if generate_image: - top_image = get_top_down_frame(scene, self.objaverse_asset_dir, 1024, 1024) - 
top_image.show() - top_image.save(f"{save_dir}/{folder_name}/{query_name}.png") - - return scene \ No newline at end of file diff --git a/modules/objaverse_retriever.py b/modules/objaverse_retriever.py deleted file mode 100644 index 286a262..0000000 --- a/modules/objaverse_retriever.py +++ /dev/null @@ -1,74 +0,0 @@ -import json -import torch -import pickle - -class ObjaverseRetriever(): - def __init__(self, clip_model, clip_preprocess, clip_tokenizer, sbert_model, version, retrieval_threshold): - self.database = json.load(open(f"./data/objaverse_holodeck/{version}/objaverse_holodeck_database.json", "r")) - self.asset_ids = list(self.database.keys()) - - self.clip_model = clip_model - self.clip_preprocess = clip_preprocess - self.clip_tokenizer = clip_tokenizer - self.sbert_model = sbert_model - - self.clip_features = pickle.load(open(f"data/objaverse_holodeck/{version}/objaverse_holodeck_features_clip_3.p", "rb")).float() # clip features - self.sbert_features = pickle.load(open(f"data/objaverse_holodeck/{version}/objaverse_holodeck_description_features_sbert.p", "rb")).float() # sbert features - self.retrieval_threshold = retrieval_threshold - - self.use_text = True - - - def retrieve(self, queries, threshold=28): - with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) - query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - - clip_similarities = query_feature_clip @ self.clip_features.T * 100 - clip_similarities = clip_similarities.reshape((len(queries), len(self.asset_ids), 3)) - clip_similarities = torch.max(clip_similarities, dim=2).values - - query_feature_sbert = self.sbert_model.encode(queries, convert_to_tensor=True, show_progress_bar=False) - sbert_similarities = query_feature_sbert @ self.sbert_features.T - - if self.use_text: similarities = clip_similarities + sbert_similarities - else: similarities = clip_similarities - - threshold_indices = torch.where(clip_similarities > threshold) - - 
unsorted_results = [] - for query_index, asset_index in zip(*threshold_indices): - score = similarities[query_index, asset_index].item() - unsorted_results.append((self.asset_ids[asset_index], score)) - - # Sorting the results in descending order by score - results = sorted(unsorted_results, key=lambda x: x[1], reverse=True) - - return results - - - def compute_size_difference(self, target_size, candidates): - candidate_sizes = [] - for uid, _ in candidates: - size = self.database[uid]['assetMetadata']['boundingBox'] - size_list = [size['x'] * 100, size['y'] * 100, size['z'] * 100] - size_list.sort() - candidate_sizes.append(size_list) - - candidate_sizes = torch.tensor(candidate_sizes) - - target_size_list = list(target_size) - target_size_list.sort() - target_size = torch.tensor(target_size_list) - - size_difference = abs(candidate_sizes - target_size).mean(axis=1)/100 - size_difference = size_difference.tolist() - - candidates_with_size_difference = [] - for i, (uid, score) in enumerate(candidates): - candidates_with_size_difference.append((uid, score - size_difference[i] * 10)) - - # sort the candidates by score - candidates_with_size_difference = sorted(candidates_with_size_difference, key=lambda x: x[1], reverse=True) - - return candidates_with_size_difference \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9292698..6b748f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,8 @@ langchain==0.0.171 torch==1.13.1 torchvision==0.14.1 gurobipy==10.0.3 +Werkzeug==2.0.1 +Flask==2.0.1 +compress-pickle +compress-json +black From dd32354223cf094516fc83704926ae4f229e46a4 Mon Sep 17 00:00:00 2001 From: lucaw Date: Fri, 23 Feb 2024 12:03:42 -0800 Subject: [PATCH 2/6] Auto formatting via black. 
--- connect_to_unity.py | 6 +- holodeck/constants.py | 2 +- holodeck/generation/ceiling_objects.py | 85 +- holodeck/generation/doors.py | 383 ++++-- holodeck/generation/floor_objects.py | 1240 ++++++++++++++------ holodeck/generation/holodeck.py | 8 +- holodeck/generation/layers.py | 64 +- holodeck/generation/lights.py | 8 +- holodeck/generation/milp_utils.py | 391 +++--- holodeck/generation/objaverse_retriever.py | 4 +- holodeck/generation/prompts.py | 12 +- holodeck/generation/rooms.py | 265 +++-- holodeck/generation/skybox.py | 30 +- holodeck/generation/small_objects.py | 307 +++-- holodeck/generation/utils.py | 230 ++-- holodeck/generation/wall_objects.py | 435 +++++-- holodeck/generation/walls.py | 193 +-- holodeck/generation/windows.py | 325 +++-- 18 files changed, 2676 insertions(+), 1312 deletions(-) diff --git a/connect_to_unity.py b/connect_to_unity.py index c159378..8ad09bd 100644 --- a/connect_to_unity.py +++ b/connect_to_unity.py @@ -6,7 +6,11 @@ from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID, OBJATHOR_ASSETS_DIR +from holodeck.constants import ( + HOLODECK_BASE_DATA_DIR, + THOR_COMMIT_ID, + OBJATHOR_ASSETS_DIR, +) parser = ArgumentParser() parser.add_argument( diff --git a/holodeck/constants.py b/holodeck/constants.py index ce7fd8a..c155e18 100644 --- a/holodeck/constants.py +++ b/holodeck/constants.py @@ -17,4 +17,4 @@ HOLODECK_BASE_DATA_DIR, "thor_object_data", "annotations.json.gz" ) -THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" \ No newline at end of file +THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" diff --git a/holodeck/generation/ceiling_objects.py b/holodeck/generation/ceiling_objects.py index e3908ce..b2d0bfe 100644 --- a/holodeck/generation/ceiling_objects.py +++ b/holodeck/generation/ceiling_objects.py @@ -12,29 +12,43 @@ from holodeck.generation.utils import get_bbox_dims, 
get_annotations -class CeilingObjectGenerator(): +class CeilingObjectGenerator: def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.ceiling_template = PromptTemplate(input_variables=["input", "rooms", "additional_requirements"], - template=prompts.ceiling_selection_prompt) - + self.ceiling_template = PromptTemplate( + input_variables=["input", "rooms", "additional_requirements"], + template=prompts.ceiling_selection_prompt, + ) def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A"): room_types = [room["roomType"] for room in scene["rooms"]] room_types_str = str(room_types).replace("'", "")[1:-1] - ceiling_prompt = self.ceiling_template.format(input=scene["query"], - rooms=room_types_str, - additional_requirements=additional_requirements_ceiling) + ceiling_prompt = self.ceiling_template.format( + input=scene["query"], + rooms=room_types_str, + additional_requirements=additional_requirements_ceiling, + ) - if "raw_ceiling_plan" not in scene: raw_ceiling_plan = self.llm(ceiling_prompt) - else: raw_ceiling_plan = scene["raw_ceiling_plan"] + if "raw_ceiling_plan" not in scene: + raw_ceiling_plan = self.llm(ceiling_prompt) + else: + raw_ceiling_plan = scene["raw_ceiling_plan"] print(f"\nUser: {ceiling_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the ceiling plan:\n{raw_ceiling_plan}{Fore.RESET}") + print( + f"{Fore.GREEN}AI: Here is the ceiling plan:\n{raw_ceiling_plan}{Fore.RESET}" + ) ceiling_objects = [] parsed_ceiling_plan = self.parse_ceiling_plan(raw_ceiling_plan) @@ -44,9 +58,10 @@ def generate_ceiling_objects(self, scene, 
additional_requirements_ceiling="N/A") if room is None: print(f"Room type {room_type} not found in scene.") continue - + ceiling_object_id = self.select_ceiling_object(ceiling_object_description) - if ceiling_object_id is None: continue + if ceiling_object_id is None: + continue # Temporary solution: place at the center of the room dimension = get_bbox_dims(self.database[ceiling_object_id]) @@ -55,64 +70,74 @@ def generate_ceiling_objects(self, scene, additional_requirements_ceiling="N/A") x = floor_polygon.centroid.x z = floor_polygon.centroid.y y = scene["wall_height"] - dimension["y"] / 2 - + ceiling_object = copy.deepcopy(self.json_template) ceiling_object["assetId"] = ceiling_object_id ceiling_object["id"] = f"ceiling ({room_type})" ceiling_object["position"] = {"x": x, "y": y, "z": z} ceiling_object["rotation"] = {"x": 0, "y": 0, "z": 0} ceiling_object["roomId"] = room["id"] - ceiling_object["object_name"] = get_annotations(self.database[ceiling_object_id])["category"] + ceiling_object["object_name"] = get_annotations( + self.database[ceiling_object_id] + )["category"] ceiling_objects.append(ceiling_object) return raw_ceiling_plan, ceiling_objects - def parse_ceiling_plan(self, raw_ceiling_plan): plans = [plan.lower() for plan in raw_ceiling_plan.split("\n") if "|" in plan] parsed_plans = {} for plan in plans: # remove index - pattern = re.compile(r'^\d+\.\s*') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] # remove the last period + pattern = re.compile(r"^\d+\.\s*") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] # remove the last period room_type, ceiling_object_description = plan.split("|") room_type = room_type.strip() ceiling_object_description = ceiling_object_description.strip() - if room_type not in parsed_plans: # only consider one type of ceiling object for each room + if ( + room_type not in parsed_plans + ): # only consider one type of ceiling object for each room parsed_plans[room_type] = 
ceiling_object_description return parsed_plans - def get_room_by_type(self, rooms, room_type): for room in rooms: if room["roomType"] == room_type: return room return None - def select_ceiling_object(self, description): - candidates = self.object_retriever.retrieve([f"a 3D model of {description}"], threshold=29) - ceiling_candiates = [candidate for candidate in candidates if get_annotations(self.database[candidate[0]])["onCeiling"] == True] + candidates = self.object_retriever.retrieve( + [f"a 3D model of {description}"], threshold=29 + ) + ceiling_candiates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onCeiling"] == True + ] valid_ceiling_candiates = [] for candidate in ceiling_candiates: dimension = get_bbox_dims(self.database[candidate[0]]) - if dimension["y"] <= 1.0: valid_ceiling_candiates.append(candidate) + if dimension["y"] <= 1.0: + valid_ceiling_candiates.append(candidate) if len(valid_ceiling_candiates) == 0: print("No ceiling object found for description: {}".format(description)) return None - + selected_ceiling_object_id = self.random_select(valid_ceiling_candiates)[0] return selected_ceiling_object_id - def random_select(self, candidates): scores = [candidate[1] for candidate in candidates] scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores selected_index = torch.multinomial(probas, 1).item() selected_candidate = candidates[selected_index] - return selected_candidate \ No newline at end of file + return selected_candidate diff --git a/holodeck/generation/doors.py b/holodeck/generation/doors.py index c5f122b..8df3d68 100644 --- a/holodeck/generation/doors.py +++ b/holodeck/generation/doors.py @@ -15,14 +15,24 @@ from holodeck.constants import HOLODECK_BASE_DATA_DIR -class DoorGenerator(): +class DoorGenerator: def __init__(self, clip_model, 
clip_preprocess, clip_tokenizer, llm: OpenAI): - self.json_template = {"assetId": None, "id": None, "openable": False, - "openness": 0, "room0": None, "room1": None, - "wall0": None, "wall1": None, "holePolygon": [], - "assetPosition": {}} - - self.door_data = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door-database.json")) + self.json_template = { + "assetId": None, + "id": None, + "openable": False, + "openness": 0, + "room0": None, + "room1": None, + "wall0": None, + "wall1": None, + "holePolygon": [], + "assetPosition": {}, + } + + self.door_data = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door-database.json") + ) self.door_ids = list(self.door_data.keys()) self.used_assets = [] @@ -32,25 +42,42 @@ def __init__(self, clip_model, clip_preprocess, clip_tokenizer, llm: OpenAI): self.load_features() self.llm = llm - self.doorway_template = PromptTemplate(input_variables=["input", "rooms", "room_sizes", "room_pairs", "additional_requirements"], - template=prompts.doorway_prompt) - + self.doorway_template = PromptTemplate( + input_variables=[ + "input", + "rooms", + "room_sizes", + "room_pairs", + "additional_requirements", + ], + template=prompts.doorway_prompt, + ) def load_features(self): try: - self.door_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl")) + self.door_feature_clip = compress_pickle.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl") + ) except: print("Precompute image features for doors...") self.door_feature_clip = [] for door_id in tqdm(self.door_ids): - image = self.preprocess(Image.open(os.path.join(HOLODECK_BASE_DATA_DIR, f"doors/images/{door_id}.png"))).unsqueeze(0) + image = self.preprocess( + Image.open( + os.path.join( + HOLODECK_BASE_DATA_DIR, f"doors/images/{door_id}.png" + ) + ) + ).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, 
keepdim=True) self.door_feature_clip.append(image_features) self.door_feature_clip = torch.vstack(self.door_feature_clip) - compress_pickle.dump(self.door_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl")) - + compress_pickle.dump( + self.door_feature_clip, + os.path.join(HOLODECK_BASE_DATA_DIR, "doors/door_feature_clip.pkl"), + ) def generate_doors(self, scene, additional_requirements_door): # get room pairs @@ -60,70 +87,103 @@ def generate_doors(self, scene, additional_requirements_door): room_sizes_str = self.get_room_size_str(scene) room_pairs_str = str(room_pairs).replace("'", "")[1:-1] - doorway_prompt = self.doorway_template.format(input=scene["query"], - rooms=room_types_str, - room_sizes=room_sizes_str, - room_pairs=room_pairs_str, - additional_requirements=additional_requirements_door) - + doorway_prompt = self.doorway_template.format( + input=scene["query"], + rooms=room_types_str, + room_sizes=room_sizes_str, + room_pairs=room_pairs_str, + additional_requirements=additional_requirements_door, + ) + # generate raw doorway plan if not exist - if "raw_doorway_plan" not in scene: raw_doorway_plan = self.llm(doorway_prompt) - else: raw_doorway_plan = scene["raw_doorway_plan"] + if "raw_doorway_plan" not in scene: + raw_doorway_plan = self.llm(doorway_prompt) + else: + raw_doorway_plan = scene["raw_doorway_plan"] print(f"\nUser: {doorway_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the doorway plan:\n{raw_doorway_plan}{Fore.RESET}") + print( + f"{Fore.GREEN}AI: Here is the doorway plan:\n{raw_doorway_plan}{Fore.RESET}" + ) rooms = scene["rooms"] walls = scene["walls"] doors = [] open_room_pairs = [] plans = [plan.lower() for plan in raw_doorway_plan.split("\n") if "|" in plan] - room_types = [room["roomType"] for room in rooms] + ['exterior'] + room_types = [room["roomType"] for room in rooms] + ["exterior"] for i, plan in enumerate(plans): # TODO: rewrite the parsing logic current_door = copy.deepcopy(self.json_template) 
parsed_plan = self.parse_door_plan(plan) - if parsed_plan == None: continue + if parsed_plan == None: + continue - if parsed_plan["room_type0"] not in room_types or parsed_plan["room_type1"] not in room_types: - print(f"{Fore.RED}{parsed_plan['room_type0']} or {parsed_plan['room_type1']} not exist{Fore.RESET}") + if ( + parsed_plan["room_type0"] not in room_types + or parsed_plan["room_type1"] not in room_types + ): + print( + f"{Fore.RED}{parsed_plan['room_type0']} or {parsed_plan['room_type1']} not exist{Fore.RESET}" + ) continue current_door["room0"] = parsed_plan["room_type0"] current_door["room1"] = parsed_plan["room_type1"] - current_door["id"] = f"door|{i}|{parsed_plan['room_type0']}|{parsed_plan['room_type1']}" + current_door["id"] = ( + f"door|{i}|{parsed_plan['room_type0']}|{parsed_plan['room_type1']}" + ) if parsed_plan["connection_type"] == "open": - open_room_pairs.append((parsed_plan["room_type0"], parsed_plan["room_type1"])) + open_room_pairs.append( + (parsed_plan["room_type0"], parsed_plan["room_type1"]) + ) continue # get connection exterior = False - if parsed_plan["room_type0"] == "exterior" or parsed_plan["room_type1"] == "exterior": - connection = self.get_connection_exterior(parsed_plan["room_type0"], parsed_plan["room_type1"], walls) + if ( + parsed_plan["room_type0"] == "exterior" + or parsed_plan["room_type1"] == "exterior" + ): + connection = self.get_connection_exterior( + parsed_plan["room_type0"], parsed_plan["room_type1"], walls + ) exterior = True else: - connection = self.get_connection(parsed_plan["room_type0"], parsed_plan["room_type1"], walls) - - if connection == None: continue + connection = self.get_connection( + parsed_plan["room_type0"], parsed_plan["room_type1"], walls + ) + + if connection == None: + continue # get wall information current_door["wall0"] = connection["wall0"] current_door["wall1"] = connection["wall1"] # get door asset - if exterior: parsed_plan["connection_type"] = "doorway" # force to use doorway for 
exterior - door_id = self.select_door(parsed_plan["connection_type"], parsed_plan["size"], parsed_plan["style"]) + if exterior: + parsed_plan["connection_type"] = ( + "doorway" # force to use doorway for exterior + ) + door_id = self.select_door( + parsed_plan["connection_type"], + parsed_plan["size"], + parsed_plan["style"], + ) current_door["assetId"] = door_id - + if parsed_plan["connection_type"] == "doorway" and not exterior: current_door["openable"] = True current_door["openness"] = 1 - + # get polygon - door_dimension = self.door_data[door_id]['boundingBox'] - door_polygon = self.get_door_polygon(connection["segment"], door_dimension, parsed_plan["connection_type"]) + door_dimension = self.door_data[door_id]["boundingBox"] + door_polygon = self.get_door_polygon( + connection["segment"], door_dimension, parsed_plan["connection_type"] + ) if door_polygon != None: polygon, position, door_boxes, door_segment = door_polygon @@ -132,36 +192,44 @@ def generate_doors(self, scene, additional_requirements_door): current_door["doorBoxes"] = door_boxes current_door["doorSegment"] = door_segment doors.append(current_door) - + # check if there is any room has no door connected_rooms = [] for door in doors: connected_rooms.append(door["room0"]) connected_rooms.append(door["room1"]) - + for pair in open_room_pairs: connected_rooms.append(pair[0]) connected_rooms.append(pair[1]) - + unconnected_rooms = [] for room in rooms: - if room["roomType"] not in connected_rooms: unconnected_rooms.append(room["roomType"]) - + if room["roomType"] not in connected_rooms: + unconnected_rooms.append(room["roomType"]) + if len(unconnected_rooms) > 0: for room in unconnected_rooms: - if room in connected_rooms: continue + if room in connected_rooms: + continue current_door = copy.deepcopy(self.json_template) - current_walls = [wall for wall in walls if wall["roomId"] == room and "exterior" not in wall["id"] and len(wall["connected_rooms"]) != 0] + current_walls = [ + wall + for wall in walls 
+ if wall["roomId"] == room + and "exterior" not in wall["id"] + and len(wall["connected_rooms"]) != 0 + ] widest_wall = max(current_walls, key=lambda x: x["width"]) - room_to_connect = widest_wall['connected_rooms'][0]['roomId'] + room_to_connect = widest_wall["connected_rooms"][0]["roomId"] current_door["room0"] = room current_door["room1"] = room_to_connect current_door["id"] = f"door|{i}|{room}|{room_to_connect}" - wall_to_connect = widest_wall['connected_rooms'][0]['wallId'] + wall_to_connect = widest_wall["connected_rooms"][0]["wallId"] current_door["wall0"] = widest_wall["id"] current_door["wall1"] = wall_to_connect @@ -170,10 +238,14 @@ def generate_doors(self, scene, additional_requirements_door): current_door["assetId"] = door_id # get polygon - door_dimension = self.door_data[door_id]['boundingBox'] - door_type = self.door_data[door_id]['type'] - - door_polygon = self.get_door_polygon(widest_wall["connected_rooms"][0]["intersection"], door_dimension, door_type) + door_dimension = self.door_data[door_id]["boundingBox"] + door_type = self.door_data[door_id]["type"] + + door_polygon = self.get_door_polygon( + widest_wall["connected_rooms"][0]["intersection"], + door_dimension, + door_type, + ) if door_polygon != None: polygon, position, door_boxes, door_segment = door_polygon @@ -185,14 +257,13 @@ def generate_doors(self, scene, additional_requirements_door): connected_rooms.append(room) connected_rooms.append(room_to_connect) - + return raw_doorway_plan, doors, room_pairs, open_room_pairs - def get_room(self, rooms, room_type): for room in rooms: - if room_type == room["roomType"]: return room - + if room_type == room["roomType"]: + return room def parse_door_plan(self, plan): try: @@ -202,19 +273,18 @@ def parse_door_plan(self, plan): "room_type1": room_type1.strip(), "connection_type": connection_type.strip(), "size": size.strip(), - "style": style.strip() + "style": style.strip(), } except: print(f"{Fore.RED}Invalid door plan:{Fore.RESET}", plan) return 
None - def get_door_polygon(self, segment, door_dimension, connection_type): door_width = door_dimension["x"] door_height = door_dimension["y"] - start = np.array([segment[0]['x'], segment[0]['z']]) - end = np.array([segment[1]['x'], segment[1]['z']]) + start = np.array([segment[0]["x"], segment[0]["z"]]) + end = np.array([segment[1]["x"], segment[1]["z"]]) original_vector = end - start original_length = np.linalg.norm(original_vector) @@ -223,23 +293,29 @@ def get_door_polygon(self, segment, door_dimension, connection_type): if door_width >= original_length: print(f"{Fore.RED}The wall is too narrow to install a door.{Fore.RESET}") return None - + else: door_start = random.uniform(0, original_length - door_width) door_end = door_start + door_width - polygon = [{"x": door_start, "y": 0, "z": 0}, - {"x": door_end, "y": door_height, "z": 0}] - - door_segment = [list(start + normalized_vector * door_start), list(start + normalized_vector * door_end)] + polygon = [ + {"x": door_start, "y": 0, "z": 0}, + {"x": door_end, "y": door_height, "z": 0}, + ] + + door_segment = [ + list(start + normalized_vector * door_start), + list(start + normalized_vector * door_end), + ] door_boxes = self.create_rectangles(door_segment, connection_type) - - position = {"x": (polygon[0]["x"] + polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - + + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + return polygon, position, door_boxes, door_segment - def get_connection(self, room0_id, room1_id, walls): room0_walls = [wall for wall in walls if wall["roomId"] == room0_id] @@ -249,41 +325,59 @@ def get_connection(self, room0_id, room1_id, walls): if len(connections) != 0: for connection in connections: if connection["roomId"] == room1_id: - valid_connections.append({"wall0": wall["id"], - "wall1": connection["wallId"], 
- "segment": connection["intersection"]}) - + valid_connections.append( + { + "wall0": wall["id"], + "wall1": connection["wallId"], + "segment": connection["intersection"], + } + ) + if len(valid_connections) == 0: - print(f"{Fore.RED}There is no wall between {room0_id} and {room1_id}{Fore.RESET}") + print( + f"{Fore.RED}There is no wall between {room0_id} and {room1_id}{Fore.RESET}" + ) return None elif len(valid_connections) == 1: connection = valid_connections[0] - - else: # handle the case when there are multiple ways - print(f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}") + + else: # handle the case when there are multiple ways + print( + f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}" + ) longest_segment_length = 0 connection = None for current_connection in valid_connections: current_segment = current_connection["segment"] - current_segment_length = np.linalg.norm(np.array([current_segment[0]["x"], current_segment[0]["z"]]) - np.array([current_segment[1]["x"], current_segment[1]["z"]])) + current_segment_length = np.linalg.norm( + np.array([current_segment[0]["x"], current_segment[0]["z"]]) + - np.array([current_segment[1]["x"], current_segment[1]["z"]]) + ) if current_segment_length > longest_segment_length: connection = current_connection longest_segment_length = current_segment_length return connection - def get_connection_exterior(self, room0_id, room1_id, walls): room_id = room0_id if room0_id != "exterior" else room1_id - interior_walls = [wall["id"] for wall in walls if wall["roomId"] == room_id and "exterior" not in wall["id"]] - exterior_walls = [wall["id"] for wall in walls if wall["roomId"] == room_id and "exterior" in wall["id"]] + interior_walls = [ + wall["id"] + for wall in walls + if wall["roomId"] == room_id and "exterior" not in wall["id"] + ] + exterior_walls = [ + wall["id"] + for wall in walls + if wall["roomId"] == room_id and "exterior" in wall["id"] + ] wall_pairs 
= [] for interior_wall in interior_walls: for exterior_wall in exterior_walls: if interior_wall in exterior_wall: wall_pairs.append({"wall0": exterior_wall, "wall1": interior_wall}) - + valid_connections = [] for wall_pair in wall_pairs: wall0 = wall_pair["wall0"] @@ -292,52 +386,70 @@ def get_connection_exterior(self, room0_id, room1_id, walls): if wall["id"] == wall0: wall0_segment = wall["segment"] break - segment = [{"x": wall0_segment[0][0], "y": 0.0, "z": wall0_segment[0][1]}, - {"x": wall0_segment[1][0], "y": 0.0, "z": wall0_segment[1][1]}] + segment = [ + {"x": wall0_segment[0][0], "y": 0.0, "z": wall0_segment[0][1]}, + {"x": wall0_segment[1][0], "y": 0.0, "z": wall0_segment[1][1]}, + ] - valid_connections.append({"wall0": wall0, "wall1": wall1, "segment": segment}) - - if len(valid_connections) == 0: return None + valid_connections.append( + {"wall0": wall0, "wall1": wall1, "segment": segment} + ) - elif len(valid_connections) == 1: return valid_connections[0] + if len(valid_connections) == 0: + return None + + elif len(valid_connections) == 1: + return valid_connections[0] else: - print(f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}") + print( + f"{Fore.RED}There are multiple ways between {room0_id} and {room1_id}{Fore.RESET}" + ) longest_segment_length = 0 connection = None for current_connection in valid_connections: current_segment = current_connection["segment"] - current_segment_length = np.linalg.norm(np.array([current_segment[0]["x"], current_segment[0]["z"]]) - np.array([current_segment[1]["x"], current_segment[1]["z"]])) + current_segment_length = np.linalg.norm( + np.array([current_segment[0]["x"], current_segment[0]["z"]]) + - np.array([current_segment[1]["x"], current_segment[1]["z"]]) + ) if current_segment_length > longest_segment_length: connection = current_connection longest_segment_length = current_segment_length return connection - def select_door(self, door_type, door_size, query): with 
torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer([query])) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer([query]) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - + clip_similarity = query_feature_clip @ self.door_feature_clip.T sorted_indices = torch.argsort(clip_similarity, descending=True)[0] valid_door_ids = [] for ind in sorted_indices: door_id = self.door_ids[ind] - if self.door_data[door_id]["type"] == door_type and self.door_data[door_id]["size"] == door_size: + if ( + self.door_data[door_id]["type"] == door_type + and self.door_data[door_id]["size"] == door_size + ): valid_door_ids.append(door_id) top_door_id = valid_door_ids[0] - valid_door_ids = [door_id for door_id in valid_door_ids if door_id not in self.used_assets] - if len(valid_door_ids) == 0: valid_door_ids = [top_door_id] - + valid_door_ids = [ + door_id for door_id in valid_door_ids if door_id not in self.used_assets + ] + if len(valid_door_ids) == 0: + valid_door_ids = [top_door_id] + return valid_door_ids[0] - def create_rectangles(self, segment, connection_type): box_width = 1.0 - if connection_type == "doorframe": box_width = 1.0 + if connection_type == "doorframe": + box_width = 1.0 # Convert to numpy arrays for easier calculations pt1 = np.array(segment[0]) @@ -352,21 +464,37 @@ def create_rectangles(self, segment, connection_type): perp_vec *= box_width # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] return top_rectangle, bottom_rectangle - - + def get_room_pairs_str(self, rooms, walls): - room_pairs = [(wall["roomId"], 
wall["connected_rooms"][0]["roomId"]) for wall in walls if len(wall["connected_rooms"]) == 1 and wall["width"] >= 2.0] + room_pairs = [ + (wall["roomId"], wall["connected_rooms"][0]["roomId"]) + for wall in walls + if len(wall["connected_rooms"]) == 1 and wall["width"] >= 2.0 + ] for wall in walls: if "exterior" in wall["id"]: room_pairs.append(("exterior", wall["roomId"])) room_pairs_no_dup = [] for pair in room_pairs: - if pair not in room_pairs_no_dup and (pair[1], pair[0]) not in room_pairs_no_dup: + if ( + pair not in room_pairs_no_dup + and (pair[1], pair[0]) not in room_pairs_no_dup + ): room_pairs_no_dup.append(pair) room_pairs_clean = [] @@ -374,12 +502,13 @@ def get_room_pairs_str(self, rooms, walls): for pair in room_pairs_no_dup: if pair[0] not in existed_rooms or pair[1] not in existed_rooms: room_pairs_clean.append(pair) - - if pair[0] not in existed_rooms: existed_rooms.append(pair[0]) - if pair[1] not in existed_rooms: existed_rooms.append(pair[1]) - return room_pairs_clean + if pair[0] not in existed_rooms: + existed_rooms.append(pair[0]) + if pair[1] not in existed_rooms: + existed_rooms.append(pair[1]) + return room_pairs_clean def get_room_size_str(self, scene): wall_height = scene["wall_height"] @@ -387,21 +516,31 @@ def get_room_size_str(self, scene): for room in scene["rooms"]: room_name = room["roomType"] room_size = self.get_room_size(room) - room_size_str += f"{room_name}: {room_size[0]} m x {room_size[1]} m x {wall_height} m\n" + room_size_str += ( + f"{room_name}: {room_size[0]} m x {room_size[1]} m x {wall_height} m\n" + ) return room_size_str - def get_room_size(self, room): floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = [point['z'] for point in floor_polygon] + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] return (max(x_values) - min(x_values), max(z_values) - min(z_values)) - def get_random_door(self, wall_width): - 
single_doors = [door_id for door_id in self.door_ids if self.door_data[door_id]["size"] == "single"] - double_doors = [door_id for door_id in self.door_ids if self.door_data[door_id]["size"] == "double"] - - if wall_width < 2.0: return random.choice(single_doors) - else: return random.choice(double_doors+single_doors) \ No newline at end of file + single_doors = [ + door_id + for door_id in self.door_ids + if self.door_data[door_id]["size"] == "single" + ] + double_doors = [ + door_id + for door_id in self.door_ids + if self.door_data[door_id]["size"] == "double" + ] + + if wall_width < 2.0: + return random.choice(single_doors) + else: + return random.choice(double_doors + single_doors) diff --git a/holodeck/generation/floor_objects.py b/holodeck/generation/floor_objects.py index f29b88d..1b01c76 100644 --- a/holodeck/generation/floor_objects.py +++ b/holodeck/generation/floor_objects.py @@ -20,26 +20,36 @@ from holodeck.generation.utils import get_bbox_dims -class FloorObjectGenerator(): +class FloorObjectGenerator: def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database - self.constraint_prompt = PromptTemplate(input_variables=["room_type", "room_size", "objects"], - template=prompts.object_constraints_prompt) - self.baseline_prompt = PromptTemplate(input_variables=["room_type", "room_size", "objects"], - template=prompts.floor_baseline_prompt) + self.constraint_prompt = PromptTemplate( + input_variables=["room_type", "room_size", "objects"], + template=prompts.object_constraints_prompt, + ) + self.baseline_prompt = PromptTemplate( + 
input_variables=["room_type", "room_size", "objects"], + template=prompts.floor_baseline_prompt, + ) self.grid_density = 20 self.add_window = False - self.size_buffer = 10 # add 10 cm buffer to object size + self.size_buffer = 10 # add 10 cm buffer to object size self.constraint_type = "llm" self.use_milp = False self.multiprocessing = False - def generate_objects(self, scene, use_constraint=True): rooms = scene["rooms"] doors = scene["doors"] @@ -47,27 +57,33 @@ def generate_objects(self, scene, use_constraint=True): open_walls = scene["open_walls"] selected_objects = scene["selected_objects"] results = [] - - packed_args = [(room, doors, windows, open_walls, selected_objects, use_constraint) for room in rooms] + + packed_args = [ + (room, doors, windows, open_walls, selected_objects, use_constraint) + for room in rooms + ] if self.multiprocessing: pool = multiprocessing.Pool(processes=4) all_placements = pool.map(self.generate_objects_per_room, packed_args) pool.close() pool.join() else: - all_placements = [self.generate_objects_per_room(args) for args in packed_args] + all_placements = [ + self.generate_objects_per_room(args) for args in packed_args + ] for placements in all_placements: results += placements return results - def generate_objects_per_room(self, args): room, doors, windows, open_walls, selected_objects, use_constraint = args selected_floor_objects = selected_objects[room["roomType"]]["floor"] - object_name2id = {object_name: asset_id for object_name, asset_id in selected_floor_objects} + object_name2id = { + object_name: asset_id for object_name, asset_id in selected_floor_objects + } room_id = room["id"] room_type = room["roomType"] @@ -80,10 +96,12 @@ def generate_objects_per_room(self, args): if use_constraint: # get constraints - constraint_prompt = self.constraint_prompt.format(room_type=room_type, - room_size=room_size, - objects=", ".join(object_names)) - + constraint_prompt = self.constraint_prompt.format( + room_type=room_type, + 
room_size=room_size, + objects=", ".join(object_names), + ) + if self.constraint_type == "llm": constraint_plan = self.llm(constraint_prompt) elif self.constraint_type in ["middle", "edge"]: @@ -97,19 +115,40 @@ def generate_objects_per_room(self, args): constraints = self.parse_constraints(constraint_plan, object_names) # get objects list - object2dimension = {object_name: get_bbox_dims(self.database[object_id]) - for object_name, object_id in object_name2id.items()} + object2dimension = { + object_name: get_bbox_dims(self.database[object_id]) + for object_name, object_id in object_name2id.items() + } + + objects_list = [ + ( + object_name, + ( + object2dimension[object_name]["x"] * 100 + self.size_buffer, + object2dimension[object_name]["z"] * 100 + self.size_buffer, + ), + ) + for object_name in constraints + ] - objects_list = [(object_name, (object2dimension[object_name]['x'] * 100 + self.size_buffer, object2dimension[object_name]['z'] * 100 + self.size_buffer)) for object_name in constraints] - # get initial state room_vertices = [(x * 100, y * 100) for (x, y) in room["vertices"]] room_poly = Polygon(room_vertices) - initial_state = self.get_door_window_placements(doors, windows, room_vertices, open_walls, self.add_window) + initial_state = self.get_door_window_placements( + doors, windows, room_vertices, open_walls, self.add_window + ) # solve - solver = DFS_Solver_Floor(grid_size=grid_size, max_duration=30, constraint_bouns=1) - solution = solver.get_solution(room_poly, objects_list, constraints, initial_state, use_milp=self.use_milp) + solver = DFS_Solver_Floor( + grid_size=grid_size, max_duration=30, constraint_bouns=1 + ) + solution = solver.get_solution( + room_poly, + objects_list, + constraints, + initial_state, + use_milp=self.use_milp, + ) placements = self.solution2placement(solution, object_name2id, room_id) else: object_information = "" @@ -120,16 +159,25 @@ def generate_objects_per_room(self, args): size_z = int(dimension["z"] * 100) 
object_information += f"{object_name}: {size_x} cm x {size_z} cm\n" - baseline_prompt = self.baseline_prompt.format(room_type=room_type, - room_size=room_size, - objects=", ".join(object_names)) - room_origin = [min(v[0] for v in room['vertices']), min(v[1] for v in room['vertices'])] + baseline_prompt = self.baseline_prompt.format( + room_type=room_type, + room_size=room_size, + objects=", ".join(object_names), + ) + room_origin = [ + min(v[0] for v in room["vertices"]), + min(v[1] for v in room["vertices"]), + ] all_is_placed = False while not all_is_placed: completion_text = self.llm(baseline_prompt) try: - completion_text = re.findall(r'```(.*?)```', completion_text, re.DOTALL)[0] - completion_text = re.sub(r'^json', '', completion_text, flags=re.MULTILINE) + completion_text = re.findall( + r"```(.*?)```", completion_text, re.DOTALL + )[0] + completion_text = re.sub( + r"^json", "", completion_text, flags=re.MULTILINE + ) all_data = json.loads(completion_text) except json.JSONDecodeError: continue @@ -137,30 +185,35 @@ def generate_objects_per_room(self, args): placements = list() all_is_placed = True for data in all_data: - object_name = data['object_name'] + object_name = data["object_name"] try: object_id = object_name2id[object_name] except KeyError: all_is_placed = False break - dimension = get_bbox_dims(self.database[object_name2id[object_name]]) + dimension = get_bbox_dims( + self.database[object_name2id[object_name]] + ) placement = self.json_template.copy() placement["id"] = f"{object_name} ({room_id})" placement["object_name"] = object_name placement["assetId"] = object_id placement["roomId"] = room_id - placement["position"] = {"x": room_origin[0] + (data['position']["X"]/100), - "y": dimension["y"] / 2, - "z": room_origin[1] + (data["position"]["Y"]/100)} + placement["position"] = { + "x": room_origin[0] + (data["position"]["X"] / 100), + "y": dimension["y"] / 2, + "z": room_origin[1] + (data["position"]["Y"] / 100), + } placement["rotation"] = 
{"x": 0, "y": data["rotation"], "z": 0} placements.append(placement) - break # only one iteration + break # only one iteration return placements - - def get_door_window_placements(self, doors, windows, room_vertices, open_walls, add_window=True): + def get_door_window_placements( + self, doors, windows, room_vertices, open_walls, add_window=True + ): room_poly = Polygon(room_vertices) door_window_placements = {} i = 0 @@ -171,9 +224,14 @@ def get_door_window_placements(self, doors, windows, room_vertices, open_walls, door_poly = Polygon(door_vertices) door_center = door_poly.centroid if room_poly.contains(door_center): - door_window_placements[f"door-{i}"] = ((door_center.x, door_center.y), 0, door_vertices, 1) + door_window_placements[f"door-{i}"] = ( + (door_center.x, door_center.y), + 0, + door_vertices, + 1, + ) i += 1 - + if add_window: for window in windows: window_boxes = window["windowBoxes"] @@ -182,7 +240,12 @@ def get_door_window_placements(self, doors, windows, room_vertices, open_walls, window_poly = Polygon(window_vertices) window_center = window_poly.centroid if room_poly.contains(window_center): - door_window_placements[f"window-{i}"] = ((window_center.x, window_center.y), 0, window_vertices, 1) + door_window_placements[f"window-{i}"] = ( + (window_center.x, window_center.y), + 0, + window_vertices, + 1, + ) i += 1 if open_walls != []: @@ -191,35 +254,49 @@ def get_door_window_placements(self, doors, windows, room_vertices, open_walls, open_wall_poly = Polygon(open_wall_vertices) open_wall_center = open_wall_poly.centroid if room_poly.contains(open_wall_center): - door_window_placements[f"open-{i}"] = ((open_wall_center.x, open_wall_center.y), 0, open_wall_vertices, 1) + door_window_placements[f"open-{i}"] = ( + (open_wall_center.x, open_wall_center.y), + 0, + open_wall_vertices, + 1, + ) i += 1 return door_window_placements - def get_room_size(self, room): floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - 
z_values = [point['z'] for point in floor_polygon] - return (int(max(x_values) - min(x_values)) * 100, int(max(z_values) - min(z_values)) * 100) - + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] + return ( + int(max(x_values) - min(x_values)) * 100, + int(max(z_values) - min(z_values)) * 100, + ) def solution2placement(self, solutions, object_name2id, room_id): placements = [] for object_name, solution in solutions.items(): - if "door" in object_name or "window" in object_name or "open" in object_name: continue + if ( + "door" in object_name + or "window" in object_name + or "open" in object_name + ): + continue dimension = get_bbox_dims(self.database[object_name2id[object_name]]) placement = self.json_template.copy() placement["assetId"] = object_name2id[object_name] placement["id"] = f"{object_name} ({room_id})" - placement["position"] = {"x": solution[0][0] / 100, "y": dimension["y"] / 2, "z": solution[0][1] / 100} + placement["position"] = { + "x": solution[0][0] / 100, + "y": dimension["y"] / 2, + "z": solution[0][1] / 100, + } placement["rotation"] = {"x": 0, "y": solution[1], "z": 0} placement["roomId"] = room_id placement["vertices"] = list(solution[2]) placement["object_name"] = object_name placements.append(placement) return placements - def parse_constraints(self, constraint_text, object_names): constraint_name2type = { @@ -239,21 +316,25 @@ def parse_constraints(self, constraint_text, object_names): "aligned center": "alignment", "edge alignment": "alignment", "near": "distance", - "far": "distance" + "far": "distance", } object2constraints = {} - plans = [plan.lower() for plan in constraint_text.split('\n') if "|" in plan] - + plans = [plan.lower() for plan in constraint_text.split("\n") if "|" in plan] + for plan in plans: # remove index - pattern = re.compile(r'^(\d+[\.\)]\s*|- )') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] + pattern = re.compile(r"^(\d+[\.\)]\s*|- 
)") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] - object_name = plan.split("|")[0].replace("*", "").strip() # remove * in object name + object_name = ( + plan.split("|")[0].replace("*", "").strip() + ) # remove * in object name - if object_name not in object_names: continue + if object_name not in object_names: + continue object2constraints[object_name] = [] @@ -261,34 +342,80 @@ def parse_constraints(self, constraint_text, object_names): for constraint in constraints: constraint = constraint.strip() constraint_name = constraint.split(",")[0].strip() - - if constraint_name == "n/a": continue - try: constraint_type = constraint_name2type[constraint_name] - except: print(f"constraint type {constraint_name} not found"); continue + if constraint_name == "n/a": + continue + + try: + constraint_type = constraint_name2type[constraint_name] + except: + print(f"constraint type {constraint_name} not found") + continue if constraint_type == "global": - object2constraints[object_name].append({"type": constraint_type, "constraint": constraint_name}) - elif constraint_type in ["relative", "direction", "alignment", "distance"]: - try: target = constraint.split(",")[1].strip() - except: print(f"wrong format of constraint: {constraint}"); continue + object2constraints[object_name].append( + {"type": constraint_type, "constraint": constraint_name} + ) + elif constraint_type in [ + "relative", + "direction", + "alignment", + "distance", + ]: + try: + target = constraint.split(",")[1].strip() + except: + print(f"wrong format of constraint: {constraint}") + continue if target in object2constraints: if constraint_name == "around": - object2constraints[object_name].append({"type": "distance", "constraint": "near", "target": target}) - object2constraints[object_name].append({"type": "direction", "constraint": "face to", "target": target}) + object2constraints[object_name].append( + { + "type": "distance", + "constraint": "near", + "target": target, + } + ) + 
object2constraints[object_name].append( + { + "type": "direction", + "constraint": "face to", + "target": target, + } + ) elif constraint_name == "in front of": - object2constraints[object_name].append({"type": "relative", "constraint": "in front of", "target": target}) - object2constraints[object_name].append({"type": "alignment", "constraint": "center aligned", "target": target}) + object2constraints[object_name].append( + { + "type": "relative", + "constraint": "in front of", + "target": target, + } + ) + object2constraints[object_name].append( + { + "type": "alignment", + "constraint": "center aligned", + "target": target, + } + ) else: - object2constraints[object_name].append({"type": constraint_type, "constraint": constraint_name, "target": target}) + object2constraints[object_name].append( + { + "type": constraint_type, + "constraint": constraint_name, + "target": target, + } + ) else: - print(f"target object {target} not found in the existing constraint plan") + print( + f"target object {target} not found in the existing constraint plan" + ) continue else: print(f"constraint type {constraint_type} not found") continue - + # clean the constraints object2constraints_cleaned = {} for object_name, constraints in object2constraints.items(): @@ -299,10 +426,9 @@ def parse_constraints(self, constraint_text, object_names): constraint_types.append(constraint["type"]) constraints_cleaned.append(constraint) object2constraints_cleaned[object_name] = constraints_cleaned - + return object2constraints - def order_objects_by_size(self, selected_floor_objects): ordered_floor_objects = [] for object_name, asset_id in selected_floor_objects: @@ -310,7 +436,10 @@ def order_objects_by_size(self, selected_floor_objects): size = dimensions["x"] * dimensions["z"] ordered_floor_objects.append([object_name, asset_id, size]) ordered_floor_objects.sort(key=lambda x: x[2], reverse=True) - ordered_floor_objects_no_size = [[object_name, asset_id] for object_name, asset_id, size in 
ordered_floor_objects] + ordered_floor_objects_no_size = [ + [object_name, asset_id] + for object_name, asset_id, size in ordered_floor_objects + ] return ordered_floor_objects_no_size @@ -319,7 +448,7 @@ def __init__(self, solution): self.solution = solution -class DFS_Solver_Floor(): +class DFS_Solver_Floor: def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=0.2): self.grid_size = grid_size self.random_seed = random_seed @@ -331,13 +460,11 @@ def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=0. # Define the functions in a dictionary to avoid if-else conditions self.func_dict = { - "global": { - "edge": self.place_edge - }, + "global": {"edge": self.place_edge}, "relative": self.place_relative, "direction": self.place_face, "alignment": self.place_alignment_center, - "distance": self.place_distance + "distance": self.place_distance, } self.constraint_type2weight = { @@ -348,10 +475,11 @@ def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=0. 
"distance": 1.8, } - self.edge_bouns = 0.0 # worth more than one constraint + self.edge_bouns = 0.0 # worth more than one constraint - - def get_solution(self, bounds, objects_list, constraints, initial_state, use_milp=False): + def get_solution( + self, bounds, objects_list, constraints, initial_state, use_milp=False + ): self.start_time = time.time() if use_milp: # iterate through the constraints list @@ -363,13 +491,18 @@ def get_solution(self, bounds, objects_list, constraints, initial_state, use_mil target_object_name = constraint["target"] if target_object_name in constraints.keys(): # if there is already a distance constraint of target object_name, continue - if any(constraint["type"] == "distance" and constraint["target"] == object_name for constraint in constraints[target_object_name]): continue + if any( + constraint["type"] == "distance" + and constraint["target"] == object_name + for constraint in constraints[target_object_name] + ): + continue new_constraint = constraint.copy() new_constraint["target"] = object_name new_constraints[target_object_name].append(new_constraint) # iterate through the constraints list # for each constraint type "left of" or "right of", add the same constraint to the target object - #for object_name, object_constraints in constraints.items(): + # for object_name, object_constraints in constraints.items(): # for constraint in object_constraints: if constraint["type"] == "relative": # if constraint["constraint"] == "left of": constraints = new_constraints @@ -378,15 +511,17 @@ def get_solution(self, bounds, objects_list, constraints, initial_state, use_mil self.milp_dfs(bounds, objects_list, constraints, initial_state, 10) except SolutionFound as e: print(f"Time taken: {time.time() - self.start_time}") - + else: grid_points = self.create_grids(bounds) grid_points = self.remove_points(grid_points, initial_state) try: - self.dfs(bounds, objects_list, constraints, grid_points, initial_state, 30) + self.dfs( + bounds, objects_list, 
constraints, grid_points, initial_state, 30 + ) except SolutionFound as e: print(f"Time taken: {time.time() - self.start_time}") - + print(f"Number of solutions found: {len(self.solutions)}") max_solution = self.get_max_solution(self.solutions) @@ -394,7 +529,6 @@ def get_solution(self, bounds, objects_list, constraints, initial_state, use_mil self.visualize_grid(bounds, grid_points, max_solution) return max_solution - def get_max_solution(self, solutions): path_weights = [] @@ -403,70 +537,119 @@ def get_max_solution(self, solutions): max_index = np.argmax(path_weights) return solutions[max_index] - - def dfs(self, room_poly, objects_list, constraints, grid_points, placed_objects, branch_factor): + def dfs( + self, + room_poly, + objects_list, + constraints, + grid_points, + placed_objects, + branch_factor, + ): if len(objects_list) == 0: self.solutions.append(placed_objects) return placed_objects - + if time.time() - self.start_time > self.max_duration: print(f"Time limit reached.") raise SolutionFound(self.solutions) - + object_name, object_dim = objects_list[0] - placements = self.get_possible_placements(room_poly, object_dim, constraints[object_name], grid_points, placed_objects) - + placements = self.get_possible_placements( + room_poly, object_dim, constraints[object_name], grid_points, placed_objects + ) + if len(placements) == 0 and len(placed_objects) != 0: self.solutions.append(placed_objects) paths = [] - if branch_factor > 1: random.shuffle(placements) # shuffle the placements of the first object + if branch_factor > 1: + random.shuffle(placements) # shuffle the placements of the first object for placement in placements[:branch_factor]: placed_objects_updated = copy.deepcopy(placed_objects) placed_objects_updated[object_name] = placement - grid_points_updated = self.remove_points(grid_points, placed_objects_updated) - - sub_paths = self.dfs(room_poly, objects_list[1:], constraints, grid_points_updated, placed_objects_updated, 1) + grid_points_updated = 
self.remove_points( + grid_points, placed_objects_updated + ) + + sub_paths = self.dfs( + room_poly, + objects_list[1:], + constraints, + grid_points_updated, + placed_objects_updated, + 1, + ) paths.extend(sub_paths) return paths - - def get_possible_placements(self, room_poly, object_dim, constraints, grid_points, placed_objects): - solutions = self.filter_collision(placed_objects, self.get_all_solutions(room_poly, grid_points, object_dim)) + def get_possible_placements( + self, room_poly, object_dim, constraints, grid_points, placed_objects + ): + solutions = self.filter_collision( + placed_objects, self.get_all_solutions(room_poly, grid_points, object_dim) + ) solutions = self.filter_facing_wall(room_poly, solutions, object_dim) - edge_solutions = self.place_edge(room_poly, copy.deepcopy(solutions), object_dim) + edge_solutions = self.place_edge( + room_poly, copy.deepcopy(solutions), object_dim + ) - if len(edge_solutions) == 0: return edge_solutions + if len(edge_solutions) == 0: + return edge_solutions - global_constraint = next((constraint for constraint in constraints if constraint["type"] == "global"), None) + global_constraint = next( + ( + constraint + for constraint in constraints + if constraint["type"] == "global" + ), + None, + ) - if global_constraint is None: global_constraint = {"type": "global", "constraint": "edge"} + if global_constraint is None: + global_constraint = {"type": "global", "constraint": "edge"} if global_constraint["constraint"] == "edge": - candidate_solutions = copy.deepcopy(edge_solutions) # edge is hard constraint + candidate_solutions = copy.deepcopy( + edge_solutions + ) # edge is hard constraint else: - if len(constraints) > 1: candidate_solutions = solutions + edge_solutions # edge is soft constraint - else: candidate_solutions = copy.deepcopy(solutions) # the first object + if len(constraints) > 1: + candidate_solutions = ( + solutions + edge_solutions + ) # edge is soft constraint + else: + candidate_solutions = 
copy.deepcopy(solutions) # the first object - candidate_solutions = self.filter_collision(placed_objects, candidate_solutions) # filter again after global constraint + candidate_solutions = self.filter_collision( + placed_objects, candidate_solutions + ) # filter again after global constraint - if candidate_solutions == []: return candidate_solutions + if candidate_solutions == []: + return candidate_solutions random.shuffle(candidate_solutions) - placement2score = {tuple(solution[:3]): solution[-1] for solution in candidate_solutions} + placement2score = { + tuple(solution[:3]): solution[-1] for solution in candidate_solutions + } # add a bias to edge solutions for solution in candidate_solutions: if solution in edge_solutions and len(constraints) >= 1: placement2score[tuple(solution[:3])] += self.edge_bouns - + for constraint in constraints: - if "target" not in constraint: continue + if "target" not in constraint: + continue func = self.func_dict.get(constraint["type"]) - valid_solutions = func(constraint["constraint"], placed_objects[constraint["target"]], candidate_solutions) - + valid_solutions = func( + constraint["constraint"], + placed_objects[constraint["target"]], + candidate_solutions, + ) + weight = self.constraint_type2weight[constraint["type"]] if constraint["type"] == "distance": for solution in valid_solutions: @@ -474,17 +657,24 @@ def get_possible_placements(self, room_poly, object_dim, constraints, grid_point placement2score[tuple(solution[:3])] += bouns * weight else: for solution in valid_solutions: - placement2score[tuple(solution[:3])] += self.constraint_bouns * weight + placement2score[tuple(solution[:3])] += ( + self.constraint_bouns * weight + ) # normalize the scores - for placement in placement2score: placement2score[placement] /= max(len(constraints), 1) + for placement in placement2score: + placement2score[placement] /= max(len(constraints), 1) - sorted_placements = sorted(placement2score, key=placement2score.get, reverse=True) - 
sorted_solutions = [list(placement) + [placement2score[placement]] for placement in sorted_placements] + sorted_placements = sorted( + placement2score, key=placement2score.get, reverse=True + ) + sorted_solutions = [ + list(placement) + [placement2score[placement]] + for placement in sorted_placements + ] return sorted_solutions - def create_grids(self, room_poly): # get the min and max bounds of the room min_x, min_z, max_x, max_z = room_poly.bounds @@ -498,7 +688,6 @@ def create_grids(self, room_poly): grid_points.append((x, y)) return grid_points - def remove_points(self, grid_points, objects_dict): # Create an r-tree index @@ -507,12 +696,12 @@ def remove_points(self, grid_points, objects_dict): # Populate the index with bounding boxes of the objects for i, (_, _, obj, _) in enumerate(objects_dict.values()): idx.insert(i, Polygon(obj).bounds) - + # Create Shapely Polygon objects only once polygons = [Polygon(obj) for _, _, obj, _ in objects_dict.values()] valid_points = [] - + for point in grid_points: p = Point(point) # Get a list of potential candidates @@ -520,9 +709,8 @@ def remove_points(self, grid_points, objects_dict): # Check if point is in any of the candidate polygons if not any(candidate.contains(p) for candidate in candidates): valid_points.append(point) - + return valid_points - def get_all_solutions(self, room_poly, grid_points, object_dim): obj_length, obj_width = object_dim @@ -530,29 +718,49 @@ def get_all_solutions(self, room_poly, grid_points, object_dim): rotation_adjustments = { 0: ((-obj_half_length, -obj_half_width), (obj_half_length, obj_half_width)), - 90: ((-obj_half_width, -obj_half_length), (obj_half_width, obj_half_length)), - 180: ((-obj_half_length, obj_half_width), (obj_half_length, -obj_half_width)), - 270: ((obj_half_width, -obj_half_length), (-obj_half_width, obj_half_length)), + 90: ( + (-obj_half_width, -obj_half_length), + (obj_half_width, obj_half_length), + ), + 180: ( + (-obj_half_length, obj_half_width), + 
(obj_half_length, -obj_half_width), + ), + 270: ( + (obj_half_width, -obj_half_length), + (-obj_half_width, obj_half_length), + ), } solutions = [] for rotation in [0, 90, 180, 270]: for point in grid_points: center_x, center_y = point - lower_left_adjustment, upper_right_adjustment = rotation_adjustments[rotation] - lower_left = (center_x + lower_left_adjustment[0], center_y + lower_left_adjustment[1]) - upper_right = (center_x + upper_right_adjustment[0], center_y + upper_right_adjustment[1]) + lower_left_adjustment, upper_right_adjustment = rotation_adjustments[ + rotation + ] + lower_left = ( + center_x + lower_left_adjustment[0], + center_y + lower_left_adjustment[1], + ) + upper_right = ( + center_x + upper_right_adjustment[0], + center_y + upper_right_adjustment[1], + ) obj_box = box(*lower_left, *upper_right) if room_poly.contains(obj_box): - solutions.append([point, rotation, tuple(obj_box.exterior.coords[:]), 1]) - + solutions.append( + [point, rotation, tuple(obj_box.exterior.coords[:]), 1] + ) + return solutions - def filter_collision(self, objects_dict, solutions): valid_solutions = [] - object_polygons = [Polygon(obj_coords) for _, _, obj_coords, _ in list(objects_dict.values())] + object_polygons = [ + Polygon(obj_coords) for _, _, obj_coords, _ in list(objects_dict.values()) + ] for solution in solutions: sol_obj_coords = solution[2] sol_obj = Polygon(sol_obj_coords) @@ -560,7 +768,6 @@ def filter_collision(self, objects_dict, solutions): valid_solutions.append(solution) return valid_solutions - def filter_facing_wall(self, room_poly, solutions, obj_dim): valid_solutions = [] obj_width = obj_dim[1] @@ -579,15 +786,19 @@ def filter_facing_wall(self, room_poly, solutions, obj_dim): rotation = solution[1] front_center_adjustment = front_center_adjustments[rotation] - front_center_x, front_center_y = center_x + front_center_adjustment[0], center_y + front_center_adjustment[1] + front_center_x, front_center_y = ( + center_x + front_center_adjustment[0], 
+ center_y + front_center_adjustment[1], + ) - front_center_distance = room_poly.boundary.distance(Point(front_center_x, front_center_y)) + front_center_distance = room_poly.boundary.distance( + Point(front_center_x, front_center_y) + ) - if front_center_distance >= 30: # TODO: make this a parameter + if front_center_distance >= 30: # TODO: make this a parameter valid_solutions.append(solution) return valid_solutions - def place_edge(self, room_poly, solutions, obj_dim): valid_solutions = [] @@ -606,28 +817,41 @@ def place_edge(self, room_poly, solutions, obj_dim): rotation = solution[1] back_center_adjustment = back_center_adjustments[rotation] - back_center_x, back_center_y = center_x + back_center_adjustment[0], center_y + back_center_adjustment[1] - - back_center_distance = room_poly.boundary.distance(Point(back_center_x, back_center_y)) + back_center_x, back_center_y = ( + center_x + back_center_adjustment[0], + center_y + back_center_adjustment[1], + ) + + back_center_distance = room_poly.boundary.distance( + Point(back_center_x, back_center_y) + ) center_distance = room_poly.boundary.distance(Point(center_x, center_y)) - if back_center_distance <= self.grid_size and back_center_distance < center_distance: + if ( + back_center_distance <= self.grid_size + and back_center_distance < center_distance + ): solution[-1] += self.constraint_bouns # valid_solutions.append(solution) # those are still valid solutions, but we need to move the object to the edge # move the object to the edge - center2back_vector = np.array([back_center_x - center_x, back_center_y - center_y]) + center2back_vector = np.array( + [back_center_x - center_x, back_center_y - center_y] + ) center2back_vector /= np.linalg.norm(center2back_vector) - offset = center2back_vector * (back_center_distance + 4.5) # add a small distance to avoid the object cross the wall + offset = center2back_vector * ( + back_center_distance + 4.5 + ) # add a small distance to avoid the object cross the wall 
solution[0] = (center_x + offset[0], center_y + offset[1]) - solution[2] = ((solution[2][0][0] + offset[0], solution[2][0][1] + offset[1]), \ - (solution[2][1][0] + offset[0], solution[2][1][1] + offset[1]), \ - (solution[2][2][0] + offset[0], solution[2][2][1] + offset[1]), \ - (solution[2][3][0] + offset[0], solution[2][3][1] + offset[1])) + solution[2] = ( + (solution[2][0][0] + offset[0], solution[2][0][1] + offset[1]), + (solution[2][1][0] + offset[0], solution[2][1][1] + offset[1]), + (solution[2][2][0] + offset[0], solution[2][2][1] + offset[1]), + (solution[2][3][0] + offset[0], solution[2][3][1] + offset[1]), + ) valid_solutions.append(solution) return valid_solutions - def place_corner(self, room_poly, solutions, obj_dim): obj_length, obj_width = obj_dim @@ -637,7 +861,7 @@ def place_corner(self, room_poly, solutions, obj_dim): 0: ((-obj_half_length, 0), (obj_half_length, 0)), 90: ((0, obj_half_length), (0, -obj_half_length)), 180: ((obj_half_length, 0), (-obj_half_length, 0)), - 270: ((0, -obj_half_length), (0, obj_half_length)) + 270: ((0, -obj_half_length), (0, obj_half_length)), } edge_solutions = self.place_edge(room_poly, solutions, obj_dim) @@ -646,20 +870,25 @@ def place_corner(self, room_poly, solutions, obj_dim): for solution in edge_solutions: (center_x, center_y), rotation = solution[:2] - (dx_left, dy_left), (dx_right, dy_right) = rotation_center_adjustments[rotation] + (dx_left, dy_left), (dx_right, dy_right) = rotation_center_adjustments[ + rotation + ] left_center_x, left_center_y = center_x + dx_left, center_y + dy_left right_center_x, right_center_y = center_x + dx_right, center_y + dy_right - - left_center_distance = room_poly.boundary.distance(Point(left_center_x, left_center_y)) - right_center_distance = room_poly.boundary.distance(Point(right_center_x, right_center_y)) + + left_center_distance = room_poly.boundary.distance( + Point(left_center_x, left_center_y) + ) + right_center_distance = room_poly.boundary.distance( + 
Point(right_center_x, right_center_y) + ) if min(left_center_distance, right_center_distance) < self.grid_size: solution[-1] += self.constraint_bouns valid_solutions.append(solution) return valid_solutions - def place_relative(self, place_type, target_object, solutions): valid_solutions = [] @@ -671,38 +900,56 @@ def place_relative(self, place_type, target_object, solutions): mean_y = (min_y + max_y) / 2 comparison_dict = { - 'left of': { - 0: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 90: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, - 180: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= max_x, + "left of": { + 0: lambda sol_center: sol_center[0] < min_x + and min_y <= sol_center[1] <= max_y, + 90: lambda sol_center: sol_center[1] > max_y + and min_x <= sol_center[0] <= max_x, + 180: lambda sol_center: sol_center[0] > max_x + and min_y <= sol_center[1] <= max_y, + 270: lambda sol_center: sol_center[1] < min_y + and min_x <= sol_center[0] <= max_x, }, - 'right of': { - 0: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, - 90: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= max_x, - 180: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, + "right of": { + 0: lambda sol_center: sol_center[0] > max_x + and min_y <= sol_center[1] <= max_y, + 90: lambda sol_center: sol_center[1] < min_y + and min_x <= sol_center[0] <= max_x, + 180: lambda sol_center: sol_center[0] < min_x + and min_y <= sol_center[1] <= max_y, + 270: lambda sol_center: sol_center[1] > max_y + and min_x <= sol_center[0] <= max_x, }, - 'in front of': { - 0: lambda sol_center: sol_center[1] > max_y and mean_x - self.grid_size < sol_center[0] < mean_x + self.grid_size, # 
in front of and centered - 90: lambda sol_center: sol_center[0] > max_x and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, - 180: lambda sol_center: sol_center[1] < min_y and mean_x - self.grid_size < sol_center[0] < mean_x + self.grid_size, - 270: lambda sol_center: sol_center[0] < min_x and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, + "in front of": { + 0: lambda sol_center: sol_center[1] > max_y + and mean_x - self.grid_size + < sol_center[0] + < mean_x + self.grid_size, # in front of and centered + 90: lambda sol_center: sol_center[0] > max_x + and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, + 180: lambda sol_center: sol_center[1] < min_y + and mean_x - self.grid_size < sol_center[0] < mean_x + self.grid_size, + 270: lambda sol_center: sol_center[0] < min_x + and mean_y - self.grid_size < sol_center[1] < mean_y + self.grid_size, }, - 'behind': { - 0: lambda sol_center: sol_center[1] < min_y and min_x <= sol_center[0] <= max_x, - 90: lambda sol_center: sol_center[0] < min_x and min_y <= sol_center[1] <= max_y, - 180: lambda sol_center: sol_center[1] > max_y and min_x <= sol_center[0] <= max_x, - 270: lambda sol_center: sol_center[0] > max_x and min_y <= sol_center[1] <= max_y, + "behind": { + 0: lambda sol_center: sol_center[1] < min_y + and min_x <= sol_center[0] <= max_x, + 90: lambda sol_center: sol_center[0] < min_x + and min_y <= sol_center[1] <= max_y, + 180: lambda sol_center: sol_center[1] > max_y + and min_x <= sol_center[0] <= max_x, + 270: lambda sol_center: sol_center[0] > max_x + and min_y <= sol_center[1] <= max_y, }, "side of": { 0: lambda sol_center: min_y <= sol_center[1] <= max_y, 90: lambda sol_center: min_x <= sol_center[0] <= max_x, 180: lambda sol_center: min_y <= sol_center[1] <= max_y, - 270: lambda sol_center: min_x <= sol_center[0] <= max_x - } + 270: lambda sol_center: min_x <= sol_center[0] <= max_x, + }, } - + compare_func = 
comparison_dict.get(place_type).get(target_rotation) for solution in solutions: @@ -711,9 +958,8 @@ def place_relative(self, place_type, target_object, solutions): if compare_func(sol_center): solution[-1] += self.constraint_bouns valid_solutions.append(solution) - + return valid_solutions - def place_distance(self, distance_type, target_object, solutions): target_coords = target_object[2] @@ -728,7 +974,7 @@ def place_distance(self, distance_type, target_object, solutions): solution[-1] = distance valid_solutions.append(solution) - + min_distance = min(distances) max_distance = max(distances) @@ -740,44 +986,42 @@ def place_distance(self, distance_type, target_object, solutions): elif distance_type == "far": points = [(min_distance, 0), (max_distance, 1)] - + x = [point[0] for point in points] y = [point[1] for point in points] - f = interp1d(x, y, kind='linear', fill_value='extrapolate') - + f = interp1d(x, y, kind="linear", fill_value="extrapolate") + for solution in valid_solutions: distance = solution[-1] solution[-1] = float(f(distance)) return valid_solutions - def place_face(self, face_type, target_object, solutions): if face_type == "face to": return self.place_face_to(target_object, solutions) - + elif face_type == "face same as": return self.place_face_same(target_object, solutions) - + elif face_type == "face opposite to": return self.place_face_opposite(target_object, solutions) - def place_face_to(self, target_object, solutions): # Define unit vectors for each rotation unit_vectors = { - 0: np.array([0., 1.]), # Facing up - 90: np.array([1., 0.]), # Facing right - 180: np.array([0., -1.]), # Facing down - 270: np.array([-1., 0.]) # Facing left + 0: np.array([0.0, 1.0]), # Facing up + 90: np.array([1.0, 0.0]), # Facing right + 180: np.array([0.0, -1.0]), # Facing down + 270: np.array([-1.0, 0.0]), # Facing left } - + target_coords = target_object[2] target_poly = Polygon(target_coords) - + valid_solutions = [] - + for solution in solutions: sol_center 
= solution[0] sol_rotation = solution[1] @@ -792,14 +1036,13 @@ def place_face_to(self, target_object, solutions): if half_line.intersects(target_poly): solution[-1] += self.constraint_bouns valid_solutions.append(solution) - + return valid_solutions - def place_face_same(self, target_object, solutions): target_rotation = target_object[1] valid_solutions = [] - + for solution in solutions: sol_rotation = solution[1] if sol_rotation == target_rotation: @@ -807,20 +1050,18 @@ def place_face_same(self, target_object, solutions): valid_solutions.append(solution) return valid_solutions - def place_face_opposite(self, target_object, solutions): target_rotation = (target_object[1] + 180) % 360 valid_solutions = [] - + for solution in solutions: sol_rotation = solution[1] if sol_rotation == target_rotation: solution[-1] += self.constraint_bouns valid_solutions.append(solution) - - return valid_solutions + return valid_solutions def place_alignment_center(self, alignment_type, target_object, solutions): target_center = target_object[0] @@ -828,12 +1069,14 @@ def place_alignment_center(self, alignment_type, target_object, solutions): eps = 5 for solution in solutions: sol_center = solution[0] - if abs(sol_center[0] - target_center[0]) < eps or abs(sol_center[1] - target_center[1]) < eps: + if ( + abs(sol_center[0] - target_center[0]) < eps + or abs(sol_center[1] - target_center[1]) < eps + ): solution[-1] += self.constraint_bouns valid_solutions.append(solution) return valid_solutions - def visualize_grid(self, room_poly, grid_points, solutions): plt.rcParams["font.family"] = "Times New Roman" plt.rcParams["font.size"] = 22 @@ -843,12 +1086,12 @@ def visualize_grid(self, room_poly, grid_points, solutions): # draw the room x, y = room_poly.exterior.xy - ax.plot(x, y, '-', label='Room', color='black', linewidth=2) + ax.plot(x, y, "-", label="Room", color="black", linewidth=2) # draw the grid points grid_x = [point[0] for point in grid_points] grid_y = [point[1] for point in 
grid_points] - ax.plot(grid_x, grid_y, 'o', markersize=2, color="grey") + ax.plot(grid_x, grid_y, "o", markersize=2, color="grey") # draw the solutions for object_name, solution in solutions.items(): @@ -858,43 +1101,51 @@ def visualize_grid(self, room_poly, grid_points, solutions): # create a polygon for the solution obj_poly = Polygon(box_coords) x, y = obj_poly.exterior.xy - ax.plot(x, y, '-', linewidth=2, color='black') + ax.plot(x, y, "-", linewidth=2, color="black") # ax.text(center_x, center_y, object_name, fontsize=18, ha='center') # set arrow direction based on rotation if rotation == 0: - ax.arrow(center_x, center_y, 0, 25, head_width=10, fc='black') + ax.arrow(center_x, center_y, 0, 25, head_width=10, fc="black") elif rotation == 90: - ax.arrow(center_x, center_y, 25, 0, head_width=10, fc='black') + ax.arrow(center_x, center_y, 25, 0, head_width=10, fc="black") elif rotation == 180: - ax.arrow(center_x, center_y, 0, -25, head_width=10, fc='black') + ax.arrow(center_x, center_y, 0, -25, head_width=10, fc="black") elif rotation == 270: - ax.arrow(center_x, center_y, -25, 0, head_width=10, fc='black') + ax.arrow(center_x, center_y, -25, 0, head_width=10, fc="black") # axis off - ax.axis('off') - ax.set_aspect('equal', 'box') # to keep the ratios equal along x and y axis - create_time = str(datetime.datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-") - plt.savefig(f"{create_time}.pdf", bbox_inches='tight', dpi=300) + ax.axis("off") + ax.set_aspect("equal", "box") # to keep the ratios equal along x and y axis + create_time = ( + str(datetime.datetime.now()) + .replace(" ", "-") + .replace(":", "-") + .replace(".", "-") + ) + plt.savefig(f"{create_time}.pdf", bbox_inches="tight", dpi=300) plt.show() - - def milp_dfs(self, room_poly, all_objects_list, constraints, placed_objects, branch_factor=1): + def milp_dfs( + self, room_poly, all_objects_list, constraints, placed_objects, branch_factor=1 + ): if len(all_objects_list) == 0: 
self.solutions.append(placed_objects) return placed_objects - + if time.time() - self.start_time > self.max_duration: print(f"Time limit reached.") raise SolutionFound(self.solutions) - + def milp_solve(soft_constraints_list, hard_constraints_list, verbose=False): - problem = cp.Problem(cp.Maximize(sum(soft_constraints_list)), hard_constraints_list) + problem = cp.Problem( + cp.Maximize(sum(soft_constraints_list)), hard_constraints_list + ) if verbose: - print('solving milp using GUROBI ...') + print("solving milp using GUROBI ...") problem.solve(solver=cp.GUROBI, reoptimize=True, verbose=False) return problem.value - + def parse_object_properties(object_properties): x, y = object_properties[0] rotation = int(object_properties[1] or 0) @@ -906,32 +1157,39 @@ def parse_object_properties(object_properties): max_x = max([point[0] for point in object_bbox]) min_y = min([point[1] for point in object_bbox]) max_y = max([point[1] for point in object_bbox]) - object_dim = (max_x - min_x, max_y - min_y) if rotation == 0 or rotation == 180 else (max_y - min_y, max_x - min_x) + object_dim = ( + (max_x - min_x, max_y - min_y) + if rotation == 0 or rotation == 180 + else (max_y - min_y, max_x - min_x) + ) return x, y, rotation, object_dim def find_object_dim(target_object_name, objects_list, placed_objects): target_object_dim = None for object_name_1, object_dim_1 in objects_list: - if object_name_1 == target_object_name: + if object_name_1 == target_object_name: target_object_dim = object_dim_1 return target_object_dim - if not None: + if not None: for object_name_1, object_properties in placed_objects.items(): if object_name_1 == target_object_name: - x, y, rotation, target_object_dim = parse_object_properties(object_properties) + x, y, rotation, target_object_dim = parse_object_properties( + object_properties + ) return target_object_dim return None - found_a_solution = False # randomly select a set of objects from all_objects_list - # start with the largest object + more 
objects --> gradually reduce the number of objects + # start with the largest object + more objects --> gradually reduce the number of objects for branch_idx in range(branch_factor): # sample a set of objects from a list that contains the first object - - k = random.randint(0, min(5, len(all_objects_list)-1)) - objects_list = [all_objects_list[0]] + random.sample(all_objects_list[1:], k) + + k = random.randint(0, min(5, len(all_objects_list) - 1)) + objects_list = [all_objects_list[0]] + random.sample( + all_objects_list[1:], k + ) hard_constraints_list = [] soft_constraints_list = [0] @@ -939,125 +1197,198 @@ def find_object_dim(target_object_name, objects_list, placed_objects): # formulate the milp problem # object_name, object_dim = objects_list[0] # x, y, rotate_180, rotate_90 - variables_dict = {object[0]: [cp.Variable(), cp.Variable(), cp.Variable(boolean=True), cp.Variable(boolean=True)] for object in objects_list} + variables_dict = { + object[0]: [ + cp.Variable(), + cp.Variable(), + cp.Variable(boolean=True), + cp.Variable(boolean=True), + ] + for object in objects_list + } # add placed objects into variables dict even though they are not variables for object, object_properties in placed_objects.items(): x, y = object_properties[0] rotation = int(object_properties[1]) - variables_dict[object] = [x, y, rotation == 180, rotation == 90 or rotation == 270] + variables_dict[object] = [ + x, + y, + rotation == 180, + rotation == 90 or rotation == 270, + ] # Initialize a list of variables, each variable represents the coordinate for each object room_min_x, room_min_y, room_max_x, room_max_y = room_poly.bounds # Add boundary constraints to all objects for object_name, object_dim in objects_list: - hard_constraints_list.extend(create_boundary_constraints(variables_dict[object_name], - object_dim, - (room_min_x, room_min_y, room_max_x, room_max_y))) + hard_constraints_list.extend( + create_boundary_constraints( + variables_dict[object_name], + object_dim, + 
(room_min_x, room_min_y, room_max_x, room_max_y), + ) + ) # Add pariwise collision constraints for object_name_1, object_dim_1 in objects_list: for object_name_2, object_dim_2 in objects_list: - if object_name_1 == object_name_2: continue + if object_name_1 == object_name_2: + continue # collision constraints should be hard constraints - hard_constraints_list.extend(create_nooverlap_constraints(variables_dict[object_name_1], - variables_dict[object_name_2], - object_dim_1, - object_dim_2)) + hard_constraints_list.extend( + create_nooverlap_constraints( + variables_dict[object_name_1], + variables_dict[object_name_2], + object_dim_1, + object_dim_2, + ) + ) # Add pariwise collision constraints with placed objects for object_name_1, object_dim_1 in objects_list: for object_name_2, object_properties_2 in placed_objects.items(): # bbox is a list of four points - x, y, rotation, object_dim_2 = parse_object_properties(object_properties_2) - - hard_constraints_list.extend(create_nooverlap_constraints(variables_dict[object_name_1], - [x, y, rotation == 180, rotation == 90 or rotation == 270], - object_dim_1, object_dim_2)) + x, y, rotation, object_dim_2 = parse_object_properties( + object_properties_2 + ) + + hard_constraints_list.extend( + create_nooverlap_constraints( + variables_dict[object_name_1], + [x, y, rotation == 180, rotation == 90 or rotation == 270], + object_dim_1, + object_dim_2, + ) + ) # default constraints / heuristics? 
for object_name, object_dim in objects_list: # encourage dispersement of assets - all_other_objects_list = [x[0] for x in objects_list if x[0] != object_name] + list(placed_objects.keys()) + all_other_objects_list = [ + x[0] for x in objects_list if x[0] != object_name + ] + list(placed_objects.keys()) for target_object_name in all_other_objects_list: - hard_constraints, soft_constraints = create_distance_constraints(variables_dict[object_name], - variables_dict[target_object_name], - upper_bound=[room_max_x-room_min_x, room_max_y-room_min_y], - type='far') + hard_constraints, soft_constraints = create_distance_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + upper_bound=[room_max_x - room_min_x, room_max_y - room_min_y], + type="far", + ) assert len(soft_constraints) == 1 # soft_constraints[0] *= 0.001 hard_constraints_list.extend(hard_constraints) soft_constraints_list.extend(soft_constraints) - # use cvxpy to solve for the hard constraints for object_name, object_dim in objects_list: # by default - add soft edge constraints although this might make the solver take a longer time - if not any(constraint['type'] == 'global' for constraint in constraints[object_name]): - hard_constraints, soft_constraints = create_edge_constraints(variables_dict[object_name], - object_dim, - room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), - hard=False) + if not any( + constraint["type"] == "global" + for constraint in constraints[object_name] + ): + hard_constraints, soft_constraints = create_edge_constraints( + variables_dict[object_name], + object_dim, + room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), + hard=False, + ) soft_constraints[0] *= 100 hard_constraints_list.extend(hard_constraints) soft_constraints_list.extend(soft_constraints) - - for constraint in constraints[object_name]: - if constraint['type'] == 'global': - if constraint['constraint'] == 'edge': # hard constraints - hard_constraints, soft_constraints = 
create_edge_constraints(variables_dict[object_name], - object_dim, - room_dim=(room_min_x, room_min_y, room_max_x, room_max_y), - hard=True) + if constraint["type"] == "global": + if constraint["constraint"] == "edge": # hard constraints + hard_constraints, soft_constraints = ( + create_edge_constraints( + variables_dict[object_name], + object_dim, + room_dim=( + room_min_x, + room_min_y, + room_max_x, + room_max_y, + ), + hard=True, + ) + ) hard_constraints_list.extend(hard_constraints) soft_constraints_list.extend(soft_constraints) - if constraint['type'] == 'direction': - assert constraint['constraint'] == 'face to' - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) + if constraint["type"] == "direction": + assert constraint["constraint"] == "face to" + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) if target_object_dim: - hard_constraints_list.extend(create_directional_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim)) - - if constraint['type'] == 'alignment': - assert constraint['constraint'] == 'center aligned' - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) + hard_constraints_list.extend( + create_directional_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + target_object_dim, + ) + ) + + if constraint["type"] == "alignment": + assert constraint["constraint"] == "center aligned" + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) if target_object_dim: - hard_constraints_list.extend(create_alignment_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim)) - - 
if constraint['type'] == 'distance': - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) + hard_constraints_list.extend( + create_alignment_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + target_object_dim, + ) + ) + + if constraint["type"] == "distance": + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) if target_object_dim: - hard_constraints, soft_constraints = create_distance_constraints(variables_dict[object_name], - variables_dict[target_object_name], - upper_bound=[room_max_x-room_min_x, room_max_y-room_min_y], - type=constraint['constraint']) + hard_constraints, soft_constraints = ( + create_distance_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + upper_bound=[ + room_max_x - room_min_x, + room_max_y - room_min_y, + ], + type=constraint["constraint"], + ) + ) hard_constraints_list.extend(hard_constraints) soft_constraints_list.extend(soft_constraints) assert len(soft_constraints) == 1 # higher weighting soft_constraints[0] *= 0.01 - if constraint['type'] == 'relative': - target_object_name = constraint['target'] - target_object_dim = find_object_dim(target_object_name, objects_list, placed_objects) + if constraint["type"] == "relative": + target_object_name = constraint["target"] + target_object_dim = find_object_dim( + target_object_name, objects_list, placed_objects + ) if target_object_dim: - hard_constraints_list.extend(create_relative_constraints(variables_dict[object_name], - variables_dict[target_object_name], - object_dim, - target_object_dim, - constraint['constraint'])) - - result = milp_solve(soft_constraints_list, hard_constraints_list, verbose=False) + hard_constraints_list.extend( + create_relative_constraints( + variables_dict[object_name], + variables_dict[target_object_name], + object_dim, + 
target_object_dim, + constraint["constraint"], + ) + ) + + result = milp_solve( + soft_constraints_list, hard_constraints_list, verbose=False + ) if result is None or math.isnan(result) or math.isinf(result): continue @@ -1072,31 +1403,46 @@ def find_object_dim(target_object_name, objects_list, placed_objects): x = variables_dict[object_name][0].value.item() y = variables_dict[object_name][1].value.item() rotate_180 = variables_dict[object_name][2].value - rotate_90 = variables_dict[object_name][3].value - if not rotate_180: rotate_180 = 0 - if not rotate_90: rotate_90 = 0 + rotate_90 = variables_dict[object_name][3].value + if not rotate_180: + rotate_180 = 0 + if not rotate_90: + rotate_90 = 0 # bbox has taken into account of the rotation if rotate_90: - bbox = [(x - object_dim[1]/2, y - object_dim[0]/2), - (x + object_dim[1]/2, y - object_dim[0]/2), - (x + object_dim[1]/2, y + object_dim[0]/2), - (x - object_dim[1]/2, y + object_dim[0]/2)] + bbox = [ + (x - object_dim[1] / 2, y - object_dim[0] / 2), + (x + object_dim[1] / 2, y - object_dim[0] / 2), + (x + object_dim[1] / 2, y + object_dim[0] / 2), + (x - object_dim[1] / 2, y + object_dim[0] / 2), + ] else: - bbox = [(x - object_dim[0]/2, y - object_dim[1]/2), - (x + object_dim[0]/2, y - object_dim[1]/2), - (x + object_dim[0]/2, y + object_dim[1]/2), - (x - object_dim[0]/2, y + object_dim[1]/2)] - - placed_objects_updated[object_name] = [(x,y), rotate_180 * 180 + rotate_90 * 90, bbox, - len(constraints[object_name])] + bbox = [ + (x - object_dim[0] / 2, y - object_dim[1] / 2), + (x + object_dim[0] / 2, y - object_dim[1] / 2), + (x + object_dim[0] / 2, y + object_dim[1] / 2), + (x - object_dim[0] / 2, y + object_dim[1] / 2), + ] + + placed_objects_updated[object_name] = [ + (x, y), + rotate_180 * 180 + rotate_90 * 90, + bbox, + len(constraints[object_name]), + ] + + # remove all elemnts in objects_list from all_objects_list + self.milp_dfs( + room_poly, + [x for x in all_objects_list if x not in objects_list], + 
constraints, + placed_objects_updated, + branch_factor=1, + ) - # remove all elemnts in objects_list from all_objects_list - self.milp_dfs(room_poly, [x for x in all_objects_list if x not in objects_list], constraints, placed_objects_updated, branch_factor=1) - if not found_a_solution and len(placed_objects) != 0: self.solutions.append(placed_objects) - def test_dfs_placement(self): room_vertices = ((0, 0), (0, 500), (500, 500), (500, 0)) @@ -1126,88 +1472,208 @@ def test_dfs_placement(self): # random.seed(42) # for i, solution in enumerate(random.sample(solutions_1, 25)): # objects[f"coffee table-{i}"] = solution - + # objects[f"coffee table"] = [(300, 350), 0, ((350.0, 325.0), (350.0, 375.0), (250.0, 375.0), (250.0, 325.0), (350.0, 325.0)), 1.0] # self.visualize_grid(room_poly, grid_points, objects) - + solutions_1 = self.place_face_to(objects["sofa"], solutions_1) solutions_1 = self.place_relative("in front of", objects["sofa"], solutions_1) - solutions_1 = self.place_alignment_center("center alignment", objects["sofa"], solutions_1) + solutions_1 = self.place_alignment_center( + "center alignment", objects["sofa"], solutions_1 + ) solutions_1 = self.place_distance("near", objects["sofa"], solutions_1) objects[f"coffee table"] = solutions_1[-1] self.visualize_grid(room_poly, grid_points, objects) - def test_milp_placement(self, simple=False, use_milp=True): room_vertices = ((0, 0), (0, 600), (800, 600), (800, 0)) room_poly = Polygon(room_vertices) grid_points = self.create_grids(room_poly) if not simple: - constraints = {'sofa-0': [{'type': 'global', 'constraint': 'edge'}], - 'sofa-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}], - 'tv stand-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 
'sofa-1'}], - 'coffee table-0': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - {'type': 'relative', 'constraint': 'in front of', 'target': 'sofa-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'tv stand-0'}], - 'coffee table-1': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-1'}, - {'type': 'relative', 'constraint': 'in front of', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-1'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'sofa-1'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'tv stand-0'}], - 'side table-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-0'}, - {'type': 'relative', 'constraint': 'side of', 'target': 'sofa-0'}], - 'side table-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'near', 'target': 'sofa-1'}, - {'type': 'relative', 'constraint': 'side of', 'target': 'sofa-1'}], - 'armchair-0': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'coffee table-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-0'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-0'}], - 'armchair-1': [{'type': 'global', 'constraint': 'middle'}, - {'type': 'distance', 'constraint': 'near', 'target': 'coffee table-1'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-1'}, - {'type': 'direction', 'constraint': 'face to', 'target': 'coffee table-1'}], - 'bookshelf-0': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'tv stand-0'}], - 
'bookshelf-1': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'bookshelf-0'}, - {'type': 'alignment', 'constraint': 'center aligned', 'target': 'bookshelf-0'}]} - - initial_state = {'door-0': ((586.7550200520433, 550.0), 0, [(640.8300346432603, 500.0), (532.6800054608262, 500.0), (532.6800054608262, 600.0), (640.8300346432603, 600.0)], 1)} - - objects = [('sofa-0', (301.6667297651499, 106.48952360032415)), - ('sofa-1', (301.6667297651499, 106.48952360032415)), - ('tv stand-0', (201.0964714933229, 59.39910836195032)), - ('coffee table-0', (69.15754261308616, 126.69169450358964)), - ('coffee table-1', (69.15754261308616, 126.69169450358964)), - ('side table-0', (61.74632023132328, 61.74453745262855)), - ('side table-1', (61.74632023132328, 61.74453745262855)), - ('armchair-0', (79.0368498902692, 89.4893987892571)), - ('armchair-1', (79.0368498902692, 89.4893987892571)), - ('bookshelf-0', (67.94689517917222, 43.8934937031396)), - ('bookshelf-1', (67.94689517917222, 43.8934937031396))] - solution = self.get_solution(room_poly, objects, constraints, initial_state, use_milp=use_milp) + constraints = { + "sofa-0": [{"type": "global", "constraint": "edge"}], + "sofa-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-0", + }, + ], + "tv stand-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "sofa-1"}, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + ], + "coffee table-0": [ + {"type": "global", "constraint": "middle"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + { + "type": "relative", + "constraint": "in front of", + "target": "sofa-0", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-0", + }, + { + "type": "alignment", + 
"constraint": "center aligned", + "target": "sofa-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "tv stand-0", + }, + ], + "coffee table-1": [ + {"type": "global", "constraint": "middle"}, + {"type": "distance", "constraint": "near", "target": "sofa-1"}, + { + "type": "relative", + "constraint": "in front of", + "target": "sofa-1", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + { + "type": "alignment", + "constraint": "center aligned", + "target": "sofa-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "tv stand-0", + }, + ], + "side table-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-0"}, + {"type": "relative", "constraint": "side of", "target": "sofa-0"}, + ], + "side table-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "near", "target": "sofa-1"}, + {"type": "relative", "constraint": "side of", "target": "sofa-1"}, + ], + "armchair-0": [ + {"type": "global", "constraint": "middle"}, + { + "type": "distance", + "constraint": "near", + "target": "coffee table-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-0", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-0", + }, + ], + "armchair-1": [ + {"type": "global", "constraint": "middle"}, + { + "type": "distance", + "constraint": "near", + "target": "coffee table-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-1", + }, + { + "type": "direction", + "constraint": "face to", + "target": "coffee table-1", + }, + ], + "bookshelf-0": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "tv stand-0"}, + ], + "bookshelf-1": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "bookshelf-0"}, + { + "type": 
"alignment", + "constraint": "center aligned", + "target": "bookshelf-0", + }, + ], + } + + initial_state = { + "door-0": ( + (586.7550200520433, 550.0), + 0, + [ + (640.8300346432603, 500.0), + (532.6800054608262, 500.0), + (532.6800054608262, 600.0), + (640.8300346432603, 600.0), + ], + 1, + ) + } + + objects = [ + ("sofa-0", (301.6667297651499, 106.48952360032415)), + ("sofa-1", (301.6667297651499, 106.48952360032415)), + ("tv stand-0", (201.0964714933229, 59.39910836195032)), + ("coffee table-0", (69.15754261308616, 126.69169450358964)), + ("coffee table-1", (69.15754261308616, 126.69169450358964)), + ("side table-0", (61.74632023132328, 61.74453745262855)), + ("side table-1", (61.74632023132328, 61.74453745262855)), + ("armchair-0", (79.0368498902692, 89.4893987892571)), + ("armchair-1", (79.0368498902692, 89.4893987892571)), + ("bookshelf-0", (67.94689517917222, 43.8934937031396)), + ("bookshelf-1", (67.94689517917222, 43.8934937031396)), + ] + solution = self.get_solution( + room_poly, objects, constraints, initial_state, use_milp=use_milp + ) else: - constraints = {'dining table': [{'type': 'global', 'constraint': 'edge'}, - {'type': 'distance', 'constraint': 'far', 'target': 'door'}, - {'type': 'distance', 'constraint': 'near', 'target': 'chair'}], - 'chair': [{'type': 'relative', 'constraint': 'side of', 'target': 'dining table'}] + constraints = { + "dining table": [ + {"type": "global", "constraint": "edge"}, + {"type": "distance", "constraint": "far", "target": "door"}, + {"type": "distance", "constraint": "near", "target": "chair"}, + ], + "chair": [ + { + "type": "relative", + "constraint": "side of", + "target": "dining table", + } + ], + } + initial_state = { + "door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1) } - initial_state = {"door": ((50, 50), 0, ((0, 0), (100, 0), (100, 100), (0, 100)), 1)} objects = [("dining table", (100, 50)), ("chair", (50, 50))] - solution = self.get_solution(room_poly, objects, constraints, 
initial_state, use_milp=use_milp) + solution = self.get_solution( + room_poly, objects, constraints, initial_state, use_milp=use_milp + ) - print('milp solution:', len(solution)) + print("milp solution:", len(solution)) for object_name, object_properties in solution.items(): print(object_name, object_properties) # if object_properties[2] == 90 or object_properties[2] == 270: diff --git a/holodeck/generation/holodeck.py b/holodeck/generation/holodeck.py index d35fd72..b51faf2 100644 --- a/holodeck/generation/holodeck.py +++ b/holodeck/generation/holodeck.py @@ -109,14 +109,18 @@ def __init__( self.clip_model, self.clip_preprocess, self.clip_tokenizer, self.llm ) self.window_generator = WindowGenerator(self.llm) - self.object_selector = ObjectSelector(object_retriever=self.object_retriever, llm=self.llm) + self.object_selector = ObjectSelector( + object_retriever=self.object_retriever, llm=self.llm + ) self.floor_object_generator = FloorObjectGenerator( object_retriever=self.object_retriever, llm=self.llm ) self.wall_object_generator = WallObjectGenerator( object_retriever=self.object_retriever, llm=self.llm ) - self.ceiling_generator = CeilingObjectGenerator(object_retriever=self.object_retriever, llm=self.llm) + self.ceiling_generator = CeilingObjectGenerator( + object_retriever=self.object_retriever, llm=self.llm + ) self.small_object_generator = SmallObjectGenerator( object_retriever=self.object_retriever, llm=self.llm ) diff --git a/holodeck/generation/layers.py b/holodeck/generation/layers.py index 04b54bd..6327cb2 100644 --- a/holodeck/generation/layers.py +++ b/holodeck/generation/layers.py @@ -33,8 +33,8 @@ def color_rooms(room): if room == len(adjacency_list.keys()): return True for color in range(4): # Use color range 0-3 - if assign_color(list(adjacency_list.keys())[room], f'Procedural{color}'): - colors[list(adjacency_list.keys())[room]] = f'Procedural{color}' + if assign_color(list(adjacency_list.keys())[room], f"Procedural{color}"): + 
colors[list(adjacency_list.keys())[room]] = f"Procedural{color}" if color_rooms(room + 1): return True colors[list(adjacency_list.keys())[room]] = -1 @@ -48,47 +48,55 @@ def color_rooms(room): def map_asset2layer(scene): - room2layer = get_room2layer(scene['room_pairs'], scene['open_room_pairs']) - all_layers = ['Procedural0', 'Procedural1', 'Procedural2', 'Procedural3'] - + room2layer = get_room2layer(scene["room_pairs"], scene["open_room_pairs"]) + all_layers = ["Procedural0", "Procedural1", "Procedural2", "Procedural3"] + if len(scene["rooms"]) == 1: print("Only one room in the scene. Assigning the room to Procedural0.") room2layer = {scene["rooms"][0]["id"]: "Procedural0"} # Check if all rooms are assigned a layer - for room in scene['rooms']: - if room['id'] not in room2layer: - room2layer[room['id']] = 'Procedural0' + for room in scene["rooms"]: + if room["id"] not in room2layer: + room2layer[room["id"]] = "Procedural0" # Assign layer to each room - for room in scene['rooms']: - room['layer'] = room2layer[room['id']] + for room in scene["rooms"]: + room["layer"] = room2layer[room["id"]] # Assign layer to each wall - for wall in scene['walls']: - wall['layer'] = room2layer[wall['roomId']] - + for wall in scene["walls"]: + wall["layer"] = room2layer[wall["roomId"]] + # Assign layer to each object # TODO: consider small children objects - for obj in scene['objects']: - obj['layer'] = room2layer[obj['roomId']] - + for obj in scene["objects"]: + obj["layer"] = room2layer[obj["roomId"]] + # Assign layer to each window - for window in scene['windows']: - window['layer'] = room2layer[window['roomId']] + for window in scene["windows"]: + window["layer"] = room2layer[window["roomId"]] # Assign layer to each light - for light in scene['proceduralParameters']['lights']: - try: light['layer'] = room2layer[light['roomId']] - except: continue - - light['cullingMaskOff'] = [layer for layer in all_layers if layer != light['layer']] + for light in 
scene["proceduralParameters"]["lights"]: + try: + light["layer"] = room2layer[light["roomId"]] + except: + continue + + light["cullingMaskOff"] = [ + layer for layer in all_layers if layer != light["layer"] + ] return scene - + if __name__ == "__main__": - room_pairs = [('Living Room', 'Bedroom'), ('Living Room', 'Kitchen'), - ('Kitchen', 'Bathroom'), ('Bedroom', 'Bathroom')] - open_room_pairs = [('Living Room', 'Kitchen'), ('Living Room', 'Bedroom')] - room2layer = get_room2layer(room_pairs, open_room_pairs) \ No newline at end of file + room_pairs = [ + ("Living Room", "Bedroom"), + ("Living Room", "Kitchen"), + ("Kitchen", "Bathroom"), + ("Bedroom", "Bathroom"), + ] + open_room_pairs = [("Living Room", "Kitchen"), ("Living Room", "Bedroom")] + room2layer = get_room2layer(room_pairs, open_room_pairs) diff --git a/holodeck/generation/lights.py b/holodeck/generation/lights.py index c0bd79d..39779ef 100644 --- a/holodeck/generation/lights.py +++ b/holodeck/generation/lights.py @@ -32,10 +32,10 @@ def generate_lights(scene): light_height = scene["wall_height"] - 0.2 try: for object in scene["ceiling_objects"]: - if object["roomId"] == room_id: light_height = object["position"]["y"] - 0.2 + if object["roomId"] == room_id: + light_height = object["position"]["y"] - 0.2 except: light_height = scene["wall_height"] - 0.2 - lights.append( Light( @@ -53,8 +53,8 @@ def generate_lights(scene): nearPlane=0.2, resolution="FromQualitySettings", ), - roomId=room_id + roomId=room_id, ) ) - return lights \ No newline at end of file + return lights diff --git a/holodeck/generation/milp_utils.py b/holodeck/generation/milp_utils.py index c975e50..3ac03f3 100644 --- a/holodeck/generation/milp_utils.py +++ b/holodeck/generation/milp_utils.py @@ -12,8 +12,12 @@ def create_boundary_constraints(c, object_dim, bbox): cx, cy, rotate_90 = c[0], c[1], c[3] # Half-sizes considering rotation - half_width = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply(0.5 * y_size, rotate_90) - 
half_height = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply(0.5 * x_size, rotate_90) + half_width = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply( + 0.5 * y_size, rotate_90 + ) + half_height = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply( + 0.5 * x_size, rotate_90 + ) # Constraints constraints = [ @@ -41,11 +45,19 @@ def create_directional_constraints(c1, c2, object_dim_1, object_dim_2): rotate_90_2 = c2[3] # Half-sizes considering rotation - half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) - half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - - half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * y_size1, rotate_90_1 + ) + half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * x_size1, rotate_90_1 + ) + + half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) # Binary variables to determine the relative positions left_of = cp.Variable(boolean=True) @@ -66,7 +78,6 @@ def create_directional_constraints(c1, c2, object_dim_1, object_dim_2): cy1 - cy2 >= EPSILON + half_height1 + half_height2 - M * (1 - above), # Ensure that at least one of the binary variables must be True left_of + right_of + above + below >= 1, - # make sure the object does not face the wall # when "left of" is true, the object should not face left rotate_90_1 + rotate_180_1 <= 1 + M * (1 - left_of), @@ -95,11 +106,19 @@ def create_nooverlap_constraints(c1, c2, object_dim_1, object_dim_2): rotate_90_2 = c2[3] # Half-sizes considering rotation - half_width1 = 
cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) - half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - - half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_width1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * y_size1, rotate_90_1 + ) + half_height1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply( + 0.5 * x_size1, rotate_90_1 + ) + + half_width2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_height2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) # Binary variables to determine the relative positions left_of = cp.Variable(boolean=True) @@ -110,7 +129,6 @@ def create_nooverlap_constraints(c1, c2, object_dim_1, object_dim_2): # Constraints constraints = [ # Constraints for binary variable activation - # object 1 is left of object 2 cx2 - cx1 >= EPSILON + half_width1 + half_width2 - M * (1 - left_of), # object 1 is right of object 2 @@ -119,7 +137,6 @@ def create_nooverlap_constraints(c1, c2, object_dim_1, object_dim_2): cy2 - cy1 >= EPSILON + half_height1 + half_height2 - M * (1 - below), # object 1 is above object 2 cy1 - cy2 >= EPSILON + half_height1 + half_height2 - M * (1 - above), - # Ensure that at least one of the binary variables must be True left_of + right_of + above + below >= 1, ] @@ -152,7 +169,6 @@ def create_alignment_constraints(c1, c2, object_dim_1, object_dim_2): cx1 - cx2 <= M * (1 - x_aligned), cy2 - cy1 <= M * (1 - y_aligned), cy1 - cy2 <= M * (1 - y_aligned), - # Ensure that at least one of the binary variables must be True x_aligned + y_aligned >= 1, ] @@ -168,8 +184,12 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg room_min_x, 
room_min_y, room_max_x, room_max_y = room_dim # Half-sizes considering rotation - half_width1 = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply(0.5 * y_size, rotate_90) - half_height1 = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply(0.5 * x_size, rotate_90) + half_width1 = cp.multiply(0.5 * x_size, 1 - rotate_90) + cp.multiply( + 0.5 * y_size, rotate_90 + ) + half_height1 = cp.multiply(0.5 * y_size, 1 - rotate_90) + cp.multiply( + 0.5 * x_size, rotate_90 + ) a = room_min_x + half_width1 b = room_max_x - half_width1 @@ -183,7 +203,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg by_c = cp.Variable(boolean=True) by_d = cp.Variable(boolean=True) - # Constraints that link the binary variables with the conditions # Constraints if hard: @@ -192,7 +211,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg # half_width1 - half_height1 >= -M * (1 - x_size_longer), # If x_size_longer is false (0), then half_width1 must not be greater than half_height1 # half_width1 - half_height1 <= M * x_size_longer, - # Constraints for binary variable activation x - a <= M * (1 - bx_a), a - x <= M * (1 - bx_a), @@ -202,7 +220,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg c - y <= M * (1 - by_c), y - d <= M * (1 - by_d), d - y <= M * (1 - by_d), - # make sure the object does not face the wall # 0: facing up, 90: facing right ... 
# when bx_a is true, the left edge is aligned with the left wall @@ -215,7 +232,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg (1 - rotate_90) + rotate_180 <= 1 + M * (1 - by_c), # when by_d is true, the top edge is aligned with the top wall (1 - rotate_90) + (1 - rotate_180) <= 1 + M * (1 - by_d), - # Only one of the binary variables needs to be true (logical OR) bx_a + bx_b + by_c + by_d >= 1, ] @@ -228,12 +244,10 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg half_width1 - half_height1 >= -M * (1 - x_size_longer), # If x_size_longer is false (0), then half_width1 must not be greater than half_height1 half_width1 - half_height1 <= M * x_size_longer, - by_c + by_d >= 1 - M * (1 - x_size_longer), bx_a + bx_b >= 1 - M * x_size_longer, ] - return hard_constraints, [bx_a + bx_b + by_c + by_d] else: hard_constraints = [ @@ -246,7 +260,6 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg c - y <= M * (1 - by_c), y - d <= M * (1 - by_d), d - y <= M * (1 - by_d), - # when bx_a is true, the left edge is aligned with the left wall rotate_90 + rotate_180 <= 1 + M * (1 - bx_a), # when bx_b is true, the right edge is aligned with the right wall @@ -261,7 +274,7 @@ def create_edge_constraints(var, object_dim, room_dim, hard=True, use_longer_edg return hard_constraints, [bx_a + bx_b + by_c + by_d] -def create_abs_constraints(X, Y, a, constraint_type='geq'): +def create_abs_constraints(X, Y, a, constraint_type="geq"): """ Create a constraint for |X - Y| <= a or |X - Y| >= a. @@ -272,31 +285,31 @@ def create_abs_constraints(X, Y, a, constraint_type='geq'): :return: A list of one or two cvxpy constraints. 
""" constraints = [] - - if constraint_type == 'leq': + + if constraint_type == "leq": # For |X - Y| <= a, we need two inequalities: constraints.append(X - Y <= a) constraints.append(Y - X <= a) - - elif constraint_type == 'geq': + + elif constraint_type == "geq": # For |X - Y| >= a, we introduce an auxiliary boolean variable to handle the OR condition z = cp.Variable(boolean=True) # Now we create two constraints that together represent the OR condition # If z is True (1), then the first constraint (X - Y >= a) must be satisfied. # If z is False (0), then the second constraint (Y - X >= a) must be satisfied. - constraints.append((X - Y) - M*z >= a - M) - constraints.append((Y - X) - M*(1 - z) >= a - M) - + constraints.append((X - Y) - M * z >= a - M) + constraints.append((Y - X) - M * (1 - z) >= a - M) + else: raise ValueError("Invalid constraint_type. Use 'leq' or 'geq'.") - + return constraints -def create_distance_constraints(c1, c2, upper_bound, type='near'): +def create_distance_constraints(c1, c2, upper_bound, type="near"): X1, Y1 = c1[0], c1[1] X2, Y2 = c2[0], c2[1] - if type == 'near': + if type == "near": # Auxiliary variables for the absolute differences abs_diff_x = cp.Variable() abs_diff_y = cp.Variable() @@ -310,14 +323,14 @@ def create_distance_constraints(c1, c2, upper_bound, type='near'): abs_diff_x >= 0, abs_diff_y >= 0, abs_diff_x <= upper_bound[0], - abs_diff_y <= upper_bound[1] + abs_diff_y <= upper_bound[1], ] - + # L1 distance is the sum of the absolute differences l1_distance = abs_diff_x + abs_diff_y soft_constraints = [-l1_distance] - elif type == 'far': + elif type == "far": x_lower_bound = cp.Variable() y_lower_bound = cp.Variable() # Maximize L1 distance @@ -336,7 +349,7 @@ def create_distance_constraints(c1, c2, upper_bound, type='near'): soft_constraints = [x_lower_bound + y_lower_bound] else: raise ValueError("type must be 'near' or 'far'") - + # Return the objective and the constraints together return hard_constraints, 
soft_constraints @@ -345,11 +358,7 @@ def create_if_and_constraints(A, B): # A and B are binary conditions # A and B are true if and only if z is true z = cp.Variable(boolean=True) # New binary variable for the AND condition - constraints = [ - z <= A, - z <= B, - z >= A + B - 1 - ] + constraints = [z <= A, z <= B, z >= A + B - 1] return constraints, z @@ -373,187 +382,229 @@ def create_relative_constraints(c1, c2, object_dim_1, object_dim_2, constraint_t # half_xwidth1 = cp.multiply(0.5 * x_size1, 1 - rotate_90_1) + cp.multiply(0.5 * y_size1, rotate_90_1) # half_yheight1 = cp.multiply(0.5 * y_size1, 1 - rotate_90_1) + cp.multiply(0.5 * x_size1, rotate_90_1) - half_xwidth2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply(0.5 * y_size2, rotate_90_2) - half_yheight2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply(0.5 * x_size2, rotate_90_2) + half_xwidth2 = cp.multiply(0.5 * x_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * y_size2, rotate_90_2 + ) + half_yheight2 = cp.multiply(0.5 * y_size2, 1 - rotate_90_2) + cp.multiply( + 0.5 * x_size2, rotate_90_2 + ) hard_constraints = [] soft_constraints = [] - if constraint_type == 'left of': + if constraint_type == "left of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - 
rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 - M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 - M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'right of': + if constraint_type == "right of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - 
M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'side of': + if constraint_type == "side of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - # cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 
+ half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - # cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + # cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) - if constraint_type == 'in front of': + if constraint_type == "in front of": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 
1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - 
half_yheight2 - M * (1 - z), + ] + ) - if constraint_type == 'behind': + if constraint_type == "behind": # if rotate_90_2 == 0 and rotate_180_2 == 0, face up - constraints, z = create_if_and_constraints(1-rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) # constraints activated by z being true - hard_constraints.extend([ - cy1 <= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cy1 <= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) ## if rotate_90_2 == 1 and rotate_180_2 == 0, face right - constraints, z = create_if_and_constraints(rotate_90_2, 1-rotate_180_2) + constraints, z = create_if_and_constraints(rotate_90_2, 1 - rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 <= cx2 + M * (1 - z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 <= cx2 + M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) # if rotate_90_2 == 0 and rotate_180_2 == 1, face down - constraints, z = create_if_and_constraints(1-rotate_90_2, rotate_180_2) + constraints, z = create_if_and_constraints(1 - rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cy1 >= cy2 + M * (1 - z), - cx1 <= cx2 + half_xwidth2 + M * (1 - z), - cx1 >= cx2 - half_xwidth2 - M * (1 - z), - - ]) + hard_constraints.extend( + [ + cy1 >= cy2 + M * (1 - z), + cx1 <= cx2 + half_xwidth2 + M * (1 - z), + cx1 >= cx2 - half_xwidth2 - M * (1 - z), + ] + ) # if rotate_90_2 == 1 and rotate_180_2 == 1, face left constraints, z = create_if_and_constraints(rotate_90_2, rotate_180_2) hard_constraints.extend(constraints) - hard_constraints.extend([ - cx1 >= cx2 - M * (1 - 
z), - cy1 <= cy2 + half_yheight2 + M * (1 - z), - cy1 >= cy2 - half_yheight2 - M * (1 - z), - ]) + hard_constraints.extend( + [ + cx1 >= cx2 - M * (1 - z), + cy1 <= cy2 + half_yheight2 + M * (1 - z), + cy1 >= cy2 - half_yheight2 - M * (1 - z), + ] + ) - return hard_constraints \ No newline at end of file + return hard_constraints diff --git a/holodeck/generation/objaverse_retriever.py b/holodeck/generation/objaverse_retriever.py index 7975b0e..0b32490 100644 --- a/holodeck/generation/objaverse_retriever.py +++ b/holodeck/generation/objaverse_retriever.py @@ -88,7 +88,9 @@ def retrieve(self, queries, threshold=28): query_feature_clip = F.normalize(query_feature_clip, p=2, dim=-1) - clip_similarities = 100 * torch.einsum('ij, lkj -> ilk', query_feature_clip, self.clip_features) + clip_similarities = 100 * torch.einsum( + "ij, lkj -> ilk", query_feature_clip, self.clip_features + ) clip_similarities = torch.max(clip_similarities, dim=-1).values query_feature_sbert = self.sbert_model.encode( diff --git a/holodeck/generation/prompts.py b/holodeck/generation/prompts.py index 328eb16..d5fd0b8 100644 --- a/holodeck/generation/prompts.py +++ b/holodeck/generation/prompts.py @@ -52,7 +52,7 @@ Provide a concise response, omitting any additional text at the beginning or end. """ -object_selection_prompt = """Assist me in selecting large, floor-based objects to furnish each room, excluding mats, carpets, and rugs. Provide a comprehensive description since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity. +object_selection_prompt = """Assist me in selecting large, floor-based objects to furnish each room, excluding mats, carpets, and rugs. Provide a comprehensive description since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity. 
Present your recommendations in this format: room type | object category | object description | quantity For example: @@ -113,7 +113,7 @@ Please first use natural language to explain your high-level design strategy, and then follow the desired format *strictly* (do not add any additional text at the beginning or end) to provide the constraints for each object.""" -wall_object_selection_prompt = """Assist me in selecting wall-based objects to furnish each room. +wall_object_selection_prompt = """Assist me in selecting wall-based objects to furnish each room. Present your recommendations in this format: room type | object category | object description | quantity For example: living room | painting | abstract painting | 2 @@ -136,7 +136,7 @@ Please do not add additional text at the beginning or in the end.""" -ceiling_selection_prompt = """Assist me in selecting ceiling objects (light/fan) to furnish each room. +ceiling_selection_prompt = """Assist me in selecting ceiling objects (light/fan) to furnish each room. Present your recommendations in this format: room type | ceiling object description For example: living room | modern, 3-light, semi-flush mount ceiling light @@ -147,7 +147,7 @@ Your response should be precise, without additional text at the beginning or end. """ -small_object_selection_prompt = """As an experienced room designer, you are tasked to bring life into the room by strategically placing more *small* objects. Those objects should only be arranged *on top of* large objects which serve as receptacles. +small_object_selection_prompt = """As an experienced room designer, you are tasked to bring life into the room by strategically placing more *small* objects. Those objects should only be arranged *on top of* large objects which serve as receptacles. The output should be formatted as follows: receptacle | small object-1, quantity, variance type | small object-2, quantity, variance type | ... 
Here, the variance type specifies whether the small objects are same or varied. There's no restriction on the number of small objects you can select for each receptacle. An example of this format is as follows: sofa-0 (living room) | remote control for TV, 1, same | book, 2, varied | gray fabric pillow, 2, varied @@ -174,7 +174,7 @@ # Your response should be precise, without additional text at the beginning or end. -object_selection_prompt_1 = """You are an experienced room designer, please assist me in selecting *large* floor and wall objects to furnish each room. I want the objects that can be directly placed on the floor or wall, *not* the small objects that need to be placed on the large objects. +object_selection_prompt_1 = """You are an experienced room designer, please assist me in selecting *large* floor and wall objects to furnish each room. I want the objects that can be directly placed on the floor or wall, *not* the small objects that need to be placed on the large objects. You must provide a comprehensive description for each object since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity and variance type (same or varied). Present your recommendations in this format: room type | location | object category | object description | quantity, variance type For example: @@ -203,7 +203,7 @@ Agent: """ -object_selection_prompt_new_1 = """You are an experienced room designer, please assist me in selecting large *floor*/*wall* objects and small objects on top of them to furnish the room. You need to select appropriate objects to satisfy the customer's requirements. +object_selection_prompt_new_1 = """You are an experienced room designer, please assist me in selecting large *floor*/*wall* objects and small objects on top of them to furnish the room. You need to select appropriate objects to satisfy the customer's requirements. 
You must provide a description and desired size for each object since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity and variance type (same or varied). Present your recommendations in JSON format: { diff --git a/holodeck/generation/rooms.py b/holodeck/generation/rooms.py index 388d1fd..4558251 100644 --- a/holodeck/generation/rooms.py +++ b/holodeck/generation/rooms.py @@ -20,33 +20,44 @@ from holodeck.constants import HOLODECK_BASE_DATA_DIR -class FloorPlanGenerator(): +class FloorPlanGenerator: def __init__(self, clip_model, clip_process, clip_tokenizer, llm: OpenAI): - self.json_template = {"ceilings": [], "children": [], "vertices": None, - "floorMaterial": {"name": None, "color": None}, - "floorPolygon": [], "id": None, "roomType": None} - self.material_selector = MaterialSelector(clip_model, clip_process, clip_tokenizer) - self.floor_plan_template = PromptTemplate(input_variables=["input", "additional_requirements"], template=prompts.floor_plan_prompt) + self.json_template = { + "ceilings": [], + "children": [], + "vertices": None, + "floorMaterial": {"name": None, "color": None}, + "floorPolygon": [], + "id": None, + "roomType": None, + } + self.material_selector = MaterialSelector( + clip_model, clip_process, clip_tokenizer + ) + self.floor_plan_template = PromptTemplate( + input_variables=["input", "additional_requirements"], + template=prompts.floor_plan_prompt, + ) self.llm = llm self.used_assets = [] - def generate_rooms(self, scene, additional_requirements="N/A", visualize=False): # get floor plan if not provided - floor_plan_prompt = self.floor_plan_template.format(input=scene["query"], additional_requirements=additional_requirements) + floor_plan_prompt = self.floor_plan_template.format( + input=scene["query"], additional_requirements=additional_requirements + ) if "raw_floor_plan" not in scene: raw_floor_plan = self.llm(floor_plan_prompt) scene["raw_floor_plan"] = 
raw_floor_plan else: raw_floor_plan = scene["raw_floor_plan"] - + print(f"User: {floor_plan_prompt}\n") print(f"{Fore.GREEN}AI: Here is the floor plan:\n{raw_floor_plan}{Fore.RESET}") - + rooms = self.get_plan(scene["query"], scene["raw_floor_plan"], visualize) return rooms - def get_plan(self, query, raw_plan, visualize=False): parsed_plan = self.parse_raw_plan(raw_plan) @@ -56,16 +67,20 @@ def get_plan(self, query, raw_plan, visualize=False): all_designs.append(room["floor_design"]) all_designs.append(room["wall_design"]) design2material = self.select_materials(all_designs, topk=5) - + # assign materials for i in range(len(parsed_plan)): - parsed_plan[i]["floorMaterial"] = design2material[parsed_plan[i]["floor_design"]] - parsed_plan[i]["wallMaterial"] = design2material[parsed_plan[i]["wall_design"]] + parsed_plan[i]["floorMaterial"] = design2material[ + parsed_plan[i]["floor_design"] + ] + parsed_plan[i]["wallMaterial"] = design2material[ + parsed_plan[i]["wall_design"] + ] - if visualize: self.visualize_floor_plan(query, parsed_plan) + if visualize: + self.visualize_floor_plan(query, parsed_plan) return parsed_plan - def parse_raw_plan(self, raw_plan): parsed_plan = [] @@ -73,9 +88,10 @@ def parse_raw_plan(self, raw_plan): plans = [plan.lower() for plan in raw_plan.split("\n") if "|" in plan] for i, plan in enumerate(plans): room_type, floor_design, wall_design, vertices = plan.split("|") - room_type = room_type.strip().replace("'", "") # remove single quote + room_type = room_type.strip().replace("'", "") # remove single quote - if room_type in room_types: room_type += f"-{i}" + if room_type in room_types: + room_type += f"-{i}" room_types.append(room_type) floor_design = floor_design.strip() @@ -87,7 +103,9 @@ def parse_raw_plan(self, raw_plan): current_plan = copy.deepcopy(self.json_template) current_plan["id"] = room_type current_plan["roomType"] = room_type - current_plan["vertices"], current_plan["floorPolygon"] = self.vertices2xyz(vertices) + 
current_plan["vertices"], current_plan["floorPolygon"] = self.vertices2xyz( + vertices + ) current_plan["floor_design"] = floor_design current_plan["wall_design"] = wall_design parsed_plan.append(current_plan) @@ -101,46 +119,62 @@ def parse_raw_plan(self, raw_plan): for room in parsed_plan: full_vertices = self.get_full_vertices(room["vertices"], all_vertices) full_vertices = list(set(map(tuple, full_vertices))) - room["full_vertices"], room["floorPolygon"] = self.vertices2xyz(full_vertices) - + room["full_vertices"], room["floorPolygon"] = self.vertices2xyz( + full_vertices + ) + valid, msg = self.check_validity(parsed_plan) - if not valid: print(f"{Fore.RED}AI: {msg}{Fore.RESET}"); raise ValueError(msg) - else: print(f"{Fore.GREEN}AI: {msg}{Fore.RESET}"); return parsed_plan - + if not valid: + print(f"{Fore.RED}AI: {msg}{Fore.RESET}") + raise ValueError(msg) + else: + print(f"{Fore.GREEN}AI: {msg}{Fore.RESET}") + return parsed_plan def vertices2xyz(self, vertices): sort_vertices = self.sort_vertices(vertices) - xyz_vertices = [{"x": vertex[0], "y": 0, "z": vertex[1]} for vertex in sort_vertices] + xyz_vertices = [ + {"x": vertex[0], "y": 0, "z": vertex[1]} for vertex in sort_vertices + ] return sort_vertices, xyz_vertices - def xyz2vertices(self, xyz_vertices): vertices = [(vertex["x"], vertex["z"]) for vertex in xyz_vertices] return vertices - def sort_vertices(self, vertices): # Calculate the centroid of the polygon cx = sum(x for x, y in vertices) / max(len(vertices), 1) cy = sum(y for x, y in vertices) / max(len(vertices), 1) # Sort the vertices in clockwise order - vertices_clockwise = sorted(vertices, key=lambda v: (-math.atan2(v[1]-cy, v[0]-cx)) % (2*math.pi)) + vertices_clockwise = sorted( + vertices, key=lambda v: (-math.atan2(v[1] - cy, v[0] - cx)) % (2 * math.pi) + ) # Find the vertex with the smallest x value min_vertex = min(vertices_clockwise, key=lambda v: v[0]) # Rotate the vertices so the vertex with the smallest x value is first min_index = 
vertices_clockwise.index(min_vertex) - vertices_clockwise = vertices_clockwise[min_index:] + vertices_clockwise[:min_index] + vertices_clockwise = ( + vertices_clockwise[min_index:] + vertices_clockwise[:min_index] + ) return vertices_clockwise - def get_full_vertices(self, original_vertices, all_vertices): # Create line segments from the original vertices - lines = [LineString([original_vertices[i], original_vertices[(i+1)%len(original_vertices)]]) for i in range(len(original_vertices))] + lines = [ + LineString( + [ + original_vertices[i], + original_vertices[(i + 1) % len(original_vertices)], + ] + ) + for i in range(len(original_vertices)) + ] # Check each vertex against each line segment full_vertices = [] @@ -152,19 +186,25 @@ def get_full_vertices(self, original_vertices, all_vertices): return full_vertices - def select_materials(self, designs, topk): - candidate_materials = self.material_selector.match_material(designs, topk=topk)[0] + candidate_materials = self.material_selector.match_material(designs, topk=topk)[ + 0 + ] candidate_colors = self.material_selector.select_color(designs, topk=topk)[0] - + # remove used materials top_materials = [[materials[0]] for materials in candidate_materials] - candidate_materials = [[material for material in materials if material not in self.used_assets] for materials in candidate_materials] + candidate_materials = [ + [material for material in materials if material not in self.used_assets] + for materials in candidate_materials + ] selected_materials = [] for i in range(len(designs)): - if len(candidate_materials[i]) == 0: selected_materials.append(top_materials[i][0]) - else: selected_materials.append(candidate_materials[i][0]) + if len(candidate_materials[i]) == 0: + selected_materials.append(top_materials[i][0]) + else: + selected_materials.append(candidate_materials[i][0]) selected_colors = [candidates[0] for candidates in candidate_colors] @@ -173,62 +213,81 @@ def select_materials(self, designs, topk): 
design2materials[design]["name"] = selected_materials[i] # design2materials[design]["color"] = self.color2rgb(selected_colors[i]) return design2materials - def color2rgb(self, color_name): rgb = mcolors.to_rgb(color_name) return {"r": rgb[0], "g": rgb[1], "b": rgb[2]} - def parsed2raw(self, rooms): raw_plan = "" for room in rooms: - raw_plan += " | ".join([room["roomType"], room["floor_design"], room["wall_design"], str(room["vertices"])]) + raw_plan += " | ".join( + [ + room["roomType"], + room["floor_design"], + room["wall_design"], + str(room["vertices"]), + ] + ) raw_plan += "\n" return raw_plan - def check_interior_angles(self, vertices): n = len(vertices) for i in range(n): a, b, c = vertices[i], vertices[(i + 1) % n], vertices[(i + 2) % n] - angle = abs(math.degrees(math.atan2(c[1]-b[1], c[0]-b[0]) - math.atan2(a[1]-b[1], a[0]-b[0]))) + angle = abs( + math.degrees( + math.atan2(c[1] - b[1], c[0] - b[0]) + - math.atan2(a[1] - b[1], a[0] - b[0]) + ) + ) if angle < 90 or angle > 270: return False return True - def check_validity(self, rooms): room_polygons = [Polygon(room["vertices"]) for room in rooms] # check interior angles for room in rooms: if not self.check_interior_angles(room["vertices"]): - return False, "All interior angles of the room must be greater than or equal to 90 degrees." - - if len(room_polygons) == 1: + return ( + False, + "All interior angles of the room must be greater than or equal to 90 degrees.", + ) + + if len(room_polygons) == 1: return True, "The floor plan is valid. 
(Only one room)" - + # check overlap, connectivity and vertex inside another room for i in range(len(room_polygons)): has_neighbor = False for j in range(len(room_polygons)): if i != j: - if room_polygons[i].equals(room_polygons[j]) or room_polygons[i].contains(room_polygons[j]) or room_polygons[j].contains(room_polygons[i]): + if ( + room_polygons[i].equals(room_polygons[j]) + or room_polygons[i].contains(room_polygons[j]) + or room_polygons[j].contains(room_polygons[i]) + ): return False, "Room polygons must not overlap." intersection = room_polygons[i].intersection(room_polygons[j]) if isinstance(intersection, LineString): has_neighbor = True for vertex in rooms[j]["vertices"]: if Polygon(rooms[i]["vertices"]).contains(Point(vertex)): - return False, "No vertex of a room can be inside another room." + return ( + False, + "No vertex of a room can be inside another room.", + ) if not has_neighbor: - return False, "Each room polygon must share an edge with at least one other room polygon." + return ( + False, + "Each room polygon must share an edge with at least one other room polygon.", + ) return True, "The floor plan is valid." 
- - def visualize_floor_plan(self, query, parsed_plan): plt.rcParams["font.family"] = "Times New Roman" plt.rcParams["font.size"] = 22 @@ -245,7 +304,9 @@ def midpoint(p1, p2): for i, room in enumerate(parsed_plan): coordinates = room["vertices"] - polygon = patches.Polygon(coordinates, closed=True, edgecolor='black', linewidth=2) + polygon = patches.Polygon( + coordinates, closed=True, edgecolor="black", linewidth=2 + ) polygon.set_facecolor(colors[i % len(colors)]) ax.add_patch(polygon) @@ -258,7 +319,7 @@ def midpoint(p1, p2): # ax.text(room_x, room_y, room["roomType"], ha='center', va='center') # Add points to the corners - ax.scatter(x, y, s=100, color='black') # s is the size of the point + ax.scatter(x, y, s=100, color="black") # s is the size of the point # # Display width and length # for i in range(len(coordinates)): @@ -267,21 +328,25 @@ def midpoint(p1, p2): # ax.text(*midpoint(p1, p2), label, ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', edgecolor='black', boxstyle='round4')) # Set aspect of the plot to be equal, so squares appear as squares - ax.set_aspect('equal') + ax.set_aspect("equal") ax.autoscale_view() # Turn off the axis - ax.axis('off') + ax.axis("off") folder_name = query.replace(" ", "_") - plt.savefig(f"{folder_name}.pdf", bbox_inches='tight', dpi=300) + plt.savefig(f"{folder_name}.pdf", bbox_inches="tight", dpi=300) plt.show() - -class MaterialSelector(): + +class MaterialSelector: def __init__(self, clip_model, clip_preprocess, clip_tokenizer): - materials = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR,"materials/material-database.json")) - self.selected_materials = materials["Wall"] + materials["Wood"] + materials["Fabric"] + materials = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material-database.json") + ) + self.selected_materials = ( + materials["Wall"] + materials["Wood"] + materials["Fabric"] + ) self.colors = list(mcolors.CSS4_COLORS.keys()) self.clip_model = clip_model 
@@ -290,42 +355,78 @@ def __init__(self, clip_model, clip_preprocess, clip_tokenizer): self.load_features() - - def load_features(self): + def load_features(self): try: - self.material_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl")) + self.material_feature_clip = compress_pickle.load( + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl" + ) + ) except: print("Precompute image features for materials...") self.material_feature_clip = [] for material in tqdm(self.selected_materials): - image = self.preprocess(Image.open(os.path.join(HOLODECK_BASE_DATA_DIR, f"materials/images/{material}.png"))).unsqueeze(0) + image = self.preprocess( + Image.open( + os.path.join( + HOLODECK_BASE_DATA_DIR, f"materials/images/{material}.png" + ) + ) + ).unsqueeze(0) with torch.no_grad(): image_features = self.clip_model.encode_image(image) image_features /= image_features.norm(dim=-1, keepdim=True) self.material_feature_clip.append(image_features) self.material_feature_clip = torch.vstack(self.material_feature_clip) - compress_pickle.dump(self.material_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl")) - + compress_pickle.dump( + self.material_feature_clip, + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/material_feature_clip.pkl" + ), + ) + try: - self.color_feature_clip = compress_pickle.load(os.path.join(HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl")) + self.color_feature_clip = compress_pickle.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl") + ) except: print("Precompute text features for colors...") with torch.no_grad(): - self.color_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(self.colors)) - self.color_feature_clip /= self.color_feature_clip.norm(dim=-1, keepdim=True) - - compress_pickle.dump(self.color_feature_clip, os.path.join(HOLODECK_BASE_DATA_DIR, 
"materials/color_feature_clip.pkl")) - + self.color_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(self.colors) + ) + self.color_feature_clip /= self.color_feature_clip.norm( + dim=-1, keepdim=True + ) + + compress_pickle.dump( + self.color_feature_clip, + os.path.join( + HOLODECK_BASE_DATA_DIR, "materials/color_feature_clip.pkl" + ), + ) def match_material(self, queries, topk=5): with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) - + clip_similarity = query_feature_clip @ self.material_feature_clip.T - string_similarity = torch.tensor([[self.string_match(query, material) for material in self.selected_materials] for query in queries]) - - joint_similarity = string_similarity + clip_similarity # use visual embedding only seems to be better + string_similarity = torch.tensor( + [ + [ + self.string_match(query, material) + for material in self.selected_materials + ] + for query in queries + ] + ) + + joint_similarity = ( + string_similarity + clip_similarity + ) # use visual embedding only seems to be better results = [] scores = [] @@ -335,10 +436,11 @@ def match_material(self, queries, topk=5): scores.append([sim[ind] for ind in indices]) return results, scores - def select_color(self, queries, topk=5): with torch.no_grad(): - query_feature_clip = self.clip_model.encode_text(self.clip_tokenizer(queries)) + query_feature_clip = self.clip_model.encode_text( + self.clip_tokenizer(queries) + ) query_feature_clip /= query_feature_clip.norm(dim=-1, keepdim=True) clip_similarity = query_feature_clip @ self.color_feature_clip.T @@ -349,7 +451,6 @@ def select_color(self, queries, topk=5): results.append([self.colors[ind] for ind in indices]) scores.append([sim[ind] for ind in indices]) return results, scores - def string_match(self, a, b): - return 
SequenceMatcher(None, a, b).ratio() \ No newline at end of file + return SequenceMatcher(None, a, b).ratio() diff --git a/holodeck/generation/skybox.py b/holodeck/generation/skybox.py index 8643724..2194dd9 100644 --- a/holodeck/generation/skybox.py +++ b/holodeck/generation/skybox.py @@ -2,12 +2,34 @@ from procthor.utils.types import RGB, Vector3 -skyboxes = ["Sky1", "Sky2", "SkyAlbany", "SkyAlbanyHill", "SkyDalyCity", "SkyEmeryville", "SkyGarden", "SkyTropical", - "SkyGasworks", "SkyMosconeCenter", "SkyMountain", "SkyOakland", "SkySeaStacks", "SkySFCityHall", "Sky2Dusk", - "SkySFDowntown", "SkySFGarden", "SkySnow", "SkyNeighborhood", "SkySouthLakeUnion", "SkySunset", "SkyTreeFarm"] +skyboxes = [ + "Sky1", + "Sky2", + "SkyAlbany", + "SkyAlbanyHill", + "SkyDalyCity", + "SkyEmeryville", + "SkyGarden", + "SkyTropical", + "SkyGasworks", + "SkyMosconeCenter", + "SkyMountain", + "SkyOakland", + "SkySeaStacks", + "SkySFCityHall", + "Sky2Dusk", + "SkySFDowntown", + "SkySFGarden", + "SkySnow", + "SkyNeighborhood", + "SkySouthLakeUnion", + "SkySunset", + "SkyTreeFarm", +] # timeOfDays = ["Midday", "GoldenHour", "BlueHour", "Midnight"] timeOfDays = ["Midday", "GoldenHour", "BlueHour"] + def getSkybox(scene): skybox = random.choice(skyboxes) time_of_day = random.choice(timeOfDays) @@ -40,4 +62,4 @@ def getSkybox(scene): directional_light["rgb"] = RGB(r=0.93, g=0.965, b=1.0) directional_light["rotation"] = Vector3(x=41, y=-50, z=0) - return scene \ No newline at end of file + return scene diff --git a/holodeck/generation/small_objects.py b/holodeck/generation/small_objects.py index 2d25779..5da2058 100644 --- a/holodeck/generation/small_objects.py +++ b/holodeck/generation/small_objects.py @@ -12,33 +12,47 @@ from holodeck.constants import THOR_COMMIT_ID from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import get_bbox_dims, get_annotations, get_secondary_properties +from holodeck.generation.utils import ( + get_bbox_dims, + 
get_annotations, + get_secondary_properties, +) -class SmallObjectGenerator(): +class SmallObjectGenerator: def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): self.llm = llm self.object_retriever = object_retriever self.database = object_retriever.database # set kinematic to false for small objects - self.json_template = {"assetId": None, "id": None, "kinematic": False, - "position": {}, "rotation": {}, "material": None, "roomId": None} + self.json_template = { + "assetId": None, + "id": None, + "kinematic": False, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.clip_threshold = 30 self.used_assets = [] self.reuse_assets = True - def generate_small_objects(self, scene, controller, receptacle_ids): object_selection_plan = scene["object_selection_plan"] receptacle2asset_id = self.get_receptacle2asset_id(scene, receptacle_ids) # receptacle2rotation = self.get_receptacle2rotation(scene, receptacle_ids) # receptacle2position = self.get_receptacle2position(scene, receptacle_ids) - - if "receptacle2small_objects" in scene and self.reuse_assets: receptacle2small_objects = scene["receptacle2small_objects"] - else: receptacle2small_objects = self.select_small_objects(object_selection_plan, receptacle_ids, receptacle2asset_id) + + if "receptacle2small_objects" in scene and self.reuse_assets: + receptacle2small_objects = scene["receptacle2small_objects"] + else: + receptacle2small_objects = self.select_small_objects( + object_selection_plan, receptacle_ids, receptacle2asset_id + ) results = [] # Place the objects @@ -46,43 +60,54 @@ def generate_small_objects(self, scene, controller, receptacle_ids): placements = [] for object_name, asset_id, _ in small_objects: thin, rotation = self.check_thin_asset(asset_id) - small, y_rotation = self.check_small_asset(asset_id) # check if the object is small and rotate around y axis randomly + small, y_rotation = self.check_small_asset( + asset_id + ) # check if the object is small and 
rotate around y axis randomly obj = self.place_object(controller, asset_id, receptacle, rotation) - if obj != None: # If the object is successfully placed + if obj != None: # If the object is successfully placed placement = self.json_template.copy() placement["assetId"] = asset_id placement["id"] = f"{object_name}|{receptacle}" placement["position"] = obj["position"] asset_height = get_bbox_dims(self.database[asset_id])["y"] - if obj["position"]["y"] + asset_height > scene["wall_height"]: continue # if the object is too high, skip it + if obj["position"]["y"] + asset_height > scene["wall_height"]: + continue # if the object is too high, skip it - placement["position"]["y"] = obj["position"]["y"] + (asset_height / 2) + 0.001 # add half of the height to the y position and a small offset + placement["position"]["y"] = ( + obj["position"]["y"] + (asset_height / 2) + 0.001 + ) # add half of the height to the y position and a small offset placement["rotation"] = obj["rotation"] placement["roomId"] = receptacle.split("(")[1].split(")")[0] # temporary solution fix position and rotation for thin objects - if thin: placement = self.fix_placement_for_thin_assets(placement) + if thin: + placement = self.fix_placement_for_thin_assets(placement) - if small: placement["rotation"]["y"] = y_rotation # temporary solution for random rotation around y axis for small objects + if small: + placement["rotation"][ + "y" + ] = y_rotation # temporary solution for random rotation around y axis for small objects # else: placement["rotation"]["y"] = receptacle2rotation[receptacle]["y"] - if not small and not thin: placement["kinematic"] = True # set kinematic to true for non-small objects + if not small and not thin: + placement["kinematic"] = ( + True # set kinematic to true for non-small objects + ) if "CanBreak" in get_secondary_properties(self.database[asset_id]): placement["kinematic"] = True placements.append(placement) - + # TODO: check collision between small objects on the same 
receptacle valid_placements = self.check_collision(placements) results.extend(valid_placements) controller.stop() return results, receptacle2small_objects - def get_receptacle2asset_id(self, scene, receptacle_ids): receptacle2asset_id = {} @@ -92,7 +117,6 @@ def get_receptacle2asset_id(self, scene, receptacle_ids): # if receptacle_id not in receptacle2asset_id and "___" in receptacle_id: # receptacle2asset_id[receptacle_id] = receptacle2asset_id[receptacle_id.split("___")[0]] return receptacle2asset_id - def get_receptacle2rotation(self, scene, receptacle_ids): receptacle2rotation = {} @@ -103,7 +127,6 @@ def get_receptacle2rotation(self, scene, receptacle_ids): # receptacle2rotation[receptacle_id] = receptacle2rotation[receptacle_id.split("___")[0]] return receptacle2rotation - def get_receptacle2position(self, scene, receptacle_ids): receptacle2rotation = {} for object in scene["objects"]: @@ -113,8 +136,9 @@ def get_receptacle2position(self, scene, receptacle_ids): # receptacle2rotation[receptacle_id] = receptacle2rotation[receptacle_id.split("___")[0]] return receptacle2rotation - - def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle2asset_id): + def select_small_objects( + self, object_selection_plan, recpetacle_ids, receptacle2asset_id + ): children_plans = [] for room_type, objects in object_selection_plan.items(): for object_name, object_info in objects.items(): @@ -123,20 +147,26 @@ def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle child_plan["room_type"] = room_type child_plan["parent"] = object_name children_plans.append(child_plan) - + receptacle2small_object_plans = {} for receptacle_id in recpetacle_ids: small_object_plans = [] for child_plan in children_plans: - if child_plan["room_type"] in receptacle_id and child_plan["parent"] in receptacle_id: + if ( + child_plan["room_type"] in receptacle_id + and child_plan["parent"] in receptacle_id + ): small_object_plans.append(child_plan) if 
len(small_object_plans) > 0: receptacle2small_object_plans[receptacle_id] = small_object_plans - + receptacle2small_objects = {} - packed_args = [(receptacle, small_objects, receptacle2asset_id) for receptacle, small_objects in receptacle2small_object_plans.items()] + packed_args = [ + (receptacle, small_objects, receptacle2asset_id) + for receptacle, small_objects in receptacle2small_object_plans.items() + ] pool = multiprocessing.Pool(processes=4) results = pool.map(self.select_small_objects_per_receptacle, packed_args) pool.close() @@ -146,44 +176,65 @@ def select_small_objects(self, object_selection_plan, recpetacle_ids, receptacle receptacle2small_objects[result[0]] = result[1] return receptacle2small_objects - def select_small_objects_per_receptacle(self, args): receptacle, small_objects, receptacle2asset_id = args results = [] - receptacle_dimensions = get_bbox_dims(self.database[receptacle2asset_id[receptacle]]) + receptacle_dimensions = get_bbox_dims( + self.database[receptacle2asset_id[receptacle]] + ) receptacle_size = [receptacle_dimensions["x"], receptacle_dimensions["z"]] receptacle_area = receptacle_size[0] * receptacle_size[1] capacity = 0 num_objects = 0 sorted(receptacle_size) for small_object in small_objects: - object_name, quantity, variance_type = small_object["object_name"], small_object["quantity"], small_object["variance_type"] - quantity = min(quantity, 5) # maximum 5 objects per receptacle + object_name, quantity, variance_type = ( + small_object["object_name"], + small_object["quantity"], + small_object["variance_type"], + ) + quantity = min(quantity, 5) # maximum 5 objects per receptacle print(f"Selecting {quantity} {object_name} for {receptacle}") # Select the object - candidates = self.object_retriever.retrieve([f"a 3D model of {object_name}"], self.clip_threshold) - candidates = [candidate for candidate in candidates - if get_annotations(self.database[candidate[0]])["onObject"] == True] # Only select objects that can be placed on 
other objects - - valid_candidates = [] # Only select objects with high confidence + candidates = self.object_retriever.retrieve( + [f"a 3D model of {object_name}"], self.clip_threshold + ) + candidates = [ + candidate + for candidate in candidates + if get_annotations(self.database[candidate[0]])["onObject"] == True + ] # Only select objects that can be placed on other objects + + valid_candidates = [] # Only select objects with high confidence for candidate in candidates: candidate_dimensions = get_bbox_dims(self.database[candidate[0]]) candidate_size = [candidate_dimensions["x"], candidate_dimensions["z"]] sorted(candidate_size) - if candidate_size[0] < receptacle_size[0] * 0.9 and candidate_size[1] < receptacle_size[1] * 0.9: # if the object is smaller than the receptacle, threshold is 90% + if ( + candidate_size[0] < receptacle_size[0] * 0.9 + and candidate_size[1] < receptacle_size[1] * 0.9 + ): # if the object is smaller than the receptacle, threshold is 90% valid_candidates.append(candidate) - - if len(valid_candidates) == 0: print(f"No valid candidate for {object_name}."); continue + + if len(valid_candidates) == 0: + print(f"No valid candidate for {object_name}.") + continue # remove used assets top_one_candidate = valid_candidates[0] - if len(valid_candidates) > 1: valid_candidates = [candidate for candidate in valid_candidates if candidate[0] not in self.used_assets] - if len(valid_candidates) == 0: valid_candidates = [top_one_candidate] - - valid_candidates = valid_candidates[:5] # only select top 5 candidates + if len(valid_candidates) > 1: + valid_candidates = [ + candidate + for candidate in valid_candidates + if candidate[0] not in self.used_assets + ] + if len(valid_candidates) == 0: + valid_candidates = [top_one_candidate] + + valid_candidates = valid_candidates[:5] # only select top 5 candidates selected_asset_ids = [] if variance_type == "same": @@ -196,21 +247,33 @@ def select_small_objects_per_receptacle(self, args): selected_candidate = 
self.random_select(valid_candidates) selected_asset_id = selected_candidate[0] selected_asset_ids.append(selected_asset_id) - if len(valid_candidates) > 1: valid_candidates.remove(selected_candidate) - + if len(valid_candidates) > 1: + valid_candidates.remove(selected_candidate) + for i in range(quantity): - small_object_dimensions = get_bbox_dims(self.database[selected_asset_ids[i]]) - small_object_sizes = [small_object_dimensions["x"], small_object_dimensions["y"], small_object_dimensions["z"]] + small_object_dimensions = get_bbox_dims( + self.database[selected_asset_ids[i]] + ) + small_object_sizes = [ + small_object_dimensions["x"], + small_object_dimensions["y"], + small_object_dimensions["z"], + ] sorted(small_object_sizes) # small_object_area = small_object_dimensions["x"] * small_object_dimensions["z"] # take the maximum 2 dimensions and multiply them small_object_area = small_object_sizes[1] * small_object_sizes[2] * 0.8 capacity += small_object_area num_objects += 1 - if capacity > receptacle_area * 0.9 and num_objects > 1: print(f"Warning: {receptacle} is overfilled."); break - if num_objects > 15: print(f"Warning: {receptacle} has too many objects."); break - else: results.append((f"{object_name}-{i}", selected_asset_ids[i])) - + if capacity > receptacle_area * 0.9 and num_objects > 1: + print(f"Warning: {receptacle} is overfilled.") + break + if num_objects > 15: + print(f"Warning: {receptacle} has too many objects.") + break + else: + results.append((f"{object_name}-{i}", selected_asset_ids[i])) + ordered_small_objects = [] for object_name, asset_id in results: dimensions = get_bbox_dims(self.database[asset_id]) @@ -220,7 +283,6 @@ def select_small_objects_per_receptacle(self, args): return receptacle, ordered_small_objects - def start_controller(self, scene, objaverse_dir): controller = Controller( commit_id=THOR_COMMIT_ID, @@ -238,7 +300,6 @@ def start_controller(self, scene, objaverse_dir): ), ) return controller - def place_object(self, 
controller, object_id, receptacle_id, rotation=[0, 0, 0]): generated_id = f"small|{object_id}" @@ -250,8 +311,8 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) position=Vector3(x=0, y=FLOOR_Y - 20, z=0), rotation=Vector3(x=0, y=0, z=0), renderImage=False, - ) - + ) + # Place the object in the receptacle # Question: Can I spawn multiple objects at once? event = controller.step( @@ -263,10 +324,12 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) allowFloor=False, renderImage=False, allowMoveable=True, - numPlacementAttempts=10, # TODO: need to find a better way to determine the number of placement attempts + numPlacementAttempts=10, # TODO: need to find a better way to determine the number of placement attempts ) - obj = next(obj for obj in event.metadata["objects"] if obj["objectId"] == generated_id) + obj = next( + obj for obj in event.metadata["objects"] if obj["objectId"] == generated_id + ) center_position = obj["axisAlignedBoundingBox"]["center"].copy() if event and center_position["y"] > FLOOR_Y: @@ -276,123 +339,163 @@ def place_object(self, controller, object_id, receptacle_id, rotation=[0, 0, 0]) action="DisableObject", objectId=generated_id, renderImage=False, - ) + ) return None - def check_thin_asset(self, asset_id): dimensions = get_bbox_dims(self.database[asset_id]) - twod_size = (dimensions["x"]*100, dimensions["z"]*100) - threshold = 5 # 3cm is the threshold for thin objects # TODO: need a better way to determine thin threshold + twod_size = (dimensions["x"] * 100, dimensions["z"] * 100) + threshold = 5 # 3cm is the threshold for thin objects # TODO: need a better way to determine thin threshold rotations = [0, 0, 0] if twod_size[0] < threshold: - rotations = [0, 90, 0] # asset is thin in x direction + rotations = [0, 90, 0] # asset is thin in x direction return True, rotations - elif twod_size[1] < threshold: - rotations = [90, 0, 0] # asset is thin in z direction + elif 
twod_size[1] < threshold: + rotations = [90, 0, 0] # asset is thin in z direction return True, rotations else: return False, rotations - def fix_placement_for_thin_assets(self, placement): asset_id = placement["assetId"] dimensions = get_bbox_dims(self.database[asset_id]) - threshold = 0.03 # 0.03 meter is the threshold for thin objects + threshold = 0.03 # 0.03 meter is the threshold for thin objects orginal_rotation = placement["rotation"] orginal_position = placement["position"] - bottom_center_position = {"x": orginal_position["x"], - "y": orginal_position["y"] - dimensions["y"]/2, - "z": orginal_position["z"]} + bottom_center_position = { + "x": orginal_position["x"], + "y": orginal_position["y"] - dimensions["y"] / 2, + "z": orginal_position["z"], + } if dimensions["x"] <= threshold: # asset is thin in x direction, need to rotate in z direction - placement["rotation"] = {"x": orginal_rotation["x"], - "y": orginal_rotation["y"], - "z": orginal_rotation["z"] + 90} - placement["position"] = {"x": bottom_center_position["x"], - "y": bottom_center_position["y"] + dimensions["x"]/2, - "z": bottom_center_position["z"]} + placement["rotation"] = { + "x": orginal_rotation["x"], + "y": orginal_rotation["y"], + "z": orginal_rotation["z"] + 90, + } + placement["position"] = { + "x": bottom_center_position["x"], + "y": bottom_center_position["y"] + dimensions["x"] / 2, + "z": bottom_center_position["z"], + } elif dimensions["z"] <= threshold: # asset is thin in z direction, need to rotate in x direction - placement["rotation"] = {"x": orginal_rotation["x"] + 90, - "y": orginal_rotation["y"], - "z": orginal_rotation["z"]} - placement["position"] = {"x": bottom_center_position["x"], - "y": bottom_center_position["y"] + dimensions["z"]/2, - "z": bottom_center_position["z"]} + placement["rotation"] = { + "x": orginal_rotation["x"] + 90, + "y": orginal_rotation["y"], + "z": orginal_rotation["z"], + } + placement["position"] = { + "x": bottom_center_position["x"], + "y": 
bottom_center_position["y"] + dimensions["z"] / 2, + "z": bottom_center_position["z"], + } return placement - def check_small_asset(self, asset_id): dimensions = get_bbox_dims(self.database[asset_id]) - size = (dimensions["x"]*100, dimensions["y"]*100, dimensions["z"]*100) - threshold = 25 * 25 # 25cm * 25cm is the threshold for small objects - - if size[0] * size[2] <= threshold and size[0] <= 25 and size[1] <= 25 and size[2] <= 25: + size = (dimensions["x"] * 100, dimensions["y"] * 100, dimensions["z"] * 100) + threshold = 25 * 25 # 25cm * 25cm is the threshold for small objects + + if ( + size[0] * size[2] <= threshold + and size[0] <= 25 + and size[1] <= 25 + and size[2] <= 25 + ): return True, random.randint(0, 360) else: return False, 0 - def random_select(self, candidates): scores = [candidate[1] for candidate in candidates] scores_tensor = torch.Tensor(scores) - probas = F.softmax(scores_tensor, dim=0) # TODO: consider using normalized scores + probas = F.softmax( + scores_tensor, dim=0 + ) # TODO: consider using normalized scores selected_index = torch.multinomial(probas, 1).item() selected_candidate = candidates[selected_index] return selected_candidate - def check_collision(self, placements): - static_placements = [placement for placement in placements if placement["kinematic"] == True] + static_placements = [ + placement for placement in placements if placement["kinematic"] == True + ] if len(static_placements) <= 1: return placements else: colliding_pairs = [] for i, placement_1 in enumerate(static_placements[:-1]): - for placement_2 in static_placements[i+1:]: + for placement_2 in static_placements[i + 1 :]: box1 = self.get_bounding_box(placement_1) box2 = self.get_bounding_box(placement_2) if self.intersect_3d(box1, box2): colliding_pairs.append((placement_1["id"], placement_2["id"])) - id2assetId = {placement["id"]: placement["assetId"] for placement in placements} + id2assetId = { + placement["id"]: placement["assetId"] for placement in placements 
+ } if len(colliding_pairs) != 0: remove_ids = [] - colliding_ids = list(set([pair[0] for pair in colliding_pairs] + [pair[1] for pair in colliding_pairs])) + colliding_ids = list( + set( + [pair[0] for pair in colliding_pairs] + + [pair[1] for pair in colliding_pairs] + ) + ) # order by size from small to large - colliding_ids = sorted(colliding_ids, key=lambda x: get_bbox_dims(self.database[id2assetId[x]])["x"] * get_bbox_dims(self.database[id2assetId[x]])["z"]) + colliding_ids = sorted( + colliding_ids, + key=lambda x: get_bbox_dims(self.database[id2assetId[x]])["x"] + * get_bbox_dims(self.database[id2assetId[x]])["z"], + ) for object_id in colliding_ids: remove_ids.append(object_id) - colliding_pairs = [pair for pair in colliding_pairs if object_id not in pair] - if len(colliding_pairs) == 0: break - valid_placements = [placement for placement in placements if placement["id"] not in remove_ids] + colliding_pairs = [ + pair for pair in colliding_pairs if object_id not in pair + ] + if len(colliding_pairs) == 0: + break + valid_placements = [ + placement + for placement in placements + if placement["id"] not in remove_ids + ] return valid_placements else: return placements - def get_bounding_box(self, placement): asset_id = placement["assetId"] dimensions = get_bbox_dims(self.database[asset_id]) - size = (dimensions["x"]*100, dimensions["y"]*100, dimensions["z"]*100) + size = (dimensions["x"] * 100, dimensions["y"] * 100, dimensions["z"] * 100) position = placement["position"] - box = {"min": [position["x"]*100 - size[0]/2, position["y"]*100 - size[1]/2, position["z"]*100 - size[2]/2], - "max": [position["x"]*100 + size[0]/2, position["y"]*100 + size[1]/2, position["z"]*100 + size[2]/2]} + box = { + "min": [ + position["x"] * 100 - size[0] / 2, + position["y"] * 100 - size[1] / 2, + position["z"] * 100 - size[2] / 2, + ], + "max": [ + position["x"] * 100 + size[0] / 2, + position["y"] * 100 + size[1] / 2, + position["z"] * 100 + size[2] / 2, + ], + } return box - 
def intersect_3d(self, box1, box2): # box1 and box2 are dictionaries with 'min' and 'max' keys, # which are tuples representing the minimum and maximum corners of the 3D box. for i in range(3): - if box1['max'][i] < box2['min'][i] or box1['min'][i] > box2['max'][i]: + if box1["max"][i] < box2["min"][i] or box1["min"][i] > box2["max"][i]: return False - return True \ No newline at end of file + return True diff --git a/holodeck/generation/utils.py b/holodeck/generation/utils.py index 8fc0912..f87996c 100644 --- a/holodeck/generation/utils.py +++ b/holodeck/generation/utils.py @@ -8,7 +8,12 @@ from PIL import Image from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from moviepy.editor import TextClip, CompositeVideoClip, concatenate_videoclips, ImageSequenceClip +from moviepy.editor import ( + TextClip, + CompositeVideoClip, + concatenate_videoclips, + ImageSequenceClip, +) from tqdm import tqdm from holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID @@ -49,7 +54,7 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): asset_directory=objaverse_asset_dir, asset_symlink=True, verbose=True, - ) + ), ) # Setup the top-down camera @@ -62,15 +67,19 @@ def get_top_down_frame(scene, objaverse_asset_dir, width=1024, height=1024): pose["position"]["y"] = bounds["y"] del pose["orthographicSize"] - try: wall_height = wall_height = max([point["y"] for point in scene["walls"][0]["polygon"]]) - except: wall_height = 2.5 + try: + wall_height = wall_height = max( + [point["y"] for point in scene["walls"][0]["polygon"]] + ) + except: + wall_height = 2.5 - for i in range(20): + for i in range(20): pose["orthographic"] = False - + pose["farClippingPlane"] = pose["position"]["y"] + 10 pose["nearClippingPlane"] = pose["position"]["y"] - wall_height - + # add the camera to the scene event = controller.step( action="AddThirdPartyCamera", @@ -81,8 +90,9 @@ def get_top_down_frame(scene, 
objaverse_asset_dir, width=1024, height=1024): top_down_frame = event.third_party_camera_frames[-1] # check if the edge of the frame is white - if all_edges_white(top_down_frame): break - + if all_edges_white(top_down_frame): + break + pose["position"]["y"] += 0.75 controller.stop() @@ -105,20 +115,20 @@ def get_top_down_frame_ithor(scene, objaverse_asset_dir, width=1024, height=1024 asset_directory=objaverse_asset_dir, asset_symlink=True, verbose=True, - ) + ), ) - + controller.reset(scene) - + event = controller.step(action="GetMapViewCameraProperties") pose = copy.deepcopy(event.metadata["actionReturn"]) - + event = controller.step( - action="AddThirdPartyCamera", - **pose, - skyboxColor="white", - raise_for_failure=True, - ) + action="AddThirdPartyCamera", + **pose, + skyboxColor="white", + raise_for_failure=True, + ) controller.stop() @@ -137,22 +147,16 @@ def main(save_path): def visualize_asset(asset_id, version): empty_house = compress_json.load("empty_house.json") - empty_house["objects"] = [{ + empty_house["objects"] = [ + { "assetId": asset_id, "id": "test_asset", "kinematic": True, - "position": { - "x": 0, - "y": 0, - "z": 0 - }, - "rotation": { - "x": 0, - "y": 0, - "z": 0 - }, - "material": None - }] + "position": {"x": 0, "y": 0, "z": 0}, + "rotation": {"x": 0, "y": 0, "z": 0}, + "material": None, + } + ] image = get_top_down_frame(empty_house, version) image.show() @@ -181,36 +185,52 @@ def get_room_images(scene, objaverse_asset_dir, width=1024, height=1024): room_name = room["roomType"] camera_height = wall_height - 0.2 - room_vertices = [[point["x"], point["z"]] for point in room['floorPolygon']] + room_vertices = [[point["x"], point["z"]] for point in room["floorPolygon"]] room_center = np.mean(room_vertices, axis=0) floor_center = np.array([room_center[0], 0, room_center[1]]) camera_center = np.array([room_center[0], camera_height, room_center[1]]) - corners = np.array([[point[0], camera_height, point[1]] for point in room_vertices]) + corners 
= np.array( + [[point[0], camera_height, point[1]] for point in room_vertices] + ) farest_corner = np.argmax(np.linalg.norm(corners - camera_center, axis=1)) vector_1 = floor_center - camera_center vector_2 = farest_corner - camera_center - x_angle = 90 - np.arccos(np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2))) * 180 / np.pi + x_angle = ( + 90 + - np.arccos( + np.dot(vector_1, vector_2) + / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2)) + ) + * 180 + / np.pi + ) if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), rotation=dict(x=0, y=0, z=0), ) - + images = [] for angle in tqdm(range(0, 360, 90)): controller.step( action="UpdateThirdPartyCamera", - rotation=dict(x=x_angle, y=angle+45, z=0), - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + rotation=dict(x=x_angle, y=angle + 45, z=0), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), ) - images.append(Image.fromarray(controller.last_event.third_party_camera_frames[0])) - + images.append( + Image.fromarray(controller.last_event.third_party_camera_frames[0]) + ) + room_images[room_name] = images - + controller.stop() return room_images @@ -241,27 +261,39 @@ def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"]), + position=dict( + x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"] + ), rotation=dict(x=0, y=0, z=0), ) - + images = [] for angle in tqdm(range(0, 360, 1)): controller.step( action="UpdateThirdPartyCamera", rotation=dict(x=45, y=angle, z=0), - position=dict(x=pose["position"]["x"], 
y=camera_height, z=pose["position"]["z"]), + position=dict( + x=pose["position"]["x"], y=camera_height, z=pose["position"]["z"] + ), ) images.append(controller.last_event.third_party_camera_frames[0]) imsn = ImageSequenceClip(images, fps=30) # Create text clips - txt_clip_query = (TextClip(f"Query: {scene_type}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'top')).set_duration(imsn.duration)) - txt_clip_room = (TextClip(f"Room Type: {scene_type}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'bottom')).set_duration(imsn.duration)) + txt_clip_query = ( + TextClip(f"Query: {scene_type}", fontsize=30, color="white", font="Arial-Bold") + .set_pos(("center", "top")) + .set_duration(imsn.duration) + ) + txt_clip_room = ( + TextClip( + f"Room Type: {scene_type}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "bottom")) + .set_duration(imsn.duration) + ) # Overlay the text clip on the first video clip video = CompositeVideoClip([imsn, txt_clip_query, txt_clip_room]) @@ -273,20 +305,20 @@ def ithor_video(scene, objaverse_asset_dir, width, height, scene_type): def room_video(scene, objaverse_asset_dir, width, height): def add_line_breaks(text, max_line_length): - words = text.split(' ') + words = text.split(" ") lines = [] current_line = [] for word in words: - if len(' '.join(current_line + [word])) <= max_line_length: + if len(" ".join(current_line + [word])) <= max_line_length: current_line.append(word) else: - lines.append(' '.join(current_line)) + lines.append(" ".join(current_line)) current_line = [word] - lines.append(' '.join(current_line)) + lines.append(" ".join(current_line)) - return '\n'.join(lines) + return "\n".join(lines) """Saves a top-down video of the house.""" controller = Controller( @@ -304,9 +336,11 @@ def add_line_breaks(text, max_line_length): verbose=True, ), ) - - try: query = scene["query"] - except: query = scene["rooms"][0]['roomType'] + + try: + query = scene["query"] + 
except: + query = scene["rooms"][0]["roomType"] wall_height = max([point["y"] for point in scene["walls"][0]["polygon"]]) @@ -317,23 +351,37 @@ def add_line_breaks(text, max_line_length): camera_height = wall_height - 0.2 print("camera height: ", camera_height) - room_vertices = [[point["x"], point["z"]] for point in room['floorPolygon']] + room_vertices = [[point["x"], point["z"]] for point in room["floorPolygon"]] room_center = np.mean(room_vertices, axis=0) floor_center = np.array([room_center[0], 0, room_center[1]]) camera_center = np.array([room_center[0], camera_height, room_center[1]]) - corners = np.array([[point["x"], point["y"], point["z"]] for point in room['floorPolygon']]) - farest_corner = corners[np.argmax(np.linalg.norm(corners - camera_center, axis=1))] + corners = np.array( + [[point["x"], point["y"], point["z"]] for point in room["floorPolygon"]] + ) + farest_corner = corners[ + np.argmax(np.linalg.norm(corners - camera_center, axis=1)) + ] vector_1 = floor_center - camera_center vector_2 = farest_corner - camera_center - x_angle = 90 - np.arccos(np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2))) * 180 / np.pi + x_angle = ( + 90 + - np.arccos( + np.dot(vector_1, vector_2) + / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2)) + ) + * 180 + / np.pi + ) images = [] if not controller.last_event.third_party_camera_frames: controller.step( action="AddThirdPartyCamera", - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), rotation=dict(x=0, y=0, z=0), ) @@ -341,17 +389,29 @@ def add_line_breaks(text, max_line_length): controller.step( action="UpdateThirdPartyCamera", rotation=dict(x=x_angle, y=angle, z=0), - position=dict(x=camera_center[0], y=camera_center[1], z=camera_center[2]), + position=dict( + x=camera_center[0], y=camera_center[1], z=camera_center[2] + ), ) 
images.append(controller.last_event.third_party_camera_frames[0]) imsn = ImageSequenceClip(images, fps=30) - + # Create text clips - txt_clip_query = (TextClip(f"Query: {text_query}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'top')).set_duration(imsn.duration)) - txt_clip_room = (TextClip(f"Room Type: {room_name}", fontsize=30, color='white', font='Arial-Bold') - .set_pos(('center', 'bottom')).set_duration(imsn.duration)) + txt_clip_query = ( + TextClip( + f"Query: {text_query}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "top")) + .set_duration(imsn.duration) + ) + txt_clip_room = ( + TextClip( + f"Room Type: {room_name}", fontsize=30, color="white", font="Arial-Bold" + ) + .set_pos(("center", "bottom")) + .set_duration(imsn.duration) + ) # Overlay the text clip on the first video clip video = CompositeVideoClip([imsn, txt_clip_query, txt_clip_room]) @@ -384,6 +444,7 @@ def get_annotations(obj_data: Dict[str, Any]): return obj_data + def get_bbox_dims(obj_data: Dict[str, Any]): am = get_asset_metadata(obj_data) @@ -398,36 +459,51 @@ def get_bbox_dims(obj_data: Dict[str, Any]): mins = bbox_info["min"] maxs = bbox_info["max"] - return { - k: maxs[k] - mins[k] for k in ["x", "y", "z"] - } + return {k: maxs[k] - mins[k] for k in ["x", "y", "z"]} + def get_secondary_properties(obj_data: Dict[str, Any]): am = get_asset_metadata(obj_data) return am["secondaryProperties"] + if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("--mode", help = "Mode to run (top_down_frame, top_down_video, room_image).", default = "top_down_frame") - parser.add_argument("--objaverse_asset_dir", help = "Directory to load assets from.", default = "./objaverse/processed_2023_09_23_combine_scale") - parser.add_argument("--scene", help = "Scene to load.", default = os.path.join(HOLODECK_BASE_DATA_DIR, "scenes/a_living_room/a_living_room.json")) + parser.add_argument( + "--mode", + help="Mode to run (top_down_frame, 
top_down_video, room_image).", + default="top_down_frame", + ) + parser.add_argument( + "--objaverse_asset_dir", + help="Directory to load assets from.", + default="./objaverse/processed_2023_09_23_combine_scale", + ) + parser.add_argument( + "--scene", + help="Scene to load.", + default=os.path.join( + HOLODECK_BASE_DATA_DIR, "scenes/a_living_room/a_living_room.json" + ), + ) args = parser.parse_args() scene = compress_json.load(args.scene) - if "query" not in scene: scene["query"] = args.scene.split("/")[-1].split(".")[0] + if "query" not in scene: + scene["query"] = args.scene.split("/")[-1].split(".")[0] if args.mode == "top_down_frame": image = get_top_down_frame(scene, args.objaverse_asset_dir) image.show() - + elif args.mode == "room_video": video = room_video(scene, args.objaverse_asset_dir, 1024, 1024) video.write_videofile(args.scene.replace(".json", ".mp4"), fps=30) - + elif args.mode == "room_image": room_images = get_room_images(scene, args.objaverse_asset_dir, 1024, 1024) save_folder = "/".join(args.scene.split("/")[:-1]) for room_name, images in room_images.items(): for i, image in enumerate(images): - image.save(f"{save_folder}/{room_name}_{i}.png") \ No newline at end of file + image.save(f"{save_folder}/{room_name}_{i}.png") diff --git a/holodeck/generation/wall_objects.py b/holodeck/generation/wall_objects.py index ccbbf58..4ed8d7b 100644 --- a/holodeck/generation/wall_objects.py +++ b/holodeck/generation/wall_objects.py @@ -15,20 +15,33 @@ from holodeck.generation.utils import get_bbox_dims -class WallObjectGenerator(): +class WallObjectGenerator: def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): - self.json_template = {"assetId": None, "id": None, "kinematic": True, - "position": {}, "rotation": {}, "material": None, "roomId": None} + self.json_template = { + "assetId": None, + "id": None, + "kinematic": True, + "position": {}, + "rotation": {}, + "material": None, + "roomId": None, + } self.llm = llm self.object_retriever = 
object_retriever self.database = object_retriever.database - self.constraint_prompt_template = PromptTemplate(input_variables=["room_type", "wall_height", "floor_objects", "wall_objects"], - template=prompts.wall_object_constraints_prompt) + self.constraint_prompt_template = PromptTemplate( + input_variables=[ + "room_type", + "wall_height", + "floor_objects", + "wall_objects", + ], + template=prompts.wall_object_constraints_prompt, + ) self.grid_size = 25 self.default_height = 150 self.constraint_type = "llm" - def generate_wall_objects(self, scene, use_constraint=True): doors = scene["doors"] windows = scene["windows"] @@ -38,7 +51,19 @@ def generate_wall_objects(self, scene, use_constraint=True): wall_objects = [] selected_objects = scene["selected_objects"] - packed_args = [(room, scene, doors, windows, open_walls, wall_height, selected_objects, use_constraint) for room in scene["rooms"]] + packed_args = [ + ( + room, + scene, + doors, + windows, + open_walls, + wall_height, + selected_objects, + use_constraint, + ) + for room in scene["rooms"] + ] pool = multiprocessing.Pool(processes=4) all_placements = pool.map(self.generate_wall_objects_per_room, packed_args) pool.close() @@ -48,73 +73,122 @@ def generate_wall_objects(self, scene, use_constraint=True): wall_objects += placements return wall_objects - def generate_wall_objects_per_room(self, args): - room, scene, doors, windows, open_walls, wall_height, selected_objects, use_constraint = args + ( + room, + scene, + doors, + windows, + open_walls, + wall_height, + selected_objects, + use_constraint, + ) = args selected_wall_objects = selected_objects[room["roomType"]]["wall"] selected_wall_objects = self.order_objects_by_size(selected_wall_objects) - wall_object_name2id = {object_name: asset_id for object_name, asset_id in selected_wall_objects} + wall_object_name2id = { + object_name: asset_id for object_name, asset_id in selected_wall_objects + } room_id = room["id"] room_type = room["roomType"] 
wall_object_names = list(wall_object_name2id.keys()) - - floor_object_name2id = {object["object_name"]: object["assetId"] for object in scene["floor_objects"] if object["roomId"] == room["id"]} + + floor_object_name2id = { + object["object_name"]: object["assetId"] + for object in scene["floor_objects"] + if object["roomId"] == room["id"] + } floor_object_names = list(floor_object_name2id.keys()) - + # get constraints - constraints_prompt = self.constraint_prompt_template.format(room_type=room_type, - wall_height=int(wall_height*100), - floor_objects=", ".join(floor_object_names), - wall_objects=", ".join(wall_object_names)) + constraints_prompt = self.constraint_prompt_template.format( + room_type=room_type, + wall_height=int(wall_height * 100), + floor_objects=", ".join(floor_object_names), + wall_objects=", ".join(wall_object_names), + ) if self.constraint_type == "llm" and use_constraint: constraint_plan = self.llm(constraints_prompt) else: constraint_plan = "" for object_name in wall_object_names: - random_height = random.randint(0, int(wall_height*100)) + random_height = random.randint(0, int(wall_height * 100)) constraint_plan += f"{object_name} | N/A | {random_height} \n" print(f"\nwall object constraint plan for {room_type}:\n{constraint_plan}") - constraints = self.parse_wall_object_constraints(constraint_plan, wall_object_names, floor_object_names) - + constraints = self.parse_wall_object_constraints( + constraint_plan, wall_object_names, floor_object_names + ) + # get wall objects - wall_object2dimension = {object_name: get_bbox_dims(self.database[object_id]) for object_name, object_id in wall_object_name2id.items()} - wall_objects_list = [(object_name, (wall_object2dimension[object_name]['x'] * 100, wall_object2dimension[object_name]['y'] * 100, wall_object2dimension[object_name]['z'] * 100)) for object_name in constraints] - + wall_object2dimension = { + object_name: get_bbox_dims(self.database[object_id]) + for object_name, object_id in 
wall_object_name2id.items() + } + wall_objects_list = [ + ( + object_name, + ( + wall_object2dimension[object_name]["x"] * 100, + wall_object2dimension[object_name]["y"] * 100, + wall_object2dimension[object_name]["z"] * 100, + ), + ) + for object_name in constraints + ] + # update constraints with max height - wall_object2max_height = {object_name: min(scene["wall_height"] * 100 - wall_object2dimension[object_name]["y"] * 100 - 20, constraints[object_name]["height"]) for object_name in constraints} + wall_object2max_height = { + object_name: min( + scene["wall_height"] * 100 + - wall_object2dimension[object_name]["y"] * 100 + - 20, + constraints[object_name]["height"], + ) + for object_name in constraints + } for object_name in constraints: - constraints[object_name]["height"] = max(wall_object2max_height[object_name], 0) # avoid negative height - + constraints[object_name]["height"] = max( + wall_object2max_height[object_name], 0 + ) # avoid negative height + # get initial state room_vertices = [(x * 100, y * 100) for (x, y) in room["vertices"]] room_poly = Polygon(room_vertices) - initial_state = self.get_initial_state(scene, doors, windows, room_vertices, open_walls) + initial_state = self.get_initial_state( + scene, doors, windows, room_vertices, open_walls + ) # solve room_x, room_z = self.get_room_size(room) grid_size = max(room_x // 20, room_z // 20) - solver = DFS_Solver_Wall(grid_size=grid_size, max_duration=5, constraint_bouns=100) - solutions = solver.get_solution(room_poly, wall_objects_list, constraints, initial_state) - + solver = DFS_Solver_Wall( + grid_size=grid_size, max_duration=5, constraint_bouns=100 + ) + solutions = solver.get_solution( + room_poly, wall_objects_list, constraints, initial_state + ) + placements = self.solution2placement(solutions, wall_object_name2id, room_id) return placements - - def parse_wall_object_constraints(self, constraint_text, wall_object_names, floor_object_names): + def parse_wall_object_constraints( + self, 
constraint_text, wall_object_names, floor_object_names + ): object2constraints = {} - lines = [line.lower() for line in constraint_text.split('\n') if "|" in line] + lines = [line.lower() for line in constraint_text.split("\n") if "|" in line] for line in lines: # remove index - pattern = re.compile(r'^\d+\.\s*') - line = pattern.sub('', line) - if line[-1] == ".": line = line[:-1] # remove the last period + pattern = re.compile(r"^\d+\.\s*") + line = pattern.sub("", line) + if line[-1] == ".": + line = line[:-1] # remove the last period try: object_name, location, height = line.split("|") object_name = object_name.replace("*", "").strip() @@ -123,37 +197,51 @@ def parse_wall_object_constraints(self, constraint_text, wall_object_names, floo except: print(f"Warning: cannot parse {line}.") continue - - if object_name not in wall_object_names: continue - try: target_floor_object_name = location.split(", ")[-1] - except: print(f"Warning: cannot parse {location}."); target_floor_object_name = None + if object_name not in wall_object_names: + continue + + try: + target_floor_object_name = location.split(", ")[-1] + except: + print(f"Warning: cannot parse {location}.") + target_floor_object_name = None + + try: + height = int(height) + except: + height = self.default_height - try: height = int(height) - except: height = self.default_height - if target_floor_object_name in floor_object_names: - object2constraints[object_name] = {"target_floor_object_name": target_floor_object_name, "height": height} + object2constraints[object_name] = { + "target_floor_object_name": target_floor_object_name, + "height": height, + } else: - object2constraints[object_name] = {"target_floor_object_name": None, "height": height} - + object2constraints[object_name] = { + "target_floor_object_name": None, + "height": height, + } + return object2constraints - def get_room_size(self, room): floor_polygon = room["floorPolygon"] - x_values = [point['x'] for point in floor_polygon] - z_values = 
[point['z'] for point in floor_polygon] - return (int(max(x_values) - min(x_values)) * 100, int(max(z_values) - min(z_values)) * 100) - + x_values = [point["x"] for point in floor_polygon] + z_values = [point["z"] for point in floor_polygon] + return ( + int(max(x_values) - min(x_values)) * 100, + int(max(z_values) - min(z_values)) * 100, + ) def check_wall_object_size(self, room_size, object_size): if object_size["x"] * 100 > max(room_size) * 0.5: - print(f"Warning: object size {object_size} is too large for room size {room_size}.") + print( + f"Warning: object size {object_size} is too large for room size {room_size}." + ) return False else: return True - def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): room_poly = Polygon(room_vertices) @@ -168,9 +256,15 @@ def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): if room_poly.contains(door_center): door_height = door["assetPosition"]["y"] * 100 * 2 x_min, z_min, x_max, z_max = door_poly.bounds - initial_state[f"door-{i}"] = ((x_min, 0, z_min), (x_max, door_height, z_max), 0, door_vertices, 1) + initial_state[f"door-{i}"] = ( + (x_min, 0, z_min), + (x_max, door_height, z_max), + 0, + door_vertices, + 1, + ) i += 1 - + for window in windows: window_boxes = window["windowBoxes"] for window_box in window_boxes: @@ -181,9 +275,15 @@ def get_initial_state(self, scene, doors, windows, room_vertices, open_walls): y_min = window["holePolygon"][0]["y"] * 100 y_max = window["holePolygon"][1]["y"] * 100 x_min, z_min, x_max, z_max = window_poly.bounds - initial_state[f"window-{i}"] = ((x_min, y_min, z_min), (x_max, y_max, z_max), 0, window_vertices, 1) + initial_state[f"window-{i}"] = ( + (x_min, y_min, z_min), + (x_max, y_max, z_max), + 0, + window_vertices, + 1, + ) i += 1 - + if len(open_walls) != 0: open_wall_boxes = open_walls["openWallBoxes"] for open_wall_box in open_wall_boxes: @@ -192,49 +292,68 @@ def get_initial_state(self, scene, doors, windows, room_vertices, 
open_walls): open_wall_center = open_wall_poly.centroid if room_poly.contains(open_wall_center): x_min, z_min, x_max, z_max = open_wall_poly.bounds - initial_state[f"open-{i}"] = ((x_min, 0, z_min), (x_max, scene["wall_height"] * 100, z_max), 0, open_wall_vertices, 1) + initial_state[f"open-{i}"] = ( + (x_min, 0, z_min), + (x_max, scene["wall_height"] * 100, z_max), + 0, + open_wall_vertices, + 1, + ) i += 1 - + for object in scene["floor_objects"]: - try: object_vertices = object["vertices"] - except: continue - + try: + object_vertices = object["vertices"] + except: + continue + object_poly = Polygon(object_vertices) object_center = object_poly.centroid if room_poly.contains(object_center): - object_height = object["position"]["y"] * 100 * 2 # the height should be twice the value of the y coordinate + object_height = ( + object["position"]["y"] * 100 * 2 + ) # the height should be twice the value of the y coordinate x_min, z_min, x_max, z_max = object_poly.bounds - initial_state[object["object_name"]] = ((x_min, 0, z_min), (x_max, object_height, z_max), object["rotation"]["y"], object_vertices, 1) + initial_state[object["object_name"]] = ( + (x_min, 0, z_min), + (x_max, object_height, z_max), + object["rotation"]["y"], + object_vertices, + 1, + ) return initial_state - def solution2placement(self, solutions, wall_object_name2id, room_id): placements = [] for object_name, solution in solutions.items(): - if object_name not in wall_object_name2id: continue + if object_name not in wall_object_name2id: + continue placement = self.json_template.copy() placement["assetId"] = wall_object_name2id[object_name] placement["id"] = f"{object_name} ({room_id})" position_x = (solution[0][0] + solution[1][0]) / 200 position_y = (solution[0][1] + solution[1][1]) / 200 position_z = (solution[0][2] + solution[1][2]) / 200 - + placement["position"] = {"x": position_x, "y": position_y, "z": position_z} placement["rotation"] = {"x": 0, "y": solution[2], "z": 0} - + # move the object a 
little bit to avoid collision - if placement["rotation"]["y"] == 0: placement["position"]["z"] += 0.01 - elif placement["rotation"]["y"] == 90: placement["position"]["x"] += 0.01 - elif placement["rotation"]["y"]== 180: placement["position"]["z"] -= 0.01 - elif placement["rotation"]["y"] == 270: placement["position"]["x"] -= 0.01 + if placement["rotation"]["y"] == 0: + placement["position"]["z"] += 0.01 + elif placement["rotation"]["y"] == 90: + placement["position"]["x"] += 0.01 + elif placement["rotation"]["y"] == 180: + placement["position"]["z"] -= 0.01 + elif placement["rotation"]["y"] == 270: + placement["position"]["x"] -= 0.01 placement["roomId"] = room_id placement["vertices"] = list(solution[3]) placement["object_name"] = object_name placements.append(placement) return placements - def order_objects_by_size(self, selected_wall_objects): ordered_wall_objects = [] @@ -243,7 +362,10 @@ def order_objects_by_size(self, selected_wall_objects): size = dimensions["x"] ordered_wall_objects.append([object_name, asset_id, size]) ordered_wall_objects.sort(key=lambda x: x[2], reverse=True) - ordered_wall_objects_no_size = [[object_name, asset_id] for object_name, asset_id, size in ordered_wall_objects] + ordered_wall_objects_no_size = [ + [object_name, asset_id] + for object_name, asset_id, size in ordered_wall_objects + ] return ordered_wall_objects_no_size @@ -253,7 +375,7 @@ def __init__(self, solution): pass -class DFS_Solver_Wall(): +class DFS_Solver_Wall: def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=100): self.grid_size = grid_size self.random_seed = random_seed @@ -263,21 +385,22 @@ def __init__(self, grid_size, random_seed=0, max_duration=5, constraint_bouns=10 self.solutions = [] self.visualize = False - def get_solution(self, room_poly, wall_objects_list, constraints, initial_state): grid_points = self.create_grids(room_poly) self.start_time = time.time() try: - self.dfs(room_poly, wall_objects_list, constraints, grid_points, 
initial_state) + self.dfs( + room_poly, wall_objects_list, constraints, grid_points, initial_state + ) except SolutionFound as e: print(f"Time taken: {time.time() - self.start_time}") - + max_solution = self.get_max_solution(self.solutions) - - if self.visualize: self.visualize_grid(room_poly, grid_points, max_solution) - return max_solution + if self.visualize: + self.visualize_grid(room_poly, grid_points, max_solution) + return max_solution def get_max_solution(self, solutions): path_weights = [] @@ -286,19 +409,22 @@ def get_max_solution(self, solutions): max_index = np.argmax(path_weights) return solutions[max_index] - - def dfs(self, room_poly, wall_objects_list, constraints, grid_points, placed_objects): + def dfs( + self, room_poly, wall_objects_list, constraints, grid_points, placed_objects + ): if len(wall_objects_list) == 0: self.solutions.append(placed_objects) return placed_objects - + if time.time() - self.start_time > self.max_duration: print(f"Time limit reached.") raise SolutionFound(self.solutions) - + object_name, object_dim = wall_objects_list[0] - placements = self.get_possible_placements(room_poly, object_dim, constraints[object_name], grid_points, placed_objects) - + placements = self.get_possible_placements( + room_poly, object_dim, constraints[object_name], grid_points, placed_objects + ) + if len(placements) == 0: self.solutions.append(placed_objects) @@ -307,23 +433,39 @@ def dfs(self, room_poly, wall_objects_list, constraints, grid_points, placed_obj placed_objects_updated = copy.deepcopy(placed_objects) placed_objects_updated[object_name] = placement - sub_paths = self.dfs(room_poly, wall_objects_list[1:], constraints, grid_points, placed_objects_updated) + sub_paths = self.dfs( + room_poly, + wall_objects_list[1:], + constraints, + grid_points, + placed_objects_updated, + ) paths.extend(sub_paths) return paths - - def get_possible_placements(self, room_poly, object_dim, constraint, grid_points, placed_objects): - all_solutions = 
self.filter_collision(placed_objects, self.get_all_solutions(room_poly, grid_points, object_dim, constraint["height"])) + def get_possible_placements( + self, room_poly, object_dim, constraint, grid_points, placed_objects + ): + all_solutions = self.filter_collision( + placed_objects, + self.get_all_solutions( + room_poly, grid_points, object_dim, constraint["height"] + ), + ) random.shuffle(all_solutions) target_floor_object_name = constraint["target_floor_object_name"] - if target_floor_object_name is not None and target_floor_object_name in placed_objects: - all_solutions = self.score_solution_by_distance(all_solutions, placed_objects[target_floor_object_name]) + if ( + target_floor_object_name is not None + and target_floor_object_name in placed_objects + ): + all_solutions = self.score_solution_by_distance( + all_solutions, placed_objects[target_floor_object_name] + ) # order solutions by distance to target floor object all_solutions = sorted(all_solutions, key=lambda x: x[-1], reverse=True) return all_solutions - def create_grids(self, room_poly): # Get the coordinates of the polygon poly_coords = list(room_poly.exterior.coords) @@ -336,12 +478,13 @@ def create_grids(self, room_poly): # Create points along the edge at intervals of grid size for j in range(0, int(line_length), self.grid_size): - point_on_line = substring(line, j, j) # Get a point at distance j from the start of the line + point_on_line = substring( + line, j, j + ) # Get a point at distance j from the start of the line if point_on_line: grid_points.append((point_on_line.x, point_on_line.y)) - + return grid_points - def get_all_solutions(self, room_poly, grid_points, object_dim, height): obj_length, obj_height, obj_width = object_dim @@ -351,42 +494,68 @@ def get_all_solutions(self, room_poly, grid_points, object_dim, height): 0: ((-obj_half_length, 0), (obj_half_length, obj_width)), 90: ((0, -obj_half_length), (obj_width, obj_half_length)), 180: ((-obj_half_length, -obj_width), 
(obj_half_length, 0)), - 270: ((-obj_width, -obj_half_length), (0, obj_half_length)) + 270: ((-obj_width, -obj_half_length), (0, obj_half_length)), } solutions = [] for rotation in [0, 90, 180, 270]: for point in grid_points: center_x, center_y = point - lower_left_adjustment, upper_right_adjustment = rotation_adjustments[rotation] - lower_left = (center_x + lower_left_adjustment[0], center_y + lower_left_adjustment[1]) - upper_right = (center_x + upper_right_adjustment[0], center_y + upper_right_adjustment[1]) + lower_left_adjustment, upper_right_adjustment = rotation_adjustments[ + rotation + ] + lower_left = ( + center_x + lower_left_adjustment[0], + center_y + lower_left_adjustment[1], + ) + upper_right = ( + center_x + upper_right_adjustment[0], + center_y + upper_right_adjustment[1], + ) obj_box = box(*lower_left, *upper_right) if room_poly.contains(obj_box): object_coords = obj_box.exterior.coords[:] - coordinates_on_edge = [coord for coord in object_coords if room_poly.boundary.contains(Point(coord))] + coordinates_on_edge = [ + coord + for coord in object_coords + if room_poly.boundary.contains(Point(coord)) + ] coordinates_on_edge = list(set(coordinates_on_edge)) if len(coordinates_on_edge) >= 2: vertex_min = (lower_left[0], height, lower_left[1]) - vertex_max = (upper_right[0], height + obj_height, upper_right[1]) + vertex_max = ( + upper_right[0], + height + obj_height, + upper_right[1], + ) + + solutions.append( + [ + vertex_min, + vertex_max, + rotation, + tuple(obj_box.exterior.coords[:]), + 1, + ] + ) - solutions.append([vertex_min, vertex_max, rotation, tuple(obj_box.exterior.coords[:]), 1]) - return solutions - def filter_collision(self, placed_objects, solutions): def intersect_3d(box1, box2): # box1 and box2 are dictionaries with 'min' and 'max' keys, # which are tuples representing the minimum and maximum corners of the 3D box. 
for i in range(3): - if box1['max'][i] < box2['min'][i] or box1['min'][i] > box2['max'][i]: + if box1["max"][i] < box2["min"][i] or box1["min"][i] > box2["max"][i]: return False return True valid_solutions = [] - boxes = [{"min": vertex_min, "max": vertex_max} for vertex_min, vertex_max, rotation, box_coords, path_weight in placed_objects.values()] + boxes = [ + {"min": vertex_min, "max": vertex_max} + for vertex_min, vertex_max, rotation, box_coords, path_weight in placed_objects.values() + ] for solution in solutions: for box in boxes: @@ -394,58 +563,70 @@ def intersect_3d(box1, box2): break else: valid_solutions.append(solution) - + return valid_solutions - def score_solution_by_distance(self, solutions, target_object): distances = [] scored_solutions = [] for solution in solutions: - center_x, center_y, center_z = (solution[0][0]+solution[1][0])/2, (solution[0][1]+solution[1][1])/2, (solution[0][2]+solution[1][2])/2 - target_x, target_y, target_z = (target_object[0][0]+target_object[1][0])/2, (target_object[0][1]+target_object[1][1])/2, (target_object[0][2]+target_object[1][2])/2 - distance = np.sqrt((center_x - target_x)**2 + (center_y - target_y)**2 + (center_z - target_z)**2) + center_x, center_y, center_z = ( + (solution[0][0] + solution[1][0]) / 2, + (solution[0][1] + solution[1][1]) / 2, + (solution[0][2] + solution[1][2]) / 2, + ) + target_x, target_y, target_z = ( + (target_object[0][0] + target_object[1][0]) / 2, + (target_object[0][1] + target_object[1][1]) / 2, + (target_object[0][2] + target_object[1][2]) / 2, + ) + distance = np.sqrt( + (center_x - target_x) ** 2 + + (center_y - target_y) ** 2 + + (center_z - target_z) ** 2 + ) distances.append(distance) scored_solution = solution.copy() - scored_solution[-1] = solution[-1] + self.constraint_bouns * (1/distance) + scored_solution[-1] = solution[-1] + self.constraint_bouns * (1 / distance) scored_solutions.append(scored_solution) return scored_solutions - - + def visualize_grid(self, room_poly, 
grid_points, solutions): # create a new figure fig, ax = plt.subplots() # draw the room x, y = room_poly.exterior.xy - ax.plot(x, y, 'b-', label='Room') + ax.plot(x, y, "b-", label="Room") # draw the grid points grid_x = [point[0] for point in grid_points] grid_y = [point[1] for point in grid_points] - ax.plot(grid_x, grid_y, 'ro', markersize=2) + ax.plot(grid_x, grid_y, "ro", markersize=2) # draw the solutions for object_name, solution in solutions.items(): vertex_min, vertex_max, rotation, box_coords = solution[:-1] - center_x, center_y = (vertex_min[0]+vertex_max[0])/2, (vertex_min[2]+vertex_max[2])/2 + center_x, center_y = (vertex_min[0] + vertex_max[0]) / 2, ( + vertex_min[2] + vertex_max[2] + ) / 2 # create a polygon for the solution obj_poly = Polygon(box_coords) x, y = obj_poly.exterior.xy - ax.plot(x, y, 'g-', linewidth=2) + ax.plot(x, y, "g-", linewidth=2) - ax.text(center_x, center_y, object_name, fontsize=12, ha='center') + ax.text(center_x, center_y, object_name, fontsize=12, ha="center") # set arrow direction based on rotation if rotation == 0: - ax.arrow(center_x, center_y, 0, 25, head_width=10, fc='g') + ax.arrow(center_x, center_y, 0, 25, head_width=10, fc="g") elif rotation == 90: - ax.arrow(center_x, center_y, 25, 0, head_width=10, fc='g') + ax.arrow(center_x, center_y, 25, 0, head_width=10, fc="g") elif rotation == 180: - ax.arrow(center_x, center_y, 0, -25, head_width=10, fc='g') + ax.arrow(center_x, center_y, 0, -25, head_width=10, fc="g") elif rotation == 270: - ax.arrow(center_x, center_y, -25, 0, head_width=10, fc='g') + ax.arrow(center_x, center_y, -25, 0, head_width=10, fc="g") - ax.set_aspect('equal', 'box') # to keep the ratios equal along x and y axis - plt.show() \ No newline at end of file + ax.set_aspect("equal", "box") # to keep the ratios equal along x and y axis + plt.show() diff --git a/holodeck/generation/walls.py b/holodeck/generation/walls.py index 7b1210f..c1bac44 100644 --- a/holodeck/generation/walls.py +++ 
b/holodeck/generation/walls.py @@ -9,28 +9,34 @@ import holodeck.generation.prompts as prompts -class WallGenerator(): +class WallGenerator: def __init__(self, llm: OpenAI): - self.json_template = {"id": None, "roomId": None, - "material": {"name": None, "color": None}, - "polygon": []} + self.json_template = { + "id": None, + "roomId": None, + "material": {"name": None, "color": None}, + "polygon": [], + } self.llm = llm - self.wall_height_template = PromptTemplate(input_variables=["input"], template=prompts.wall_height_prompt) + self.wall_height_template = PromptTemplate( + input_variables=["input"], template=prompts.wall_height_prompt + ) self.used_assets = [] - def generate_walls(self, scene): # get wall height - if "wall_height" not in scene: wall_height = self.get_wall_height(scene) - else: wall_height = scene["wall_height"] + if "wall_height" not in scene: + wall_height = self.get_wall_height(scene) + else: + wall_height = scene["wall_height"] walls = [] rooms = scene["rooms"] for room in rooms: roomId = room["id"] material = room["wallMaterial"] - full_vertices = room['full_vertices'] - + full_vertices = room["full_vertices"] + for j in range(len(full_vertices)): wall = copy.deepcopy(self.json_template) wall["roomId"] = roomId @@ -38,32 +44,42 @@ def generate_walls(self, scene): # generate the wall polygon wall_endpoint1 = full_vertices[j] - wall_endpoint2 = full_vertices[(j+1) % len(full_vertices)] - wall["polygon"] = self.generate_wall_polygon(wall_endpoint1, wall_endpoint2, wall_height) + wall_endpoint2 = full_vertices[(j + 1) % len(full_vertices)] + wall["polygon"] = self.generate_wall_polygon( + wall_endpoint1, wall_endpoint2, wall_height + ) # add the room connection information - wall["connected_rooms"] = self.get_connected_rooms(wall["polygon"], rooms, wall["roomId"]) - + wall["connected_rooms"] = self.get_connected_rooms( + wall["polygon"], rooms, wall["roomId"] + ) + # add the wall direction and width - wall_width, wall_direction = 
self.get_wall_direction(wall_endpoint1, wall_endpoint2, full_vertices) + wall_width, wall_direction = self.get_wall_direction( + wall_endpoint1, wall_endpoint2, full_vertices + ) wall["width"] = wall_width wall["height"] = wall_height wall["direction"] = wall_direction wall["segment"] = [wall_endpoint1, wall_endpoint2] wall["id"] = f"wall|{roomId}|{wall_direction}|{j}" walls.append(wall) - + # update wall connection information for wall in walls: if wall["connected_rooms"] != []: for connection in wall["connected_rooms"]: connect_room_id = connection["roomId"] - candidate_walls = [wall for wall in walls if wall["roomId"] == connect_room_id] + candidate_walls = [ + wall for wall in walls if wall["roomId"] == connect_room_id + ] line1 = connection["line1"] for candidate_wall in candidate_walls: - if line1[0] in candidate_wall["polygon"] and line1[1] in candidate_wall["polygon"]: + if ( + line1[0] in candidate_wall["polygon"] + and line1[1] in candidate_wall["polygon"] + ): connection["wallId"] = candidate_wall["id"] - # add exterior walls updated_walls = [] @@ -78,9 +94,8 @@ def generate_walls(self, scene): updated_walls.append(exterior_wall) updated_walls.append(wall) walls = updated_walls - + return wall_height, walls - def get_wall_height(self, scene): # get wall height @@ -88,11 +103,17 @@ def get_wall_height(self, scene): if "wall_height" not in scene: wall_height = self.llm(wall_height_prompt).split("\n")[0].strip() - - try: wall_height = float(wall_height) - except: wall_height = round(random.uniform(2.5, 4.5), 1) # if failed, random height between 2.5 and 4.5 - scene["wall_height"] = min(max(wall_height, 2.0), 4.5) # limit the wall height between 2.0 and 4.5 + try: + wall_height = float(wall_height) + except: + wall_height = round( + random.uniform(2.5, 4.5), 1 + ) # if failed, random height between 2.5 and 4.5 + + scene["wall_height"] = min( + max(wall_height, 2.0), 4.5 + ) # limit the wall height between 2.0 and 4.5 wall_height = scene["wall_height"] 
print(f"\nUser: {wall_height_prompt}\n") @@ -100,31 +121,35 @@ def get_wall_height(self, scene): return wall_height - def generate_wall_polygon(self, point, next_point, wall_height): wall_polygon = [] # add the base point - wall_polygon.append({'x': point[0], 'y': 0, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": 0, "z": point[1]}) # add the top point (with the same x and z, but y = wall_height) - wall_polygon.append({'x': point[0], 'y': wall_height, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": wall_height, "z": point[1]}) # add the top point of the next base point - wall_polygon.append({'x': next_point[0], 'y': wall_height, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": wall_height, "z": next_point[1]}) # add the next base point - wall_polygon.append({'x': next_point[0], 'y': 0, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": 0, "z": next_point[1]}) return wall_polygon - - + def get_connected_rooms(self, wall_polygon, rooms, roomId): connected_rooms = [] - vertices0 = [(vertex['x'], vertex['z']) for vertex in wall_polygon if vertex['y'] == 0] + vertices0 = [ + (vertex["x"], vertex["z"]) for vertex in wall_polygon if vertex["y"] == 0 + ] lines0 = [LineString([vertices0[0], vertices0[1]])] for room in rooms: - if room["id"] == roomId: continue # do not consider the room itself + if room["id"] == roomId: + continue # do not consider the room itself room_polygon = room["floorPolygon"] - vertices1 = [(vertex['x'], vertex['z']) for vertex in room_polygon] - lines1 = [LineString([vertices1[i], vertices1[(i+1)%len(vertices1)]]) for i in range(len(vertices1))] + vertices1 = [(vertex["x"], vertex["z"]) for vertex in room_polygon] + lines1 = [ + LineString([vertices1[i], vertices1[(i + 1) % len(vertices1)]]) + for i in range(len(vertices1)) + ] shared_segments = self.check_connected(lines0, lines1) @@ -132,9 +157,8 @@ def get_connected_rooms(self, wall_polygon, rooms, roomId): connected_room = 
shared_segments[0] connected_room["roomId"] = room["id"] connected_rooms.append(connected_room) - - return connected_rooms + return connected_rooms def check_connected(self, lines0, lines1): shared_segments = [] @@ -142,29 +166,38 @@ def check_connected(self, lines0, lines1): for line1 in lines1: if line0.intersects(line1): intersection = line0.intersection(line1) - if intersection.geom_type == 'LineString': - shared_segments.append({ - "intersection": [ - {"x": intersection.xy[0][0], "y": 0, "z": intersection.xy[1][0]}, - {"x": intersection.xy[0][1], "y": 0, "z": intersection.xy[1][1]} - ], - "line0": [ - {"x": line0.xy[0][0], "y": 0, "z": line0.xy[1][0]}, - {"x": line0.xy[0][1], "y": 0, "z": line0.xy[1][1]} - ], - "line1": [ - {"x": line1.xy[0][0], "y": 0, "z": line1.xy[1][0]}, - {"x": line1.xy[0][1], "y": 0, "z": line1.xy[1][1]} - ], - }) - + if intersection.geom_type == "LineString": + shared_segments.append( + { + "intersection": [ + { + "x": intersection.xy[0][0], + "y": 0, + "z": intersection.xy[1][0], + }, + { + "x": intersection.xy[0][1], + "y": 0, + "z": intersection.xy[1][1], + }, + ], + "line0": [ + {"x": line0.xy[0][0], "y": 0, "z": line0.xy[1][0]}, + {"x": line0.xy[0][1], "y": 0, "z": line0.xy[1][1]}, + ], + "line1": [ + {"x": line1.xy[0][0], "y": 0, "z": line1.xy[1][0]}, + {"x": line1.xy[0][1], "y": 0, "z": line1.xy[1][1]}, + ], + } + ) + # Return shared line segments, if any if shared_segments: return shared_segments # If no shared line segments, return None return None - def update_walls(self, original_walls, open_room_pairs): # update walls since there could be open connections @@ -177,7 +210,10 @@ def update_walls(self, original_walls, open_room_pairs): updated_walls.append(wall) else: room1_id = connection[0]["roomId"] - if (room0_id, room1_id) in open_room_pairs or (room1_id, room0_id) in open_room_pairs: + if (room0_id, room1_id) in open_room_pairs or ( + room1_id, + room0_id, + ) in open_room_pairs: deleted_wallIds.append(wall["id"]) else: 
updated_walls.append(wall) @@ -190,7 +226,10 @@ def update_walls(self, original_walls, open_room_pairs): open_wall_segments_no_overlap = [] for segment in open_wall_segments: - if segment not in open_wall_segments_no_overlap and segment[::-1] not in open_wall_segments_no_overlap: + if ( + segment not in open_wall_segments_no_overlap + and segment[::-1] not in open_wall_segments_no_overlap + ): open_wall_segments_no_overlap.append(segment) open_wall_rectangles = [] @@ -198,18 +237,23 @@ def update_walls(self, original_walls, open_room_pairs): top_rectangle, bottom_rectangle = self.create_rectangles(segment) open_wall_rectangles.append(top_rectangle) open_wall_rectangles.append(bottom_rectangle) - - open_walls = {"segments": open_wall_segments_no_overlap, "openWallBoxes": open_wall_rectangles} + + open_walls = { + "segments": open_wall_segments_no_overlap, + "openWallBoxes": open_wall_rectangles, + } return updated_walls, open_walls - def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): wall_width = np.linalg.norm(np.array(wall_endpoint1) - np.array(wall_endpoint2)) - + wall_direction = None room_polygon = Polygon(room_vertices) - wall_center = [(wall_endpoint1[0] + wall_endpoint2[0])/2, (wall_endpoint1[1] + wall_endpoint2[1])/2] + wall_center = [ + (wall_endpoint1[0] + wall_endpoint2[0]) / 2, + (wall_endpoint1[1] + wall_endpoint2[1]) / 2, + ] if wall_endpoint1[1] == wall_endpoint2[1]: extend_point_1 = [wall_center[0], wall_center[1] + 0.01] @@ -219,7 +263,7 @@ def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): wall_direction = "south" elif room_polygon.contains(Point(extend_point_2)): wall_direction = "north" - + elif wall_endpoint1[0] == wall_endpoint2[0]: extend_point_1 = [wall_center[0] + 0.01, wall_center[1]] extend_point_2 = [wall_center[0] - 0.01, wall_center[1]] @@ -231,7 +275,6 @@ def get_wall_direction(self, wall_endpoint1, wall_endpoint2, room_vertices): return wall_width, wall_direction - def 
create_rectangles(self, segment): # Convert to numpy arrays for easier calculations pt1 = np.array(segment[0]) @@ -243,10 +286,22 @@ def create_rectangles(self, segment): # Calculate a perpendicular vector with length 1 perp_vec = np.array([-vec[1], vec[0]], dtype=np.float32) perp_vec /= np.linalg.norm(perp_vec) - perp_vec *= 0.5 # 0.5 is the hyperparameter for the width of the open connection + perp_vec *= ( + 0.5 # 0.5 is the hyperparameter for the width of the open connection + ) # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] - - return top_rectangle, bottom_rectangle \ No newline at end of file + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] + + return top_rectangle, bottom_rectangle diff --git a/holodeck/generation/windows.py b/holodeck/generation/windows.py index a4d44e2..2b488dc 100644 --- a/holodeck/generation/windows.py +++ b/holodeck/generation/windows.py @@ -13,34 +13,56 @@ from holodeck.constants import HOLODECK_BASE_DATA_DIR -class WindowGenerator(): +class WindowGenerator: def __init__(self, llm: OpenAI): - self.json_template = {"assetId": None, "id": None, "room0": None, "room1": None, - "wall0": None, "wall1": None, "holePolygon": [], - "assetPosition": {}, "roomId": None} - - self.window_data = compress_json.load(os.path.join(HOLODECK_BASE_DATA_DIR, "windows/window-database.json")) + self.json_template = { + "assetId": None, + "id": None, + "room0": None, + "room1": None, + "wall0": None, + "wall1": None, + "holePolygon": [], + "assetPosition": {}, + "roomId": None, + } + + self.window_data = compress_json.load( + os.path.join(HOLODECK_BASE_DATA_DIR, "windows/window-database.json") + ) self.window_ids = 
list(self.window_data.keys()) - self.hole_offset = 0.05 # make the hole smaller than windows + self.hole_offset = 0.05 # make the hole smaller than windows self.llm = llm - self.window_template = PromptTemplate(input_variables=["input", "walls", "wall_height", "additional_requirements"], - template=prompts.window_prompt) + self.window_template = PromptTemplate( + input_variables=[ + "input", + "walls", + "wall_height", + "additional_requirements", + ], + template=prompts.window_prompt, + ) self.used_assets = [] - def generate_windows(self, scene, additional_requirements_window): # get organized walls organized_walls, available_wall_str = self.get_wall_for_windows(scene) - window_prompt = self.window_template.format(input=scene["query"], - walls=available_wall_str, - wall_height=int(scene["wall_height"] * 100), - additional_requirements=additional_requirements_window) - - if "raw_window_plan" not in scene: raw_window_plan = self.llm(window_prompt) - else: raw_window_plan = scene["raw_window_plan"] + window_prompt = self.window_template.format( + input=scene["query"], + walls=available_wall_str, + wall_height=int(scene["wall_height"] * 100), + additional_requirements=additional_requirements_window, + ) + + if "raw_window_plan" not in scene: + raw_window_plan = self.llm(window_prompt) + else: + raw_window_plan = scene["raw_window_plan"] print(f"\nUser: {window_prompt}\n") - print(f"{Fore.GREEN}AI: Here is the window plan:\n{raw_window_plan}{Fore.RESET}") + print( + f"{Fore.GREEN}AI: Here is the window plan:\n{raw_window_plan}{Fore.RESET}" + ) walls = scene["walls"] windows = [] @@ -50,29 +72,55 @@ def generate_windows(self, scene, additional_requirements_window): for row in rows: # parse window plan parsed_plan = self.parse_raw_plan(row) - if parsed_plan is None: continue - + if parsed_plan is None: + continue + # get room id room_id = parsed_plan["room_id"] # only one wall with windows per room - if room_id not in room_with_windows: room_with_windows.append(room_id) 
- else: print(f"Warning: room {room_id} already has windows"); continue + if room_id not in room_with_windows: + room_with_windows.append(room_id) + else: + print(f"Warning: room {room_id} already has windows") + continue # get wall id - try: wall_id = organized_walls[room_id][parsed_plan["wall_direction"]]["wall_id"] - except: print("Warning: no available wall for {}".format(row)); continue - + try: + wall_id = organized_walls[room_id][parsed_plan["wall_direction"]][ + "wall_id" + ] + except: + print("Warning: no available wall for {}".format(row)) + continue + for wall in walls: if wall["id"] == wall_id: wall_info = wall # select window - window_id = self.select_window(parsed_plan["window_type"], parsed_plan["window_size"]) - window_polygons, window_positions, window_segments, window_boxes, new_wall_ids, updated_walls = self.get_window_polygon(window_id, parsed_plan["window_height"], parsed_plan["quantity"], wall_info, walls) - walls = updated_walls # update walls - - if window_polygons == []: print("Warning: no windows generated for {}".format(row)); continue + window_id = self.select_window( + parsed_plan["window_type"], parsed_plan["window_size"] + ) + ( + window_polygons, + window_positions, + window_segments, + window_boxes, + new_wall_ids, + updated_walls, + ) = self.get_window_polygon( + window_id, + parsed_plan["window_height"], + parsed_plan["quantity"], + wall_info, + walls, + ) + walls = updated_walls # update walls + + if window_polygons == []: + print("Warning: no windows generated for {}".format(row)) + continue # generate window json for i in range(len(window_polygons)): @@ -95,32 +143,42 @@ def generate_windows(self, scene, additional_requirements_window): window_ids.append(current_window["id"]) windows.append(current_window) else: - print("Warning: duplicated window id: {}".format(current_window["id"])) + print( + "Warning: duplicated window id: {}".format(current_window["id"]) + ) return raw_window_plan, walls, windows - def parse_raw_plan(self, 
plan): try: - pattern = re.compile(r'^(\d+[\.\)]\s*|- )') - plan = pattern.sub('', plan) - if plan[-1] == ".": plan = plan[:-1] # remove the last period - room_id, wall_direction, window_type, window_size, quantity, window_height = plan.split("|") - return {"room_id": room_id.strip(), - "wall_direction": wall_direction.strip().lower(), - "window_type": window_type.strip().lower(), - "window_size": ast.literal_eval(window_size.strip()), - "quantity": int(quantity.strip()), - "window_height": float(window_height.strip())} + pattern = re.compile(r"^(\d+[\.\)]\s*|- )") + plan = pattern.sub("", plan) + if plan[-1] == ".": + plan = plan[:-1] # remove the last period + ( + room_id, + wall_direction, + window_type, + window_size, + quantity, + window_height, + ) = plan.split("|") + return { + "room_id": room_id.strip(), + "wall_direction": wall_direction.strip().lower(), + "window_type": window_type.strip().lower(), + "window_size": ast.literal_eval(window_size.strip()), + "quantity": int(quantity.strip()), + "window_height": float(window_height.strip()), + } except: print("Error: could not parse window plan: {}".format(plan)) return None - def get_room(self, rooms, room_type): for room in rooms: - if room_type == room["roomType"]: return room - + if room_type == room["roomType"]: + return room def get_wall_for_windows(self, scene): walls_with_door = [] @@ -136,48 +194,74 @@ def get_wall_for_windows(self, scene): organized_walls = {} for wall in available_walls: - room_id = wall['roomId'] - wall_direction = wall['direction'] + room_id = wall["roomId"] + wall_direction = wall["direction"] + + wall_width = wall["width"] + if wall_width < 2.0: + continue - wall_width = wall['width'] - if wall_width < 2.0: continue + if room_id not in organized_walls: + organized_walls[room_id] = {} - if room_id not in organized_walls: organized_walls[room_id] = {} - if wall_direction not in organized_walls[room_id]: - organized_walls[room_id][wall_direction] = {"wall_id": wall["id"], 
"wall_width": wall_width} + organized_walls[room_id][wall_direction] = { + "wall_id": wall["id"], + "wall_width": wall_width, + } else: if wall_width > organized_walls[room_id][wall_direction]["wall_width"]: - organized_walls[room_id][wall_direction] = {"wall_id": wall["id"], "wall_width": wall_width} - + organized_walls[room_id][wall_direction] = { + "wall_id": wall["id"], + "wall_width": wall_width, + } + available_wall_str = "" for room_id in organized_walls: current_str = "{}: ".format(room_id) for wall_direction in organized_walls[room_id]: - current_str += "{}, {} cm; ".format(wall_direction, int(organized_walls[room_id][wall_direction]["wall_width"] * 100)) + current_str += "{}, {} cm; ".format( + wall_direction, + int(organized_walls[room_id][wall_direction]["wall_width"] * 100), + ) available_wall_str += current_str + "\n" return organized_walls, available_wall_str - def select_window(self, window_type, window_size): - candidate_window_ids = [window_id for window_id in self.window_ids if self.window_data[window_id]["type"] == window_type] - size_differences = [np.linalg.norm(np.array(window_size) - np.array(self.window_data[window_id]["size"])) for window_id in candidate_window_ids] - sorted_window_ids = [x for _, x in sorted(zip(size_differences, candidate_window_ids))] + candidate_window_ids = [ + window_id + for window_id in self.window_ids + if self.window_data[window_id]["type"] == window_type + ] + size_differences = [ + np.linalg.norm( + np.array(window_size) - np.array(self.window_data[window_id]["size"]) + ) + for window_id in candidate_window_ids + ] + sorted_window_ids = [ + x for _, x in sorted(zip(size_differences, candidate_window_ids)) + ] top_window_ids = sorted_window_ids[0] - sorted_window_ids = [window_id for window_id in sorted_window_ids if window_id not in self.used_assets] - - if len(sorted_window_ids) == 0: selected_window_id = top_window_ids - else: selected_window_id = sorted_window_ids[0] + sorted_window_ids = [ + window_id + for 
window_id in sorted_window_ids + if window_id not in self.used_assets + ] + + if len(sorted_window_ids) == 0: + selected_window_id = top_window_ids + else: + selected_window_id = sorted_window_ids[0] return selected_window_id - def get_window_polygon(self, window_id, window_height, quantity, wall_info, walls): window_x = self.window_data[window_id]["boundingBox"]["x"] - self.hole_offset window_y = self.window_data[window_id]["boundingBox"]["y"] - self.hole_offset - + wall_width = wall_info["width"] wall_height = wall_info["height"] wall_segment = wall_info["segment"] @@ -193,30 +277,44 @@ def get_window_polygon(self, window_id, window_height, quantity, wall_info, wall normalized_vector = original_vector / original_length subwall_length = original_length / quantity - if quantity == 0: + if quantity == 0: return [], [], [], [], [], walls elif quantity == 1: window_start = random.uniform(0, wall_width - window_x) window_end = window_start + window_x - polygon = [{"x": window_start, "y": window_height, "z": 0}, - {"x": window_end, "y": window_height + window_y, "z": 0}] - position = {"x": (polygon[0]["x"] + polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - window_segment = [list(wall_start + normalized_vector * window_start), list(wall_start + normalized_vector * window_end)] + polygon = [ + {"x": window_start, "y": window_height, "z": 0}, + {"x": window_end, "y": window_height + window_y, "z": 0}, + ] + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + window_segment = [ + list(wall_start + normalized_vector * window_start), + list(wall_start + normalized_vector * window_end), + ] window_boxes = self.create_rectangles(window_segment) - return [polygon], [position], [window_segment], [window_boxes], [wall_info["id"]], walls - + return ( + [polygon], + [position], + [window_segment], + 
[window_boxes], + [wall_info["id"]], + walls, + ) + else: # split walls into subwalls segments = [] for i in range(quantity): segment_start = wall_start + i * subwall_length * normalized_vector - segment_end = wall_start + (i+1) * subwall_length * normalized_vector + segment_end = wall_start + (i + 1) * subwall_length * normalized_vector segments.append((segment_start, segment_end)) - + # update walls updated_walls = [] new_wall_ids = [] @@ -228,9 +326,14 @@ def get_window_polygon(self, window_id, window_height, quantity, wall_info, wall # generate new subwall json current_wall = copy.deepcopy(wall_info) current_wall["id"] = f"{wall_info['id']}|{i}" - current_wall["segment"] = [segments[i][0].tolist(), segments[i][1].tolist()] + current_wall["segment"] = [ + segments[i][0].tolist(), + segments[i][1].tolist(), + ] current_wall["width"] = subwall_length - current_wall["polygon"] = self.generate_wall_polygon(segments[i][0].tolist(), segments[i][1].tolist(), wall_height) + current_wall["polygon"] = self.generate_wall_polygon( + segments[i][0].tolist(), segments[i][1].tolist(), wall_height + ) current_wall["connect_exterior"] = current_wall["id"] + "|exterior" # add exterior wall @@ -244,43 +347,57 @@ def get_window_polygon(self, window_id, window_height, quantity, wall_info, wall updated_walls.append(current_wall) updated_walls.append(current_wall_exterior) new_wall_ids.append(current_wall["id"]) - + # generate window polygons window_polygons = [] window_positions = [] window_segments = [] window_boxes = [] for i in range(len(segments)): - window_start = random.uniform(0, subwall_length - window_x) # TODO: consider the same start point for all windows + window_start = random.uniform( + 0, subwall_length - window_x + ) # TODO: consider the same start point for all windows window_end = window_start + window_x - polygon = [{"x": window_start, "y": window_height, "z": 0}, - {"x": window_end, "y": window_height + window_y, "z": 0}] - position = {"x": (polygon[0]["x"] + 
polygon[1]["x"]) / 2, - "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, - "z": (polygon[0]["z"] + polygon[1]["z"]) / 2} - - window_segment = [list(segments[i][0] + normalized_vector * window_start), list(segments[i][0] + normalized_vector * window_end)] + polygon = [ + {"x": window_start, "y": window_height, "z": 0}, + {"x": window_end, "y": window_height + window_y, "z": 0}, + ] + position = { + "x": (polygon[0]["x"] + polygon[1]["x"]) / 2, + "y": (polygon[0]["y"] + polygon[1]["y"]) / 2, + "z": (polygon[0]["z"] + polygon[1]["z"]) / 2, + } + + window_segment = [ + list(segments[i][0] + normalized_vector * window_start), + list(segments[i][0] + normalized_vector * window_end), + ] window_box = self.create_rectangles(window_segment) window_polygons.append(polygon) window_positions.append(position) window_segments.append(window_segment) window_boxes.append(window_box) - - return window_polygons, window_positions, window_segments, window_boxes, new_wall_ids, updated_walls + return ( + window_polygons, + window_positions, + window_segments, + window_boxes, + new_wall_ids, + updated_walls, + ) def generate_wall_polygon(self, point, next_point, wall_height): wall_polygon = [] # add the base point - wall_polygon.append({'x': point[0], 'y': 0, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": 0, "z": point[1]}) # add the top point (with the same x and z, but y = wall_height) - wall_polygon.append({'x': point[0], 'y': wall_height, 'z': point[1]}) + wall_polygon.append({"x": point[0], "y": wall_height, "z": point[1]}) # add the top point of the next base point - wall_polygon.append({'x': next_point[0], 'y': wall_height, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": wall_height, "z": next_point[1]}) # add the next base point - wall_polygon.append({'x': next_point[0], 'y': 0, 'z': next_point[1]}) + wall_polygon.append({"x": next_point[0], "y": 0, "z": next_point[1]}) return wall_polygon - def create_rectangles(self, segment): # Convert to numpy 
arrays for easier calculations @@ -293,10 +410,20 @@ def create_rectangles(self, segment): # Calculate a perpendicular vector with length 1 perp_vec = np.array([-vec[1], vec[0]]) perp_vec /= np.linalg.norm(perp_vec) - perp_vec *= 0.1 # 0.1 is the width of the window + perp_vec *= 0.1 # 0.1 is the width of the window # Calculate the four points for each rectangle - top_rectangle = [list(pt1 + perp_vec), list(pt2 + perp_vec), list(pt2), list(pt1)] - bottom_rectangle = [list(pt1), list(pt2), list(pt2 - perp_vec), list(pt1 - perp_vec)] - - return top_rectangle, bottom_rectangle \ No newline at end of file + top_rectangle = [ + list(pt1 + perp_vec), + list(pt2 + perp_vec), + list(pt2), + list(pt1), + ] + bottom_rectangle = [ + list(pt1), + list(pt2), + list(pt2 - perp_vec), + list(pt1 - perp_vec), + ] + + return top_rectangle, bottom_rectangle From 1eccd0bdc01b8f1046258c186ea8e905fedf93b2 Mon Sep 17 00:00:00 2001 From: lucaw Date: Thu, 30 May 2024 16:53:19 -0700 Subject: [PATCH 3/6] Some more improvements. 
--- README.md | 6 +- holodeck/constants.py | 22 ++++- holodeck/generation/holodeck.py | 8 +- holodeck/generation/object_selector.py | 113 +++++++++++++++++++------ holodeck/generation/prompts.py | 2 +- holodeck/generation/rooms.py | 15 +++- holodeck/main.py | 36 +++++--- requirements.txt | 1 + setup.py | 55 ++++++++++++ 9 files changed, 207 insertions(+), 51 deletions(-) create mode 100644 setup.py diff --git a/README.md b/README.md index bfbb76c..24af537 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ After cloning the repo, you can install the required dependencies using the foll conda create --name holodeck python=3.10 conda activate holodeck pip install -r requirements.txt -pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+e24aa88d86d460f1f5352e28b8e754c3f5966865 +pip install --extra-index-url https://ai2thor-pypi.allenai.org ai2thor==0+8524eadda94df0ab2dbb2ef5a577e4d37c712897 ``` ## Data @@ -33,14 +33,14 @@ python -m objathor.dataset.download_assets --version 2023_09_23 python -m objathor.dataset.download_annotations --version 2023_09_23 python -m objathor.dataset.download_features --version 2023_09_23 ``` -by default these will save to `~/.objathor-assets/...`, you can change this director by specifying the `--path` argument. +by default these will save to `~/.objathor-assets/...`, you can change this director by specifying the `--path` argument. If you change the `--path`, you'll need to set the `OBJAVERSE_ASSETS_DIR` environment variable to the path where the assets are stored when you use Holodeck. ## Usage You can use the following command to generate a new environment. ``` python holodeck/main.py --query "a living room" --openai_api_key ``` -To be noticed, our system uses `gpt-4-1106-preview`, so please ensure you have access to it. +Our system uses `gpt-4-1106-preview`, **so please ensure you have access to it.** **Note:** To yield better layouts, use `DFS` as the solver. 
If you pull the repo before `12/28/2023`, you must set the [argument](https://github.com/allenai/Holodeck/blob/386b0a868def29175436dc3b1ed85b6309eb3cad/main.py#L78) `--use_milp` to `False` to use `DFS`. diff --git a/holodeck/constants.py b/holodeck/constants.py index c155e18..38c1e7e 100644 --- a/holodeck/constants.py +++ b/holodeck/constants.py @@ -3,18 +3,32 @@ ABS_PATH_OF_HOLODECK = os.path.abspath(os.path.dirname(Path(__file__))) -VERSION = "2023_09_23" +ASSETS_VERSION = os.environ.get("ASSETS_VERSION", "2023_09_23") +HD_BASE_VERSION = os.environ.get("HD_BASE_VERSION", "2023_09_23") -OBJATHOR_VERSIONED_DIR = os.path.expanduser(f"~/.objathor-assets/{VERSION}") +OBJATHOR_ASSETS_BASE_DIR = os.environ.get( + "OBJATHOR_ASSETS_BASE_DIR", os.path.expanduser(f"~/.objathor-assets") +) + +OBJATHOR_VERSIONED_DIR = os.path.join(OBJATHOR_ASSETS_BASE_DIR, ASSETS_VERSION) OBJATHOR_ASSETS_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "assets") OBJATHOR_FEATURES_DIR = os.path.join(OBJATHOR_VERSIONED_DIR, "features") OBJATHOR_ANNOTATIONS_PATH = os.path.join(OBJATHOR_VERSIONED_DIR, "annotations.json.gz") -HOLODECK_BASE_DATA_DIR = os.path.expanduser(f"~/.objathor-assets/holodeck/{VERSION}") +HOLODECK_BASE_DATA_DIR = os.path.join( + OBJATHOR_ASSETS_BASE_DIR, "holodeck", HD_BASE_VERSION +) HOLODECK_THOR_FEATURES_DIR = os.path.join(HOLODECK_BASE_DATA_DIR, "thor_object_data") HOLODECK_THOR_ANNOTATIONS_PATH = os.path.join( HOLODECK_BASE_DATA_DIR, "thor_object_data", "annotations.json.gz" ) -THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" +if ASSETS_VERSION > "2023_09_23": + THOR_COMMIT_ID = "8524eadda94df0ab2dbb2ef5a577e4d37c712897" +else: + THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" + +LLM_MODEL_NAME = "gpt-4-1106-preview" + +DEBUGGING = os.environ.get("DEBUGGING", "0").lower() in ["1", "true", "True", "t", "T"] diff --git a/holodeck/generation/holodeck.py b/holodeck/generation/holodeck.py index b51faf2..ae7ddec 100644 --- a/holodeck/generation/holodeck.py 
+++ b/holodeck/generation/holodeck.py @@ -16,6 +16,7 @@ OBJATHOR_ANNOTATIONS_PATH, HOLODECK_THOR_FEATURES_DIR, HOLODECK_THOR_ANNOTATIONS_PATH, + LLM_MODEL_NAME, ) from holodeck.generation.ceiling_objects import CeilingObjectGenerator from holodeck.generation.doors import DoorGenerator @@ -66,12 +67,7 @@ def __init__( # initialize llm self.llm = OpenAI( - model_name="gpt-4-1106-preview", - max_tokens=2048, - openai_api_key=openai_api_key, - ) - self.llm_fast = OpenAI( - model_name="gpt-3.5-turbo", + model_name=LLM_MODEL_NAME, max_tokens=2048, openai_api_key=openai_api_key, ) diff --git a/holodeck/generation/object_selector.py b/holodeck/generation/object_selector.py index e673870..f38a0ea 100644 --- a/holodeck/generation/object_selector.py +++ b/holodeck/generation/object_selector.py @@ -1,9 +1,11 @@ +import ast import copy import json import multiprocessing import random import re -from typing import Dict +import traceback +from typing import Dict, List import torch import torch.nn.functional as F @@ -17,6 +19,15 @@ from holodeck.generation.utils import get_bbox_dims, get_annotations from holodeck.generation.wall_objects import DFS_Solver_Wall +EXPECTED_OBJECT_ATTRIBUTES = [ + "description", + "location", + "size", + "quantity", + "variance_type", + "objects_on_top", +] + class ObjectSelector: def __init__(self, object_retriever: ObjathorRetriever, llm: OpenAI): @@ -151,7 +162,6 @@ def plan_room(self, args): .replace("ROOM_SIZE", room_size_str) .replace("REQUIREMENTS", additional_requirements) ) - # print(f"\nUser: {prompt_1}\n") output_1 = self.llm(prompt_1).lower() plan_1 = self.extract_json(output_1) @@ -174,13 +184,17 @@ def plan_room(self, args): room2vertices[room_type], ) - if floor_capacity[1] / floor_capacity[0] >= 0.8: + required_floor_capacity_percentage = 0.8 + if floor_capacity[1] / floor_capacity[0] >= required_floor_capacity_percentage: result["floor"] = floor_objects result["wall"] = wall_objects result["plan"] = plan_1 else: print( - 
f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2, which is less than 70% of the total floor capacity {floor_capacity[0]:.2g}m^2.{Fore.RESET}" + f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2," + f" which is less than {100*required_floor_capacity_percentage:.0f}% of the total floor capacity" + f" {floor_capacity[0]:.2g}m^2." + f"{Fore.RESET}" ) prompt_2 = self.object_selection_template_2.format( object_selection_prompt_new_1=prompt_1, @@ -190,6 +204,12 @@ def plan_room(self, args): output_2 = self.llm(prompt_2).lower() plan_2 = self.extract_json(output_2) + if plan_2 is None: + print( + f"{Fore.RED}AI: Replanning failed, will use original plan.{Fore.RESET}" + ) + plan_2 = plan_1 + new_plan = copy.deepcopy(plan_1) for object in plan_2: new_plan[object] = plan_2[object] @@ -202,40 +222,76 @@ def plan_room(self, args): room2wall_capacity[room_type], room2vertices[room_type], ) + result["floor"] = floor_objects result["wall"] = wall_objects result["plan"] = new_plan return room_type, result + def _recursively_normalize_attribute_keys(self, obj): + if isinstance(obj, Dict): + return { + key.strip() + .lower() + .replace(" ", "_"): self._recursively_normalize_attribute_keys(value) + for key, value in obj.items() + } + elif isinstance(obj, List): + return [self._recursively_normalize_attribute_keys(value) for value in obj] + elif isinstance(obj, (str, int, float, bool)): + return obj + else: + print( + f"Unexpected type {type(obj)} in {obj} while normalizing attribute keys." + f" Returning the object as is." 
+ ) + return obj + def extract_json(self, input_string): # Using regex to identify the JSON structure in the string json_match = re.search(r"{.*}", input_string, re.DOTALL) if json_match: extracted_json = json_match.group(0) + + # Convert the extracted JSON string into a Python dictionary + json_dict = None try: - # Convert the extracted JSON string into a Python dictionary json_dict = json.loads(extracted_json) - json_dict = self.check_dict(json_dict) - return json_dict - except json.JSONDecodeError: - print(input_string) - print("Error while decoding the JSON.") + except: + try: + json_dict = ast.literal_eval(extracted_json) + except: + pass + + if json_dict is None: + print( + f"{Fore.RED}[ERROR] while parsing the JSON for:\n{input_string}{Fore.RESET}", + flush=True, + ) return None + + json_dict = self._recursively_normalize_attribute_keys(json_dict) + try: + json_dict = self.check_dict(json_dict) + except Exception as e: + print( + f"{Fore.RED}[ERROR] Dictionary check failed for:" + f"\n{json_dict}" + f"\nFailure reason:{traceback.format_exception_only(e)}" + f"{Fore.RESET}", + flush=True, + ) + + return json_dict + else: - print("No valid JSON found.") + print(f"No valid JSON found in:\n{input_string}", flush=True) return None def check_dict(self, dict): valid = True - attributes = [ - "description", - "location", - "size", - "quantity", - "variance_type", - "objects_on_top", - ] + for key, value in dict.items(): if not isinstance(key, str): valid = False @@ -245,7 +301,7 @@ def check_dict(self, dict): valid = False break - for attribute in attributes: + for attribute in EXPECTED_OBJECT_ATTRIBUTES: if attribute not in value: valid = False break @@ -254,7 +310,7 @@ def check_dict(self, dict): valid = False break - if value["location"] not in ["floor", "wall"]: + if value.get("location") not in ["floor", "wall"]: dict[key]["location"] = "floor" if ( @@ -267,12 +323,12 @@ def check_dict(self, dict): if not isinstance(value["quantity"], int): 
dict[key]["quantity"] = 1 - if not isinstance(value["variance_type"], str) or value[ + if not isinstance(value.get("variance_type"), str) or value[ "variance_type" ] not in ["same", "varied"]: dict[key]["variance_type"] = "same" - if not isinstance(value["objects_on_top"], list): + if not isinstance(value.get("objects_on_top"), list): dict[key]["objects_on_top"] = [] for i, child in enumerate(value["objects_on_top"]): @@ -280,7 +336,7 @@ def check_dict(self, dict): valid = False break - for attribute in ["object_name", "quantity", "variance_type"]: + for attribute in ["object_name", "quantity"]: if attribute not in child: valid = False break @@ -292,7 +348,7 @@ def check_dict(self, dict): if not isinstance(child["quantity"], int): dict[key]["objects_on_top"][i]["quantity"] = 1 - if not isinstance(child["variance_type"], str) or child[ + if not isinstance(child.get("variance_type"), str) or child[ "variance_type" ] not in ["same", "varied"]: dict[key]["objects_on_top"][i]["variance_type"] = "same" @@ -355,7 +411,12 @@ def get_floor_objects( object_description = floor_object["description"] object_size = floor_object["size"] quantity = min(floor_object["quantity"], 10) - variance_type = floor_object["variance_type"] + + if "variance_type" not in floor_object: + print( + f'[WARNING] variance_type not found in the the object:\n{floor_object}, will set this to be "same".' + ) + variance_type = floor_object.get("variance_type", "same") candidates = self.object_retriever.retrieve( [f"a 3D model of {object_type}, {object_description}"], diff --git a/holodeck/generation/prompts.py b/holodeck/generation/prompts.py index d5fd0b8..8db9aae 100644 --- a/holodeck/generation/prompts.py +++ b/holodeck/generation/prompts.py @@ -204,7 +204,7 @@ object_selection_prompt_new_1 = """You are an experienced room designer, please assist me in selecting large *floor*/*wall* objects and small objects on top of them to furnish the room. 
You need to select appropriate objects to satisfy the customer's requirements. -You must provide a description and desired size for each object since I will use it to retrieve object. If multiple identical items are to be placed in the room, please indicate the quantity and variance type (same or varied). +You must provide a description and desired size for each object since I will use it to retrieve object. If multiple items are to be placed in the room with the same description, please indicate the quantity and variance_type ("same" if they should be identical, otherwise "varied"). Present your recommendations in JSON format: { object_name:{ diff --git a/holodeck/generation/rooms.py b/holodeck/generation/rooms.py index 4558251..f961ff1 100644 --- a/holodeck/generation/rooms.py +++ b/holodeck/generation/rooms.py @@ -17,7 +17,7 @@ from tqdm import tqdm import holodeck.generation.prompts as prompts -from holodeck.constants import HOLODECK_BASE_DATA_DIR +from holodeck.constants import HOLODECK_BASE_DATA_DIR, DEBUGGING class FloorPlanGenerator: @@ -127,6 +127,19 @@ def parse_raw_plan(self, raw_plan): if not valid: print(f"{Fore.RED}AI: {msg}{Fore.RESET}") + + if DEBUGGING: + import matplotlib.pyplot as plt + import numpy as np + + colors = plt.cm.rainbow(np.linspace(0, 1, len(parsed_plan))) + for room in parsed_plan: + for i in range(len(room["vertices"])): + a = room["vertices"][i] + b = room["vertices"][(i + 1) % len(room["vertices"])] + plt.plot([a[0], b[0]], [a[1], b[1]], color=colors[i]) + plt.show() + raise ValueError(msg) else: print(f"{Fore.GREEN}AI: {msg}{Fore.RESET}") diff --git a/holodeck/main.py b/holodeck/main.py index a6c56ce..800137f 100644 --- a/holodeck/main.py +++ b/holodeck/main.py @@ -22,19 +22,35 @@ def str2bool(v: str): def generate_single_scene(args): folder_name = args.query.replace(" ", "_").replace("'", "") - try: - if args.original_scene is not None: + + scene = None + if args.original_scene is not None: + print(f"Loading original scene from 
{args.original_scene}.") + try: scene = compress_json.load(args.original_scene) - print(f"Loading exist scene from {args.original_scene}.") - else: - path = os.path.join( - HOLODECK_BASE_DATA_DIR, f"scenes/{folder_name}/{folder_name}.json" + except: + print( + f"[ERROR] Could not load original scene from given path {args.original_scene}." ) - print(f"Loading exist scene from {path}.") - scene = compress_json.load(path) - except: - scene = args.model.get_empty_scene() + raise + else: + path = os.path.join( + HOLODECK_BASE_DATA_DIR, f"scenes/{folder_name}/{folder_name}.json" + ) + if os.path.exists(path): + print(f"Loading existing scene from {path}.") + try: + scene = compress_json.load(path) + except: + print( + f"[ERROR] The path {path} exists but could not be loaded. Please delete" + f" this file and try again." + ) + raise + + if scene is None: print("Generating from an empty scene.") + scene = args.model.get_empty_scene() try: _, save_dir = args.model.generate_scene( diff --git a/requirements.txt b/requirements.txt index 6b748f1..8f81e6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ Flask==2.0.1 compress-pickle compress-json black +objathor diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..dd4f138 --- /dev/null +++ b/setup.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path + +from setuptools import setup, find_packages + +if __name__ == "__main__": + with Path(Path(__file__).parent, "README.md").open(encoding="utf-8") as file: + long_description = file.read() + + def _read_reqs(relpath): + fullpath = os.path.join(os.path.dirname(__file__), relpath) + with open(fullpath) as f: + return [ + s.strip() + for s in f.readlines() + if (s.strip() and not s.startswith("#")) + ] + + REQUIREMENTS = _read_reqs("requirements.txt") + + setup( + name="holodeck", + packages=find_packages(), + include_package_data=True, + version="0.0.2", + license="Apache 2.0", + description='Holodeck: a framework for "Language Guided 
Generation of 3D Embodied AI Environments".', + long_description=long_description, + long_description_content_type="text/markdown", + author="Allen Institute for AI", + author_email="lucaw@allenai.org", + url="https://github.com/allenai/Holodeck", + data_files=[(".", ["README.md"])], + keywords=[ + "procedural generation", + "home environments", + "unity", + "3D assets", + "annotation", + "3D", + "ai2thor", + ], + install_requires=REQUIREMENTS, + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + ], + package_data={ + "objathor": ["generation/*/*.json", "generation/*.json"], + }, + ) From cd604863a79efaa8e533fda4c73d334e7929bc21 Mon Sep 17 00:00:00 2001 From: lucaw Date: Thu, 30 May 2024 16:57:13 -0700 Subject: [PATCH 4/6] Changing `holodeck` -> `ai2holodeck` due to existing pypi packages. 
--- {holodeck => ai2holodeck}/__init__.py | 0 {holodeck => ai2holodeck}/constants.py | 0 .../generation/__init__.py | 0 .../generation/ceiling_objects.py | 6 ++-- {holodeck => ai2holodeck}/generation/doors.py | 4 +-- .../generation/empty_house.json | 0 .../generation/floor_objects.py | 8 ++--- .../generation/holodeck.py | 30 +++++++++---------- .../generation/layers.py | 0 .../generation/lights.py | 0 .../generation/milp_utils.py | 0 .../generation/objaverse_retriever.py | 4 +-- .../generation/object_selector.py | 10 +++---- .../generation/prompts.py | 0 {holodeck => ai2holodeck}/generation/rooms.py | 4 +-- .../generation/skybox.py | 0 .../generation/small_objects.py | 6 ++-- {holodeck => ai2holodeck}/generation/utils.py | 2 +- .../generation/wall_objects.py | 6 ++-- {holodeck => ai2holodeck}/generation/walls.py | 2 +- .../generation/windows.py | 4 +-- {holodeck => ai2holodeck}/main.py | 4 +-- connect_to_unity.py | 2 +- setup.py | 2 +- 24 files changed, 47 insertions(+), 47 deletions(-) rename {holodeck => ai2holodeck}/__init__.py (100%) rename {holodeck => ai2holodeck}/constants.py (100%) rename {holodeck => ai2holodeck}/generation/__init__.py (100%) rename {holodeck => ai2holodeck}/generation/ceiling_objects.py (96%) rename {holodeck => ai2holodeck}/generation/doors.py (99%) rename {holodeck => ai2holodeck}/generation/empty_house.json (100%) rename {holodeck => ai2holodeck}/generation/floor_objects.py (99%) rename {holodeck => ai2holodeck}/generation/holodeck.py (94%) rename {holodeck => ai2holodeck}/generation/layers.py (100%) rename {holodeck => ai2holodeck}/generation/lights.py (100%) rename {holodeck => ai2holodeck}/generation/milp_utils.py (100%) rename {holodeck => ai2holodeck}/generation/objaverse_retriever.py (98%) rename {holodeck => ai2holodeck}/generation/object_selector.py (99%) rename {holodeck => ai2holodeck}/generation/prompts.py (100%) rename {holodeck => ai2holodeck}/generation/rooms.py (99%) rename {holodeck => ai2holodeck}/generation/skybox.py 
(100%) rename {holodeck => ai2holodeck}/generation/small_objects.py (99%) rename {holodeck => ai2holodeck}/generation/utils.py (99%) rename {holodeck => ai2holodeck}/generation/wall_objects.py (99%) rename {holodeck => ai2holodeck}/generation/walls.py (99%) rename {holodeck => ai2holodeck}/generation/windows.py (99%) rename {holodeck => ai2holodeck}/main.py (98%) diff --git a/holodeck/__init__.py b/ai2holodeck/__init__.py similarity index 100% rename from holodeck/__init__.py rename to ai2holodeck/__init__.py diff --git a/holodeck/constants.py b/ai2holodeck/constants.py similarity index 100% rename from holodeck/constants.py rename to ai2holodeck/constants.py diff --git a/holodeck/generation/__init__.py b/ai2holodeck/generation/__init__.py similarity index 100% rename from holodeck/generation/__init__.py rename to ai2holodeck/generation/__init__.py diff --git a/holodeck/generation/ceiling_objects.py b/ai2holodeck/generation/ceiling_objects.py similarity index 96% rename from holodeck/generation/ceiling_objects.py rename to ai2holodeck/generation/ceiling_objects.py index b2d0bfe..9337152 100644 --- a/holodeck/generation/ceiling_objects.py +++ b/ai2holodeck/generation/ceiling_objects.py @@ -7,9 +7,9 @@ from langchain import PromptTemplate, OpenAI from shapely.geometry import Polygon -import holodeck.generation.prompts as prompts -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import get_bbox_dims, get_annotations +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims, get_annotations class CeilingObjectGenerator: diff --git a/holodeck/generation/doors.py b/ai2holodeck/generation/doors.py similarity index 99% rename from holodeck/generation/doors.py rename to ai2holodeck/generation/doors.py index 8df3d68..0636a3e 100644 --- a/holodeck/generation/doors.py +++ b/ai2holodeck/generation/doors.py 
@@ -11,8 +11,8 @@ from langchain import PromptTemplate, OpenAI from tqdm import tqdm -import holodeck.generation.prompts as prompts -from holodeck.constants import HOLODECK_BASE_DATA_DIR +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR class DoorGenerator: diff --git a/holodeck/generation/empty_house.json b/ai2holodeck/generation/empty_house.json similarity index 100% rename from holodeck/generation/empty_house.json rename to ai2holodeck/generation/empty_house.json diff --git a/holodeck/generation/floor_objects.py b/ai2holodeck/generation/floor_objects.py similarity index 99% rename from holodeck/generation/floor_objects.py rename to ai2holodeck/generation/floor_objects.py index 1b01c76..4a2b0c2 100644 --- a/holodeck/generation/floor_objects.py +++ b/ai2holodeck/generation/floor_objects.py @@ -14,10 +14,10 @@ from scipy.interpolate import interp1d from shapely.geometry import Polygon, Point, box, LineString -import holodeck.generation.prompts as prompts -from holodeck.generation.milp_utils import * -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import get_bbox_dims +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.milp_utils import * +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims class FloorObjectGenerator: diff --git a/holodeck/generation/holodeck.py b/ai2holodeck/generation/holodeck.py similarity index 94% rename from holodeck/generation/holodeck.py rename to ai2holodeck/generation/holodeck.py index ae7ddec..0bc0a7d 100644 --- a/holodeck/generation/holodeck.py +++ b/ai2holodeck/generation/holodeck.py @@ -8,7 +8,7 @@ from sentence_transformers import SentenceTransformer from tqdm import tqdm -from holodeck.constants import ( +from ai2holodeck.constants import ( HOLODECK_BASE_DATA_DIR, OBJATHOR_VERSIONED_DIR, OBJATHOR_ASSETS_DIR, @@ -18,20 
+18,20 @@ HOLODECK_THOR_ANNOTATIONS_PATH, LLM_MODEL_NAME, ) -from holodeck.generation.ceiling_objects import CeilingObjectGenerator -from holodeck.generation.doors import DoorGenerator -from holodeck.generation.floor_objects import FloorObjectGenerator -from holodeck.generation.layers import map_asset2layer -from holodeck.generation.lights import generate_lights -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.object_selector import ObjectSelector -from holodeck.generation.rooms import FloorPlanGenerator -from holodeck.generation.skybox import getSkybox -from holodeck.generation.small_objects import SmallObjectGenerator -from holodeck.generation.utils import get_top_down_frame, room_video -from holodeck.generation.wall_objects import WallObjectGenerator -from holodeck.generation.walls import WallGenerator -from holodeck.generation.windows import WindowGenerator +from ai2holodeck.generation.ceiling_objects import CeilingObjectGenerator +from ai2holodeck.generation.doors import DoorGenerator +from ai2holodeck.generation.floor_objects import FloorObjectGenerator +from ai2holodeck.generation.layers import map_asset2layer +from ai2holodeck.generation.lights import generate_lights +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.object_selector import ObjectSelector +from ai2holodeck.generation.rooms import FloorPlanGenerator +from ai2holodeck.generation.skybox import getSkybox +from ai2holodeck.generation.small_objects import SmallObjectGenerator +from ai2holodeck.generation.utils import get_top_down_frame, room_video +from ai2holodeck.generation.wall_objects import WallObjectGenerator +from ai2holodeck.generation.walls import WallGenerator +from ai2holodeck.generation.windows import WindowGenerator def confirm_paths_exist(): diff --git a/holodeck/generation/layers.py b/ai2holodeck/generation/layers.py similarity index 100% rename from holodeck/generation/layers.py 
rename to ai2holodeck/generation/layers.py diff --git a/holodeck/generation/lights.py b/ai2holodeck/generation/lights.py similarity index 100% rename from holodeck/generation/lights.py rename to ai2holodeck/generation/lights.py diff --git a/holodeck/generation/milp_utils.py b/ai2holodeck/generation/milp_utils.py similarity index 100% rename from holodeck/generation/milp_utils.py rename to ai2holodeck/generation/milp_utils.py diff --git a/holodeck/generation/objaverse_retriever.py b/ai2holodeck/generation/objaverse_retriever.py similarity index 98% rename from holodeck/generation/objaverse_retriever.py rename to ai2holodeck/generation/objaverse_retriever.py index 0b32490..3637afe 100644 --- a/holodeck/generation/objaverse_retriever.py +++ b/ai2holodeck/generation/objaverse_retriever.py @@ -6,13 +6,13 @@ import torch import torch.nn.functional as F -from holodeck.constants import ( +from ai2holodeck.constants import ( OBJATHOR_ANNOTATIONS_PATH, HOLODECK_THOR_ANNOTATIONS_PATH, OBJATHOR_FEATURES_DIR, HOLODECK_THOR_FEATURES_DIR, ) -from holodeck.generation.utils import get_bbox_dims +from ai2holodeck.generation.utils import get_bbox_dims class ObjathorRetriever: diff --git a/holodeck/generation/object_selector.py b/ai2holodeck/generation/object_selector.py similarity index 99% rename from holodeck/generation/object_selector.py rename to ai2holodeck/generation/object_selector.py index f38a0ea..0395a71 100644 --- a/holodeck/generation/object_selector.py +++ b/ai2holodeck/generation/object_selector.py @@ -13,11 +13,11 @@ from langchain import PromptTemplate, OpenAI from shapely import Polygon -import holodeck.generation.prompts as prompts -from holodeck.generation.floor_objects import DFS_Solver_Floor -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import get_bbox_dims, get_annotations -from holodeck.generation.wall_objects import DFS_Solver_Wall +import ai2holodeck.generation.prompts as prompts +from 
ai2holodeck.generation.floor_objects import DFS_Solver_Floor +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims, get_annotations +from ai2holodeck.generation.wall_objects import DFS_Solver_Wall EXPECTED_OBJECT_ATTRIBUTES = [ "description", diff --git a/holodeck/generation/prompts.py b/ai2holodeck/generation/prompts.py similarity index 100% rename from holodeck/generation/prompts.py rename to ai2holodeck/generation/prompts.py diff --git a/holodeck/generation/rooms.py b/ai2holodeck/generation/rooms.py similarity index 99% rename from holodeck/generation/rooms.py rename to ai2holodeck/generation/rooms.py index f961ff1..9520cba 100644 --- a/holodeck/generation/rooms.py +++ b/ai2holodeck/generation/rooms.py @@ -16,8 +16,8 @@ from shapely.geometry import LineString, Point, Polygon from tqdm import tqdm -import holodeck.generation.prompts as prompts -from holodeck.constants import HOLODECK_BASE_DATA_DIR, DEBUGGING +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, DEBUGGING class FloorPlanGenerator: diff --git a/holodeck/generation/skybox.py b/ai2holodeck/generation/skybox.py similarity index 100% rename from holodeck/generation/skybox.py rename to ai2holodeck/generation/skybox.py diff --git a/holodeck/generation/small_objects.py b/ai2holodeck/generation/small_objects.py similarity index 99% rename from holodeck/generation/small_objects.py rename to ai2holodeck/generation/small_objects.py index 5da2058..c964562 100644 --- a/holodeck/generation/small_objects.py +++ b/ai2holodeck/generation/small_objects.py @@ -10,9 +10,9 @@ from procthor.constants import FLOOR_Y from procthor.utils.types import Vector3 -from holodeck.constants import THOR_COMMIT_ID -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import ( +from ai2holodeck.constants import THOR_COMMIT_ID +from 
ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import ( get_bbox_dims, get_annotations, get_secondary_properties, diff --git a/holodeck/generation/utils.py b/ai2holodeck/generation/utils.py similarity index 99% rename from holodeck/generation/utils.py rename to ai2holodeck/generation/utils.py index f87996c..de9029b 100644 --- a/holodeck/generation/utils.py +++ b/ai2holodeck/generation/utils.py @@ -16,7 +16,7 @@ ) from tqdm import tqdm -from holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID def all_edges_white(img): diff --git a/holodeck/generation/wall_objects.py b/ai2holodeck/generation/wall_objects.py similarity index 99% rename from holodeck/generation/wall_objects.py rename to ai2holodeck/generation/wall_objects.py index 4ed8d7b..a5ab129 100644 --- a/holodeck/generation/wall_objects.py +++ b/ai2holodeck/generation/wall_objects.py @@ -10,9 +10,9 @@ from shapely.geometry import Polygon, box, Point, LineString from shapely.ops import substring -import holodeck.generation.prompts as prompts -from holodeck.generation.objaverse_retriever import ObjathorRetriever -from holodeck.generation.utils import get_bbox_dims +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.generation.objaverse_retriever import ObjathorRetriever +from ai2holodeck.generation.utils import get_bbox_dims class WallObjectGenerator: diff --git a/holodeck/generation/walls.py b/ai2holodeck/generation/walls.py similarity index 99% rename from holodeck/generation/walls.py rename to ai2holodeck/generation/walls.py index c1bac44..04e164d 100644 --- a/holodeck/generation/walls.py +++ b/ai2holodeck/generation/walls.py @@ -6,7 +6,7 @@ from langchain import PromptTemplate, OpenAI from shapely.geometry import LineString, Polygon, Point -import holodeck.generation.prompts as prompts +import ai2holodeck.generation.prompts as prompts class WallGenerator: 
diff --git a/holodeck/generation/windows.py b/ai2holodeck/generation/windows.py similarity index 99% rename from holodeck/generation/windows.py rename to ai2holodeck/generation/windows.py index 2b488dc..3ff385f 100644 --- a/holodeck/generation/windows.py +++ b/ai2holodeck/generation/windows.py @@ -9,8 +9,8 @@ from colorama import Fore from langchain import PromptTemplate, OpenAI -import holodeck.generation.prompts as prompts -from holodeck.constants import HOLODECK_BASE_DATA_DIR +import ai2holodeck.generation.prompts as prompts +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR class WindowGenerator: diff --git a/holodeck/main.py b/ai2holodeck/main.py similarity index 98% rename from holodeck/main.py rename to ai2holodeck/main.py index 800137f..1696073 100644 --- a/holodeck/main.py +++ b/ai2holodeck/main.py @@ -6,8 +6,8 @@ import compress_json from tqdm import tqdm -from holodeck.constants import HOLODECK_BASE_DATA_DIR, OBJATHOR_ASSETS_DIR -from holodeck.generation.holodeck import Holodeck +from ai2holodeck.constants import HOLODECK_BASE_DATA_DIR, OBJATHOR_ASSETS_DIR +from ai2holodeck.generation.holodeck import Holodeck def str2bool(v: str): diff --git a/connect_to_unity.py b/connect_to_unity.py index 8ad09bd..0512613 100644 --- a/connect_to_unity.py +++ b/connect_to_unity.py @@ -6,7 +6,7 @@ from ai2thor.controller import Controller from ai2thor.hooks.procedural_asset_hook import ProceduralAssetHookRunner -from holodeck.constants import ( +from ai2holodeck.constants import ( HOLODECK_BASE_DATA_DIR, THOR_COMMIT_ID, OBJATHOR_ASSETS_DIR, diff --git a/setup.py b/setup.py index dd4f138..49fc3a2 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ def _read_reqs(relpath): REQUIREMENTS = _read_reqs("requirements.txt") setup( - name="holodeck", + name="ai2holodeck", packages=find_packages(), include_package_data=True, version="0.0.2", From fe8845611a51541555da1d79eb2fd134bd081ff8 Mon Sep 17 00:00:00 2001 From: lucaw Date: Thu, 30 May 2024 17:12:17 -0700 Subject: 
[PATCH 5/6] Using gpt-4o-2024-05-13 for cost and speed. --- ai2holodeck/constants.py | 3 ++- ai2holodeck/generation/object_selector.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ai2holodeck/constants.py b/ai2holodeck/constants.py index 38c1e7e..20c453f 100644 --- a/ai2holodeck/constants.py +++ b/ai2holodeck/constants.py @@ -29,6 +29,7 @@ else: THOR_COMMIT_ID = "3213d486cd09bcbafce33561997355983bdf8d1a" -LLM_MODEL_NAME = "gpt-4-1106-preview" +# LLM_MODEL_NAME = "gpt-4-1106-preview" +LLM_MODEL_NAME = "gpt-4o-2024-05-13" DEBUGGING = os.environ.get("DEBUGGING", "0").lower() in ["1", "true", "True", "t", "T"] diff --git a/ai2holodeck/generation/object_selector.py b/ai2holodeck/generation/object_selector.py index 0395a71..fd038ed 100644 --- a/ai2holodeck/generation/object_selector.py +++ b/ai2holodeck/generation/object_selector.py @@ -191,7 +191,7 @@ def plan_room(self, args): result["plan"] = plan_1 else: print( - f"{Fore.RED}AI: The floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2," + f"{Fore.RED}AI: The used floor capacity of {room_type} is {floor_capacity[1]:.2g}m^2," f" which is less than {100*required_floor_capacity_percentage:.0f}% of the total floor capacity" f" {floor_capacity[0]:.2g}m^2." f"{Fore.RESET}" From ae184529acc58ae058dc6d6a3ea530f775d71029 Mon Sep 17 00:00:00 2001 From: lucaw Date: Thu, 30 May 2024 17:23:24 -0700 Subject: [PATCH 6/6] Automatically selecting a constraint based on edit distance. 
--- ai2holodeck/generation/floor_objects.py | 14 ++++++++++++-- requirements.txt | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ai2holodeck/generation/floor_objects.py b/ai2holodeck/generation/floor_objects.py index 4a2b0c2..0532cc7 100644 --- a/ai2holodeck/generation/floor_objects.py +++ b/ai2holodeck/generation/floor_objects.py @@ -7,6 +7,7 @@ import re import time +import editdistance import matplotlib.pyplot as plt import numpy as np from langchain import PromptTemplate, OpenAI @@ -349,8 +350,17 @@ def parse_constraints(self, constraint_text, object_names): try: constraint_type = constraint_name2type[constraint_name] except: - print(f"constraint type {constraint_name} not found") - continue + _, new_constraint_name = min( + [ + (editdistance.eval(cn, constraint_name), cn) + for cn in constraint_name2type + ] + ) + print( + f"constraint type {constraint_name} not found, using {new_constraint_name} instead." + ) + constraint_name = new_constraint_name + constraint_type = constraint_name2type[constraint_name] if constraint_type == "global": object2constraints[object_name].append( diff --git a/requirements.txt b/requirements.txt index 8f81e6b..40c8290 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ compress-pickle compress-json black objathor +editdistance