Skip to content

Commit

Permalink
[GraphBolt][CUDA] puregpu option for the multiGPU example. (#7089)
Browse files: browse the repository at this point in the history
  • Loading branch information
mfbalin authored Feb 6, 2024
1 parent 845864d commit 4391241
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 22 deletions.
37 changes: 19 additions & 18 deletions examples/multigpu/graphbolt/node_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,7 @@ def evaluate(rank, model, dataloader, num_classes, device):
y = []
y_hats = []

for step, data in (
tqdm.tqdm(enumerate(dataloader)) if rank == 0 else enumerate(dataloader)
):
for data in tqdm.tqdm(dataloader) if rank == 0 else dataloader:
blocks = data.blocks
x = data.node_features["feat"]
y.append(data.labels)
Expand Down Expand Up @@ -271,22 +269,25 @@ def run(rank, world_size, args, devices, dataset):

# Pin the graph and features to enable GPU access.
if args.storage_device == "pinned":
dataset.graph.pin_memory_()
dataset.feature.pin_memory_()
graph = dataset.graph.pin_memory_()
feature = dataset.feature.pin_memory_()
else:
graph = dataset.graph.to(args.storage_device)
feature = dataset.feature.to(args.storage_device)

train_set = dataset.tasks[0].train_set
valid_set = dataset.tasks[0].validation_set
test_set = dataset.tasks[0].test_set
args.fanout = list(map(int, args.fanout.split(",")))
num_classes = dataset.tasks[0].metadata["num_classes"]

in_size = dataset.feature.size("node", None, "feat")[0]
in_size = feature.size("node", None, "feat")[0]
hidden_size = 256
out_size = num_classes

if args.gpu_cache_size > 0:
dataset.feature._features[("node", None, "feat")] = gb.GPUCachedFeature(
dataset.feature._features[("node", None, "feat")],
if args.gpu_cache_size > 0 and args.storage_device != "cuda":
feature._features[("node", None, "feat")] = gb.GPUCachedFeature(
feature._features[("node", None, "feat")],
args.gpu_cache_size,
)

Expand All @@ -297,24 +298,24 @@ def run(rank, world_size, args, devices, dataset):
# Create data loaders.
train_dataloader = create_dataloader(
args,
dataset.graph,
dataset.feature,
graph,
feature,
train_set,
device,
is_train=True,
)
valid_dataloader = create_dataloader(
args,
dataset.graph,
dataset.feature,
graph,
feature,
valid_set,
device,
is_train=False,
)
test_dataloader = create_dataloader(
args,
dataset.graph,
dataset.feature,
graph,
feature,
test_set,
device,
is_train=False,
Expand Down Expand Up @@ -396,9 +397,9 @@ def parse_args():
parser.add_argument(
"--mode",
default="pinned-cuda",
choices=["cpu-cuda", "pinned-cuda"],
help="Dataset storage placement and Train device: 'cpu' for CPU and RAM,"
" 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
choices=["cpu-cuda", "pinned-cuda", "cuda-cuda"],
help="Dataset storage placement and Train device: 'cpu' for CPU and RAM"
", 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
)
return parser.parse_args()

Expand Down
5 changes: 3 additions & 2 deletions python/dgl/graphbolt/impl/fused_csc_sampling_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,8 @@ def _pin(x):
return self2._apply_to_members(_pin if device == "pinned" else _to)

def pin_memory_(self):
"""Copy `FusedCSCSamplingGraph` to the pinned memory in-place."""
"""Copy `FusedCSCSamplingGraph` to the pinned memory in-place. Returns
the same object modified in-place."""
# torch.Tensor.pin_memory() is not an inplace operation. To make it
# truly in-place, we need to use cudaHostRegister. Then, we need to use
# cudaHostUnregister to unpin the tensor in the destructor.
Expand Down Expand Up @@ -1123,7 +1124,7 @@ def _pin(x):

return x

self._apply_to_members(_pin)
return self._apply_to_members(_pin)


def fused_csc_sampling_graph(
Expand Down
10 changes: 8 additions & 2 deletions python/dgl/graphbolt/impl/torch_based_feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ def metadata(self):
)

def pin_memory_(self):
"""In-place operation to copy the feature to pinned memory."""
"""In-place operation to copy the feature to pinned memory. Returns the
same object modified in-place."""
# torch.Tensor.pin_memory() is not an inplace operation. To make it
# truly in-place, we need to use cudaHostRegister. Then, we need to use
# cudaHostUnregister to unpin the tensor in the destructor.
Expand All @@ -194,6 +195,8 @@ def pin_memory_(self):

self._is_inplace_pinned.add(x)

return self

def is_pinned(self):
"""Returns True if the stored feature is pinned."""
return self._tensor.is_pinned()
Expand Down Expand Up @@ -289,10 +292,13 @@ def __init__(self, feat_data: List[OnDiskFeatureData]):
super().__init__(features)

def pin_memory_(self):
"""In-place operation to copy the feature store to pinned memory."""
"""In-place operation to copy the feature store to pinned memory.
Returns the same object modified in-place."""
for feature in self._features.values():
feature.pin_memory_()

return self

def is_pinned(self):
"""Returns True if all the stored features are pinned."""
return all(feature.is_pinned() for feature in self._features.values())
Expand Down

0 comments on commit 4391241

Please sign in to comment.