Commit 88de1d9c by Wenjie Huang

SequencePipe supports long-dtype tensors

parent 32fec45c
@@ -4,7 +4,7 @@ from torch_geometric.utils import add_remaining_self_loops, to_undirected
 import os.path as osp
 import sys
-from starrygl.graph import GraphData
+from starrygl.data import GraphData
 import logging
 logging.getLogger().setLevel(logging.INFO)
...
@@ -149,6 +149,9 @@ def batch_send(
     group: Any = None,
     async_op: bool = False,
 ):
+    if len(tensors) == 0:
+        return BatchWork(None, None)
     # tensors = tuple(t.data for t in tensors)
     backend = dist.get_backend(group)
@@ -171,6 +174,9 @@ def batch_recv(
     group: Any = None,
     async_op: bool = False,
 ):
+    if len(tensors) == 0:
+        return BatchWork(None, None)
     # tensors = tuple(t.data for t in tensors)
     backend = dist.get_backend(group)
...
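
The two guards are symmetric: a rank that has nothing to send or receive in a given step can now call these helpers with an empty tensor list and get back an inert BatchWork instead of reaching dist.get_backend() and the point-to-point calls. A hedged usage sketch follows; the exact positional signature of batch_send is not shown in the hunks, so the destination-rank keyword (dst) is assumed, as is that BatchWork.wait() is a no-op when the object was built as BatchWork(None, None):

# Illustrative only: dst and the call shape are assumptions, not from the diff.
send_bufs: list = []   # this rank happens to have nothing to transmit this round

work = batch_send(*send_bufs, dst=1, group=None, async_op=True)
work.wait()            # returns immediately: no backend requests were issued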
import torch
import torch.nn as nn
import torch.distributed as dist

from torch import Tensor
from typing import *

__all__ = [
    "all_reduce_gradients",
    "all_reduce_buffers",
]

def all_reduce_gradients(net: nn.Module, op = dist.ReduceOp.SUM, group = None):
    # Sum (by default) every parameter's gradient across the process group,
    # so each rank ends up with the globally accumulated gradient.
    for p in net.parameters():
        if p.grad is not None:  # parameters that received no gradient are skipped
            dist.all_reduce(p.grad, op=op, group=group)

def all_reduce_buffers(net: nn.Module, op = dist.ReduceOp.AVG, group = None):
    # Average (by default) non-parameter state such as BatchNorm running
    # statistics, keeping buffers identical across ranks.
    for b in net.buffers():
        dist.all_reduce(b.data, op=op, group=group)
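
Together these helpers implement manual gradient and buffer synchronization for data-parallel training. A minimal sketch of how they might slot into a training step, assuming the default process group is already initialized and the backend supports ReduceOp.AVG (e.g. NCCL); train_step and its arguments are illustrative, not part of the commit:

import torch
import torch.nn as nn
import torch.distributed as dist

def train_step(net: nn.Module, batch: torch.Tensor, target: torch.Tensor,
               opt: torch.optim.Optimizer, world_size: int):
    opt.zero_grad()
    loss = nn.functional.mse_loss(net(batch), target)
    loss.backward()
    # Sum gradients over all ranks, then rescale to a mean so the
    # effective step size does not grow with world size.
    all_reduce_gradients(net, op=dist.ReduceOp.SUM)
    for p in net.parameters():
        if p.grad is not None:
            p.grad.div_(world_size)
    opt.step()
    # Keep running statistics (e.g. BatchNorm) consistent across ranks.
    all_reduce_buffers(net, op=dist.ReduceOp.AVG)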