Commit 9586742a by Wenjie Huang

fix bugs. Route

parent 10c38111
@@ -4,7 +4,7 @@ from torch_geometric.utils import add_remaining_self_loops, to_undirected
 import os.path as osp
 import sys
-from starrygl.utils.data import partition_pyg
+from starrygl.graph import GraphData
 import logging
 logging.getLogger().setLevel(logging.INFO)
@@ -18,7 +18,9 @@ if __name__ == "__main__":
     print(f"num_nodes: {data.num_nodes}")
     print(f"num_edges: {data.num_edges}")
     print(f"num_features: {data.num_features}")
+    data = GraphData.from_pyg_data(data)
+
     num_parts_list = [1, 2, 3, 5, 7, 9, 11]
     algos = ["metis", 'mt-metis', "random"]
@@ -27,4 +29,4 @@ if __name__ == "__main__":
     for num_parts in num_parts_list:
         for algo in algos:
             print(f"======== {num_parts} + {algo} ========")
-            partition_pyg(root, data, num_parts, algo)
+            data.save_partition(root, num_parts, algo)
\ No newline at end of file
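For reference, the new partitioning flow in this hunk reduces to wrapping a PyG Data object in GraphData and saving one partitioning per configuration. A minimal sketch, assuming a Planetoid/Cora dataset and an output directory purely for illustration (only GraphData.from_pyg_data and save_partition are taken from this commit):

import os.path as osp
from torch_geometric.datasets import Planetoid
from starrygl.graph import GraphData

root = osp.abspath("./dataset")                    # illustrative output directory
data = Planetoid(root, name="Cora")[0]             # any PyG Data object would do here

g = GraphData.from_pyg_data(data)                  # wrap the in-memory PyG graph
for num_parts in (1, 2, 3):                        # a subset of the num_parts_list above
    for algo in ("metis", "mt-metis", "random"):   # the algorithms exercised by the script
        g.save_partition(root, num_parts, algo)    # replaces the old partition_pyg(root, data, ...)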
@@ -5,7 +5,7 @@ from torch import Tensor
 from typing import *
 from starrygl.distributed import DistributedContext
-from starrygl.graph import new_vc_route
+from starrygl.graph import *
 from torch_scatter import scatter_sum
@@ -28,32 +28,38 @@ all_eparts = [
     ],
 ]

-def get_route(bipartite: bool = True):
+def get_data():
     ctx = DistributedContext.get_default_context()
     assert ctx.world_size == 3
     dst_ids = torch.tensor(all_nparts[ctx.rank], dtype=torch.long, device=ctx.device)
     edge_index = torch.tensor(all_eparts[ctx.rank], dtype=torch.long, device=ctx.device).t()
-    return new_vc_route(dst_ids, edge_index, bipartite=bipartite)
+    src_ids, edge_index = init_vc_edge_index(dst_ids, edge_index)
+    return GraphData.from_bipartite(edge_index, raw_src_ids=src_ids, raw_dst_ids=dst_ids)

 if __name__ == "__main__":
     ctx = DistributedContext.init(backend="gloo", use_gpu=True)
-    src_ids, edge_index, dst_ids, route = get_route(False)
-    src_size = route.src_len
-    dst_size = route.dst_len
+    g = get_data()
+    route = g.to_route()
+    edge_index = g.edge_index()
+    # src_ids, edge_index, dst_ids, route = get_route(False)
+    # src_size = route.src_len
+    # dst_size = route.dst_len

     ctx.sync_print(route.src_len, route.dst_len)
+    ctx.sync_print(route._fw_ptr, route._fw_ind)
+    ctx.sync_print(route._bw_ptr, route._bw_ind)

     edge_ones = torch.ones(edge_index.size(1), device=ctx.device).requires_grad_()
     src_ones = scatter_sum(edge_ones, edge_index[0], dim=0, dim_size=route.src_len)
     dst_ones = scatter_sum(edge_ones, edge_index[1], dim=0, dim_size=route.dst_len)

-    # ctx.sync_print(route.fw_tensor(dst_ones))
-    # ctx.sync_print(route.bw_tensor(src_ones))
+    ctx.sync_print(route.fw_tensor(dst_ones))
+    ctx.sync_print(route.bw_tensor(src_ones))

-    out = route.reverse_route().apply(src_ones)
+    out = route.rev().apply(src_ones)
     ctx.sync_print(out)
     out.sum().backward()
@@ -61,4 +67,13 @@ if __name__ == "__main__":
     ctx.sync_print(route.get_src_part_ids())

+    dst_mask = torch.full((route.dst_len,), ctx.rank % 2, dtype=torch.bool, device=ctx.device)
+    ctx.main_print("="*64)
+    ctx.sync_print(dst_mask)
+    _, _, r2 = route.filter(dst_mask)
+    ctx.sync_print(r2.apply(dst_ones).detach())
+    ctx.sync_print(r2.rev().apply(src_ones).detach())
+
+    # dst_true = torch.ones(route.dst_len, dtype=torch.float, device=ctx.device)
+    # ctx.sync_print(route.fw_tensor(dst_true, "max"))
     ctx.shutdown()
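The src_ones/dst_ones tensors above are per-node edge counts built with torch_scatter. A tiny standalone illustration of the same scatter_sum pattern, independent of starrygl (the values here are made up):

import torch
from torch_scatter import scatter_sum

edge_index = torch.tensor([[0, 0, 1],     # local source indices of three edges
                           [1, 2, 2]])    # local destination indices of the same edges
edge_ones = torch.ones(edge_index.size(1))

# Sum a weight of 1 per edge onto its source / destination node.
src_ones = scatter_sum(edge_ones, edge_index[0], dim=0, dim_size=3)  # tensor([2., 1., 0.])
dst_ones = scatter_sum(edge_ones, edge_index[1], dim=0, dim_size=3)  # tensor([0., 1., 2.])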
@@ -45,8 +45,6 @@ def all_to_all_v(
     assert len(output_tensor_list) == world_size
     assert len(input_tensor_list) == world_size

-    # if group is None:
-    #     group = dist.distributed_c10d._get_default_group()
     backend = dist.get_backend(group)
     if backend == "nccl":
......
-from .route import Route
-from .utils import init_vc_edge_index
-
-from torch import Tensor
-from typing import Tuple
-
-__all__ = [
-    "Route",
-    "init_vc_edge_index",
-    "new_vc_route",
-]
-
-def new_vc_route(
-    dst_ids: Tensor,
-    edge_index: Tensor,
-    bipartite: bool = True
-) -> Tuple[Tensor, Tensor, Tensor, Route]:
-    src_ids, local_edge_index = init_vc_edge_index(
-        dst_ids, edge_index, bipartite=bipartite)
-    route = Route.from_raw_indices(
-        src_ids, dst_ids, bipartite=bipartite)
-    return src_ids, local_edge_index, dst_ids, route
+from .data import *
+from .route import *
\ No newline at end of file
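The removed new_vc_route helper is mirrored by the updated test: the same route is now obtained by calling init_vc_edge_index directly and wrapping the result in GraphData. A sketch of the replacement pattern, assembled only from calls that appear elsewhere in this commit (to_route() is assumed to build the Route that new_vc_route used to return, and a DistributedContext must already be initialized):

# Given a partition's locally owned dst_ids and its global edge_index, as in the test above:
src_ids, local_edge_index = init_vc_edge_index(dst_ids, edge_index)
g = GraphData.from_bipartite(local_edge_index, raw_src_ids=src_ids, raw_dst_ids=dst_ids)
route = g.to_route()   # formerly: _, _, _, route = new_vc_route(dst_ids, edge_index)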
import torch
import torch.distributed as dist

from torch import Tensor
from typing import *


def init_vc_edge_index(
    dst_ids: Tensor,
    edge_index: Tensor,
    bipartite: bool = True,
) -> Tuple[Tensor, Tensor]:
    ikw = dict(dtype=torch.long, device=dst_ids.device)
    local_num_nodes = torch.zeros(1, **ikw)
    if dst_ids.numel() > 0:
        local_num_nodes = dst_ids.max().max(local_num_nodes)
    if edge_index.numel() > 0:
        local_num_nodes = edge_index.max().max(local_num_nodes)
    local_num_nodes = local_num_nodes.item() + 1

    xmp: Tensor = torch.zeros(local_num_nodes, **ikw)
    xmp[edge_index[1].unique()] += 0b01
    xmp[dst_ids.unique()] += 0b10
    if not (xmp != 0x01).all():
        raise RuntimeError(f"must be vertex-cut partition graph")

    if bipartite:
        src_ids = edge_index[0].unique()
    else:
        xmp.fill_(0)
        xmp[edge_index[0]] = 1
        xmp[dst_ids] = 0
        src_ids = torch.cat([dst_ids, torch.where(xmp > 0)[0]], dim=-1)

    xmp.fill_((2**62-1)*2+1)
    xmp[src_ids] = torch.arange(src_ids.size(0), **ikw)
    src = xmp[edge_index[0]]

    xmp.fill_((2**62-1)*2+1)
    xmp[dst_ids] = torch.arange(dst_ids.size(0), **ikw)
    dst = xmp[edge_index[1]]

    local_edge_index = torch.vstack([src, dst])
    return src_ids, local_edge_index
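In the function above, the 0b01/0b10 flags verify that every edge destination is locally owned (the vertex-cut property), and the two fill/arange passes renumber global ids into positions within src_ids and dst_ids. A small self-contained illustration of the return values (the input tensors are made up; the import assumes init_vc_edge_index is exported from starrygl.graph, as in the updated test):

import torch
from starrygl.graph import init_vc_edge_index

dst_ids = torch.tensor([0, 1, 2])              # nodes owned by this partition
edge_index = torch.tensor([[3, 0, 4],          # global source ids (3 and 4 live on other partitions)
                           [0, 1, 2]])         # global destination ids, all locally owned

src_ids, local_edge_index = init_vc_edge_index(dst_ids, edge_index)
# src_ids          -> tensor([0, 3, 4])        sorted unique global source ids
# local_edge_index -> tensor([[1, 0, 2],       sources renumbered to their positions in src_ids
#                             [0, 1, 2]])      destinations renumbered to their positions in dst_ids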