Commit 83c28e38 by zhlj

add cache,delete wait thread,add dist edge

parents 269cc25a f69647c0
......@@ -32,7 +32,6 @@ class BatchData:
real_x = graph1.select_attr(graph1.get_localId_by_partitionId(0,torch.tensor(id)))
else:
real_x = graph2.select_attr(graph2.get_localId_by_partitionId(1,torch.tensor(id)))
def __repr__(self):
return "BatchData(batch_size = {},roots = {} , \
nides = {} , edge_index = {} , x= {}, \
......@@ -51,4 +50,4 @@ class BatchData:
self.roots.to(device)
self.nids.to(device)
self.eids.to(device)
\ No newline at end of file
This diff is collapsed. Click to expand it.
<<<<<<< HEAD
import time
=======
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
import torch
# import sys
# from os.path import abspath, dirname
......@@ -12,10 +15,14 @@ edge_index = torch.tensor([[0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5], [1, 0, 2, 4,
num_nodes = 6
num_neighbors = 2
# Run the neighbor sampling
<<<<<<< HEAD
pre = time.time()
sampler=NeighborSampler(edge_index=edge_index, num_nodes=num_nodes, num_layers=2, workers=2, fanout=[2, 1])
end = time.time()
print("neighbor time:", end-pre)
=======
sampler=NeighborSampler(edge_index=edge_index, num_nodes=num_nodes, num_layers=2, workers=2, fanout=[2, 1])
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes(nodes=torch.tensor([1,3]), fanout=num_neighbors)
neighbor_nodes, sampled_edge_index = sampler.sample_from_nodes(torch.tensor([1,2,3]))
......@@ -35,11 +42,18 @@ print('neighbor_nodes_id: \n',neighbor_nodes, '\nedge_index: \n',sampled_edge_in
# print('neighbors: \n', neighbors[0]==1)
from base import NegativeSampling
<<<<<<< HEAD
edge_index = torch.tensor([[0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5],
[1, 0, 2, 4, 1, 3, 0, 2, 5, 3, 5, 0, 2]])
num_nodes = 6
# sampler
sampler=NeighborSampler(edge_index=edge_index.clone(), num_nodes=num_nodes, num_layers=2, workers=2, fanout=[2, 1])
=======
edge_index = torch.tensor([[0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5], [1, 0, 2, 4, 1, 3, 0, 2, 5, 3, 5, 0, 2]])
num_nodes = 6
# sampler
sampler=NeighborSampler(edge_index=edge_index, num_nodes=num_nodes, num_layers=2, workers=2, fanout=[2, 1])
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
# negative
weight = torch.tensor([0.3,0.1,0.1,0.1,0.3,0.1])
......
......@@ -2,7 +2,10 @@ import torch
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric import datasets
import time
<<<<<<< HEAD
from Utils import GraphData
=======
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
def load_ogb_dataset(name, data_path):
dataset = PygNodePropPredDataset(name=name, root=data_path)
......@@ -18,6 +21,7 @@ def load_ogb_dataset(name, data_path):
node_data['test_mask'][split_idx["test"]] = True
return g, node_data
<<<<<<< HEAD
g, node_data = load_ogb_dataset('ogbn-products', "/home/hzq/code/gnn/my_sampler/NewSample/dataset")
print(g)
# for worker in [1,2,3,4,5,6,7,8,9,10,20,30]:
......@@ -31,26 +35,50 @@ row, col = g.edge_index
tnb = get_neighbors(row.contiguous(), col.contiguous(), g.num_nodes)
sampler = NeighborSampler(g.num_nodes, num_layers=2, fanout=[100,100], graph_data=g_data, workers=10, tnb=tnb)
# sampler = RandomWalkSampler(g.num_nodes, num_layers=2, graph_data=g_data, workers=10, tnb=tnb)
=======
g, node_data = load_ogb_dataset('ogbn-products', "/home/hzq/code/gnn/test/NewSample/dataset")
print(g)
from neighbor_sampler import NeighborSampler
pre = time.time()
from neighbor_sampler import get_neighbors
row, col = g.edge_index
tnb = get_neighbors(row.contiguous(), col.contiguous(), g.num_nodes)
sampler = NeighborSampler(g.num_nodes, num_layers=2, fanout=[100,100], workers=4, tnb=tnb)
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
end = time.time()
print("init time:", end-pre)
# from torch_geometric.sampler import NeighborSampler, NumNeighbors, NodeSamplerInput, SamplerOutput
# pre = time.time()
# num_nei = NumNeighbors([100, 100])
<<<<<<< HEAD
# node_idx = NodeSamplerInput(input_id=None, node=torch.tensor(range(g.num_nodes//3, g.num_nodes//3+800000)))# (input_id=None, node=torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
=======
# node_idx = NodeSamplerInput(input_id=None, node=torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
# sampler = NeighborSampler(g, num_nei)
# end = time.time()
# print("init time:", end-pre)
pre = time.time()
<<<<<<< HEAD
node, edge = sampler.sample_from_nodes(torch.tensor(range(g.num_nodes//3, g.num_nodes//3+800000)))# sampler.sample_from_nodes(torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
=======
node, edge = sampler.sample_from_nodes(torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
# out = sampler.sample_from_nodes(node_idx)
# node = out.node
# edge = [out.row, out.col]
end = time.time()
<<<<<<< HEAD
print('node:', node)
print('edge:', edge)
=======
print(node)
print(edge)
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
print("sample time", end-pre)
\ No newline at end of file
......@@ -2,17 +2,29 @@ import torch
import torch.multiprocessing as mp
from typing import Optional, Tuple
<<<<<<< HEAD
from base import BaseSampler, NegativeSampling
from neighbor_sampler import NeighborSampler
=======
from .base import BaseSampler, NegativeSampling
from .neighbor_sampler import NeighborSampler
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
class RandomWalkSampler(BaseSampler):
def __init__(
self,
num_nodes: int,
num_layers: int,
<<<<<<< HEAD
graph_data,
workers = 1,
tnb = None
=======
workers = 1,
edge_index : Optional[torch.Tensor] = None,
deg = None,
neighbors = None
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
) -> None:
r"""__init__
Args:
......@@ -20,6 +32,7 @@ class RandomWalkSampler(BaseSampler):
num_layers: the num of layers to be sampled
fanout: the list of max neighbors' number chosen for each layer
workers: the number of threads, default value is 1
<<<<<<< HEAD
graph_data: graph data in this partition
tnb: all nodes' neighbors info
"""
......@@ -38,6 +51,21 @@ class RandomWalkSampler(BaseSampler):
else:
self.sampler = NeighborSampler(num_nodes, num_layers, [1 for _ in range(num_layers)],
graph_data, workers)
=======
edge_index: all edges in the graph
neighbors: all nodes' neighbors
deg: the degree of all nodes
"""
super().__init__()
if(edge_index is not None):
self.sampler = NeighborSampler(num_nodes, num_layers, [1 for _ in range(num_layers)],
workers, edge_index)
elif(neighbors is not None and deg is not None):
self.sampler = NeighborSampler(num_nodes, num_layers, [1 for _ in range(num_layers)],
workers, neighbors, deg)
else:
raise Exception("Not enough parameters")
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
self.num_layers = num_layers
# The number of threads must not exceed torch's default OMP thread count
......
Metadata-Version: 2.1
Name: sample-cores
Version: 0.0.0
sample_cores.cpp
setup.py
sample_cores.egg-info/PKG-INFO
sample_cores.egg-info/SOURCES.txt
sample_cores.egg-info/dependency_links.txt
sample_cores.egg-info/top_level.txt
\ No newline at end of file
File added
2023-05-11 02:24:19,565-torch.distributed.distributed_c10d-distributed_c10d.py-[line:228]-INFO: Added key: store_based_barrier_key:0 to store for rank: 0
2023-05-11 02:24:19,566-torch.distributed.distributed_c10d-distributed_c10d.py-[line:262]-INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:0 with 1 nodes.
2023-05-11 02:24:20,504-torch.nn.parallel.distributed-distributed.py-[line:995]-INFO: Reducer buckets have been rebuilt in this iteration.
......@@ -24,10 +24,17 @@ class GraphData():
# Gather entries of self.data.x along dimension 0 at the positions in `index`.
# Callers visible in this diff pass the result of get_localId_by_partitionId(...)
# as `index`.
# NOTE(review): presumably `index` is a 1-D integer tensor of local node ids and
# x holds one feature row per node — confirm against the callers.
def select_attr(self,index):
return torch.index_select(self.data.x,0,index)
<<<<<<< HEAD
# Original note (translated): "return the number of partitions that the global
# node ids map to".
# What the code returns is the size of dimension 0 of self.data.x.
# NOTE(review): that looks like the number of entries stored in x rather than a
# partition count — confirm which one is intended.
def get_part_num(self):
return self.data.x.size()[0]
=======
# Original note (translated): "return the partition that the global node ids
# map to".
# What the code returns is the size of dimension 0 of self.data.x.
# NOTE(review): that is a size, not a partition identifier — confirm which one
# is intended.
def get_part_num(self):
return self.data.x.size()[0]
>>>>>>> f69647c0a75a701fc9de194000ca1f2150a99d1a
# Gather entries of self.data.x along dimension 0 at the positions in `index`.
# NOTE(review): an identical select_attr definition appears a few lines earlier
# in this hunk; if both end up in the same class body the later one wins —
# confirm whether the duplicate is intentional.
def select_attr(self,index):
return torch.index_select(self.data.x,0,index)
def select_y(self,index):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment