Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
starrty_sampler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhlj
starrty_sampler
Commits
c6330899
Commit
c6330899
authored
Feb 22, 2023
by
XXX
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update Sample v5: add multi-layer sample
parent
aee067ba
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
210 additions
and
0 deletions
+210
-0
Sample/Sampler.py
+0
-0
Sample/Sampler_v4_c_single_node.py
+210
-0
part/__pycache__/Utils.cpython-37.pyc
+0
-0
No files found.
Sample/Sampler.py
View file @
c6330899
This diff is collapsed.
Click to expand it.
Sample/Sampler_v4_c_single_node.py
0 → 100644
View file @
c6330899
from
typing
import
Tuple
import
torch
import
torch_scatter
import
torch.multiprocessing
as
mp
from
abc
import
ABC
from
Sample.sample_cores
import
neighbor_sample_from_node
,
TemporalGraphBlock
class BaseSampler(ABC):
    r"""Base class describing the interface of a graph sampler.

    Subclasses are expected to override :meth:`sample_from_node`,
    :meth:`sample_from_nodes` and :meth:`sample_from_nodes_parallel`.
    The methods are not decorated with ``abstractmethod``, so this class can
    still be instantiated; every method simply raises
    :class:`NotImplementedError` until it is overridden.
    """

    def sample_from_node(
        self,
        node: int,
        edge_index: torch.Tensor,
        num_nodes: int,
        **kwargs
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph starting from a single seed node.

        Args:
            node: the seed node index
            edge_index: edges in the graph
            num_nodes: the number of nodes in the graph
            **kwargs: sampler-specific options

        Returns:
            samples_nodes: the sampled nodes
            edge_index: the sampled edges

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def sample_from_nodes(
        self,
        nodes: torch.Tensor,
        edge_index: torch.Tensor,
        num_nodes: int,
        **kwargs
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph starting from several seed nodes.

        Args:
            nodes: the seed node indices
            edge_index: edges in the graph
            num_nodes: the number of nodes in the graph
            **kwargs: sampler-specific options

        Returns:
            samples_nodes: the sampled nodes
            edge_index: the sampled edges

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def sample_from_nodes_parallel(
        self,
        nodes: torch.Tensor,
        edge_index: torch.Tensor,
        num_nodes: int,
        workers: int,
        **kwargs
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph from several seed nodes using parallel workers.

        Args:
            nodes: the seed node indices
            edge_index: edges in the graph
            num_nodes: the number of nodes in the graph
            workers: the number of parallel workers
            **kwargs: sampler-specific options

        Returns:
            samples_nodes: the sampled nodes
            edge_index: the sampled edges

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError
class NeighborSampler(BaseSampler):
    r"""Neighbor sampler built on the ``neighbor_sample_from_node`` core routine.

    Every method returns a pair ``(samples_nodes, edge_index)`` where
    ``edge_index`` is the ``row``/``col`` tensors of the sampled edges stacked
    along dimension 0.
    """

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def _merge_samples(samples) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Merge several ``(samples_nodes, edge_index)`` pairs into one.

        Args:
            samples: iterable of ``(samples_nodes, edge_index)`` pairs as
                returned by :meth:`sample_from_node`

        Returns:
            samples_nodes: the de-duplicated union of all sampled nodes
            edge_index: all sampled edges, concatenated in input order
        """
        # Seeding each list with an empty IntTensor keeps ``torch.cat`` valid
        # when ``samples`` is empty and yields the same empty result the
        # accumulate-in-a-loop formulation produced.
        node_parts = [torch.IntTensor([])]
        row_parts = [torch.IntTensor([])]
        col_parts = [torch.IntTensor([])]
        for sampled_nodes, sampled_edges in samples:
            node_parts.append(sampled_nodes)
            row_parts.append(sampled_edges[0])
            col_parts.append(sampled_edges[1])
        # Concatenate once at the end instead of re-concatenating (and
        # re-running ``unique``) after every sample.
        merged_nodes = torch.unique(torch.cat(node_parts))
        merged_edges = torch.stack(
            [torch.cat(row_parts), torch.cat(col_parts)], dim=0)
        return merged_nodes, merged_edges

    def sample_from_node(
        self,
        node: int,
        edge_index: torch.Tensor,
        num_nodes: int,
        fanout: int
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph starting from a single seed node.

        Args:
            node: the seed node index (a Python int or a 0-dim tensor)
            edge_index: edges in the graph; unpacked into ``row`` and ``col``
            num_nodes: the number of nodes in the graph
            fanout: the maximum number of neighbors chosen

        Returns:
            samples_nodes: the sampled nodes (``torch.IntTensor``)
            edge_index: the sampled edges (``torch.IntTensor``), ``row`` and
                ``col`` stacked along dimension 0
        """
        row, col = edge_index
        # ``Tensor.tolist()`` already yields plain Python lists; going through
        # ``.numpy()`` first would fail for tensors that are not on the CPU.
        # ``int(node)`` normalises 0-dim tensor seeds coming from iteration
        # over a seed tensor.
        tgb = neighbor_sample_from_node(
            int(node), row.tolist(), col.tolist(), num_nodes, fanout)
        sampled_row = torch.IntTensor(tgb.row())
        sampled_col = torch.IntTensor(tgb.col())
        samples_nodes = torch.IntTensor(tgb.nodes())
        return samples_nodes, torch.stack([sampled_row, sampled_col], dim=0)

    def sample_from_nodes(
        self,
        nodes: torch.Tensor,
        edge_index: torch.Tensor,
        num_nodes: int,
        fanout: int
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph from several seed nodes, one after another.

        Args:
            nodes: the seed node indices
            edge_index: edges in the graph
            num_nodes: the number of nodes in the graph
            fanout: the maximum number of neighbors chosen per seed node

        Returns:
            samples_nodes: the sampled nodes; de-duplicated with
                ``torch.unique`` unless exactly one seed was given, in which
                case the result of :meth:`sample_from_node` is returned as is
            edge_index: the sampled edges of all seeds, concatenated
        """
        if len(nodes) == 1:
            return self.sample_from_node(
                nodes[0], edge_index, num_nodes, fanout)
        # Single-threaded loop over the seeds.
        return self._merge_samples(
            self.sample_from_node(node, edge_index, num_nodes, fanout)
            for node in nodes
        )

    def sample_from_nodes_parallel(
        self,
        nodes: torch.Tensor,
        edge_index: torch.Tensor,
        num_nodes: int,
        workers: int,
        fanout: int
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        r"""Sample a subgraph from several seed nodes using a process pool.

        The seeds are split into at most ``workers`` tasks. When there are
        more seeds than workers, each task first gets ``n // workers`` seeds
        and the ``n % workers`` left-over seeds are handed out one each to the
        leading tasks.

        Args:
            nodes: the seed node indices
            edge_index: edges in the graph
            num_nodes: the number of nodes in the graph
            workers: the maximum number of parallel tasks; must be >= 1
            fanout: the maximum number of neighbors chosen per seed node

        Returns:
            samples_nodes: the de-duplicated sampled nodes
            edge_index: the sampled edges of all tasks, concatenated

        Raises:
            ValueError: if ``workers`` is smaller than 1 while there are
                seeds to sample from.
        """
        n = len(nodes)
        if n == 0:
            # Nothing to sample: skip starting a pool altogether.
            return self._merge_samples([])
        if workers < 1:
            raise ValueError(
                "workers must be a positive integer, got {}".format(workers))

        # Never start more processes than there are tasks, and never more
        # than the torch thread count that previously sized the pool.
        pool_size = max(1, min(workers, n, torch.get_num_threads()))
        with mp.Pool(processes=pool_size) as p:
            if workers >= n:
                # Enough workers for one task per seed.
                results = [
                    p.apply_async(
                        self.sample_from_node,
                        (int(node), edge_index, num_nodes, fanout))
                    for node in nodes
                ]
            else:
                quotient, remainder = divmod(n, workers)
                results = []
                start = 0
                for i in range(workers):
                    # The first ``remainder`` batches take one extra seed.
                    size = quotient + 1 if i < remainder else quotient
                    batch = nodes[start:start + size]
                    start += size
                    results.append(p.apply_async(
                        self.sample_from_nodes,
                        (batch, edge_index, num_nodes, fanout)))
            # Collect inside the ``with`` block: leaving it terminates the
            # pool, so results must be fetched before that happens.
            samples = [result.get() for result in results]
        return self._merge_samples(samples)
# Alternative that does not use sample_from_node: directly gather the neighbors of all seed nodes:
# row, col = edge_index
# neighbors1=torch.concat([row[row==nodes[i]] for i in range(0, nodes.shape[0])])
# neighbors2=torch.concat([col[row==nodes[i]] for i in range(0, nodes.shape[0])])
# neighbors=torch.stack([neighbors1, neighbors2], dim=0)
# print('neighbors: \n', neighbors)
if __name__ == "__main__":
    # Small demo graph: 6 nodes, 13 directed edges given as (row, col) pairs.
    num_nodes = 6
    num_neighbors = 2
    edge_index = torch.tensor([
        [0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5],
        [1, 0, 2, 4, 1, 3, 0, 2, 5, 3, 5, 0, 2],
    ])

    # Run the neighbor sampling.
    sampler = NeighborSampler()
    # Other entry points that can be tried instead:
    # neighbor_nodes, edge_index = sampler.sample_from_node(2, edge_index, num_nodes, num_neighbors)
    # neighbor_nodes, edge_index = sampler.sample_from_nodes(torch.tensor([1,2]), edge_index, num_nodes, num_neighbors)
    # neighbor_nodes, edge_index = sampler.sample_from_nodes_parallel(torch.tensor([1,2,3]), edge_index, num_nodes, workers=3, fanout=num_neighbors)
    seed_nodes = torch.tensor([1, 2, 3, 4, 5])
    neighbor_nodes, edge_index = sampler.sample_from_nodes_parallel(
        seed_nodes,
        edge_index,
        num_nodes,
        workers=4,
        fanout=num_neighbors,
    )

    # Print the result.
    print('neighbor_nodes_id: \n', neighbor_nodes, '\nedge_index: \n', edge_index)
part/__pycache__/Utils.cpython-37.pyc
View file @
c6330899
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment