Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
starrty_sampler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhlj
starrty_sampler
Commits
d5b33231
Commit
d5b33231
authored
Feb 24, 2023
by
zljJoan
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'main' of github.com:zhljJoan/startGNN_sample into main
parents
e3346a38
b86ba1e3
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
304 additions
and
142 deletions
+304
-142
Sample/Sampler.py
+151
-135
Sample/__init__.py
+0
-0
Sample/demo.py
+10
-7
Sample/sample_cores.cpp
+143
-0
Sample/sample_cores.cpython-37m-x86_64-linux-gnu.so
+0
-0
No files found.
Sample/Sampler.py
View file @
d5b33231
...
@@ -4,193 +4,204 @@ import torch_scatter
...
@@ -4,193 +4,204 @@ import torch_scatter
import
torch.multiprocessing
as
mp
import
torch.multiprocessing
as
mp
from
abc
import
ABC
from
abc
import
ABC
from
Sample.sample_cores
import
get_neighbors
,
neighbor_sample_from_nodes
class
BaseSampler
(
ABC
):
class
BaseSampler
(
ABC
):
r"""An abstract base class that initializes a graph sampler and provides
r"""An abstract base class that initializes a graph sampler and provides
:meth:`sample_from_nodes` and :meth:`sample_from_edges` routines.
:meth:`_sample_one_layer_from_node`
:meth:`_sample_one_layer_from_nodes`
:meth:`_sample_one_layer_from_nodes_parallel`
:meth:`sample_from_nodes` routines.
"""
"""
def
sample_from_node
(
def
__init__
(
self
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
num_layers
:
int
,
workers
=
1
,
**
kwargs
)
->
None
:
r"""__init__
Args:
edge_index: all edges in the graph
num_nodes: the num of all nodes in the graph
num_layers: the num of layers to be sampled
workers: the number of threads, default value is 1
**kwargs: other kwargs
"""
super
()
.
__init__
()
self
.
edge_index
=
edge_index
self
.
num_nodes
=
num_nodes
self
.
num_layers
=
num_layers
self
.
workers
=
workers
def
sample_from_nodes
(
self
,
self
,
node
:
int
,
nodes
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
**
kwargs
**
kwargs
)
->
Tuple
[
torch
.
tensor
,
torch
.
tensor
]:
)
->
Tuple
[
torch
.
Tensor
,
list
]:
r"""Performs sampling from the node specified in: node,
r"""Performs multilayer sampling from the nodes specified in: nodes
returning a sampled subgraph in the specified output format: Tuple[int, torch.tensor].
The specific number of layers is determined by parameter: num_layers
returning a sampled subgraph in the specified output format: Tuple[torch.Tensor, list].
Args:
Args:
node: the seed node index
nodes: the list of seed nodes index
edge_index: edges in the graph
num_nodes: the num of all node in the graph
**kwargs: other kwargs
**kwargs: other kwargs
Returns:
Returns:
sample
s_nodes: the node
sampled
sample
d_nodes: the nodes
sampled
edge_index: the edge
sampled
sampled_edge_index: the edges
sampled
"""
"""
raise
NotImplementedError
raise
NotImplementedError
def
sample
_from_nodes
(
def
_sample_one_layer
_from_nodes
(
self
,
self
,
nodes
:
torch
.
Tensor
,
nodes
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
**
kwargs
**
kwargs
)
->
Tuple
[
torch
.
Tensor
,
torch
.
t
ensor
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
T
ensor
]:
r"""Performs sampling from the nodes specified in: nodes,
r"""Performs sampling from the nodes specified in: nodes,
returning a sampled subgraph in the specified output format: Tuple[
int, torch.t
ensor].
returning a sampled subgraph in the specified output format: Tuple[
torch.Tensor, torch.T
ensor].
Args:
Args:
nodes: the seed nodes index
nodes: the list of seed nodes index
edge_index: edges in the graph
num_nodes: the num of all node in the graph
**kwargs: other kwargs
**kwargs: other kwargs
Returns:
Returns:
sample
s_nodes: the node
sampled
sample
d_nodes: the nodes
sampled
edge_index: the edge
sampled
sampled_edge_index: the edges
sampled
"""
"""
raise
NotImplementedError
raise
NotImplementedError
def
sample
_from_nodes_parallel
(
def
_sample_one_layer
_from_nodes_parallel
(
self
,
self
,
nodes
:
torch
.
Tensor
,
nodes
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
**
kwargs
num_nodes
:
int
,
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
workers
:
int
,
fanout
:
int
)
->
Tuple
[
torch
.
Tensor
,
torch
.
tensor
]:
r"""Performs sampling paralleled from the nodes specified in: nodes,
r"""Performs sampling paralleled from the nodes specified in: nodes,
returning a sampled subgraph in the specified output format: Tuple[
int, torch.t
ensor].
returning a sampled subgraph in the specified output format: Tuple[
torch.Tensor, torch.T
ensor].
Args:
Args:
node: the seed node index
nodes: the list of seed nodes index
edge_index: edges in the graph
**kwargs: other kwargs
num_nodes: the num of all node in the graph
workers: the number of threads
fanout: the number of max neighbor chosen
Returns:
Returns:
nodes: the node
sampled
sampled_nodes: the nodes
sampled
edge_index: the edge
sampled
sampled_edge_index: the edges
sampled
"""
"""
raise
NotImplementedError
raise
NotImplementedError
class
NeighborSampler
(
BaseSampler
):
def
__init__
(
self
)
->
None
:
super
()
.
__init__
()
def
sample_from_node
(
class
NeighborSampler
(
BaseSampler
):
self
,
def
__init__
(
node
:
int
,
self
,
edge_index
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
num_nodes
:
int
,
fanout
:
int
num_layers
:
int
,
)
->
Tuple
[
torch
.
Tensor
,
torch
.
tensor
]:
fanout
:
list
,
r"""Performs sampling from the node specified in: node,
workers
=
1
returning a sampled subgraph in the specified output format: Tuple[int, torch.tensor].
)
->
None
:
r"""__init__
Args:
edge_index: all edges in the graph
num_nodes: the num of all nodes in the graph
num_layers: the num of layers to be sampled
fanout: the list of max neighbors' number chosen for each layer
workers: the number of threads, default value is 1
"""
super
()
.
__init__
(
edge_index
,
num_nodes
,
num_layers
,
workers
)
self
.
fanout
=
fanout
row
,
col
=
edge_index
tnb
=
get_neighbors
(
row
.
tolist
(),
col
.
tolist
(),
num_nodes
)
self
.
neighbors
=
tnb
.
neighbors
self
.
deg
=
tnb
.
deg
def
sample_from_nodes
(
self
,
nodes
:
torch
.
Tensor
)
->
Tuple
[
torch
.
Tensor
,
list
]:
r"""Performs multilayer sampling from the nodes specified in: nodes
The specific number of layers is determined by parameter: num_layers
returning a sampled subgraph in the specified output format: Tuple[torch.Tensor, list].
Args:
Args:
node: the seed node index
nodes: the list of seed nodes index
edge_index: edges in the graph
num_nodes: the num of all node in the graph
fanout: the number of max neighbor chosen
Returns:
Returns:
sample
s
_nodes: the node sampled
sample
d
_nodes: the node sampled
edge_index: the edge sampled
sampled_
edge_index: the edge sampled
"""
"""
row
,
col
=
edge_index
sampled_edge_index_list
=
[]
deg
=
torch_scatter
.
scatter_add
(
torch
.
ones_like
(
row
),
row
,
dim
=
0
,
dim_size
=
num_nodes
)
sampled_nodes
=
torch
.
IntTensor
([])
neighbors
=
torch
.
stack
([
row
[
row
==
node
],
col
[
row
==
node
]],
dim
=
0
)
assert
self
.
workers
>
0
,
'Workers should be positive integer!!!'
print
(
'neighbors:
\n
'
,
neighbors
)
for
i
in
range
(
0
,
self
.
num_layers
):
if
deg
[
node
]
<=
fanout
:
sampled_nodes_i
,
sampled_edge_index_i
=
self
.
_sample_one_layer_from_nodes_parallel
(
nodes
,
self
.
fanout
[
i
])
return
torch
.
unique
(
neighbors
[
1
],
dim
=
0
),
neighbors
nodes
=
torch
.
unique
(
sampled_edge_index_i
[
1
])
else
:
sampled_nodes
=
torch
.
unique
(
torch
.
cat
([
sampled_nodes
,
sampled_nodes_i
]))
random_index
=
torch
.
multinomial
(
torch
.
ones
(
deg
[
node
]),
fanout
,
replacement
=
False
)
# torch.randperm(neighbors.shape[1])[0:fanout]
sampled_edge_index_list
.
append
(
sampled_edge_index_i
)
print
(
"random_index:
\n
"
,
random_index
)
return
sampled_nodes
,
sampled_edge_index_list
edge_index
=
neighbors
.
index_select
(
dim
=
1
,
index
=
random_index
)
samples_nodes
=
torch
.
unique
(
edge_index
.
view
(
-
1
),
dim
=
0
)
return
samples_nodes
,
edge_index
def
sample
_from_nodes
(
def
_sample_one_layer
_from_nodes
(
self
,
self
,
nodes
:
torch
.
Tensor
,
nodes
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
fanout
:
int
fanout
:
int
)
->
Tuple
[
torch
.
Tensor
,
torch
.
t
ensor
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
T
ensor
]:
r"""Performs sampling from the nodes specified in: nodes,
r"""Performs sampling from the nodes specified in: nodes,
returning a sampled subgraph in the specified output format: Tuple[
int, torch.t
ensor].
returning a sampled subgraph in the specified output format: Tuple[
torch.Tensor, torch.T
ensor].
Args:
Args:
nodes: the seed nodes index
nodes: the list of seed nodes index
edge_index: edges in the graph
fanout: the number of max neighbors chosen
num_nodes: the num of all node in the graph
**kwargs: other kwargs
Returns:
Returns:
sample
s_nodes: the node
sampled
sample
d_nodes: the nodes
sampled
edge_index: the edge
sampled
sampled_edge_index: the edges
sampled
"""
"""
if
len
(
nodes
)
==
1
:
tgb
=
neighbor_sample_from_nodes
(
nodes
.
tolist
(),
self
.
neighbors
,
self
.
deg
,
fanout
)
return
self
.
sample_from_node
(
nodes
[
0
],
edge_index
,
num_nodes
,
fanout
)
row
=
torch
.
IntTensor
(
tgb
.
row
())
samples_nodes
=
torch
.
IntTensor
([])
col
=
torch
.
IntTensor
(
tgb
.
col
())
row
=
torch
.
IntTensor
([])
sampled_nodes
=
torch
.
IntTensor
(
tgb
.
nodes
())
col
=
torch
.
IntTensor
([])
return
sampled_nodes
,
torch
.
stack
([
row
,
col
],
dim
=
0
)
# 单线程循环法:
for
node
in
nodes
:
def
_sample_one_layer_from_nodes_parallel
(
samples_nodes_i
,
edge_index_i
=
self
.
sample_from_node
(
node
,
edge_index
,
num_nodes
,
fanout
)
samples_nodes
=
torch
.
unique
(
torch
.
concat
([
samples_nodes
,
samples_nodes_i
]))
row
=
torch
.
concat
([
row
,
edge_index_i
[
0
]])
col
=
torch
.
concat
([
col
,
edge_index_i
[
1
]])
return
samples_nodes
,
torch
.
stack
([
row
,
col
],
dim
=
0
)
def
sample_from_nodes_parallel
(
self
,
self
,
nodes
:
torch
.
Tensor
,
nodes
:
torch
.
Tensor
,
edge_index
:
torch
.
Tensor
,
num_nodes
:
int
,
workers
:
int
,
fanout
:
int
fanout
:
int
)
->
Tuple
[
torch
.
Tensor
,
torch
.
t
ensor
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
T
ensor
]:
r"""Performs sampling from the nodes specified in: nodes,
r"""Performs sampling from the nodes specified in: nodes,
returning a sampled subgraph in the specified output format: Tuple[
int, torch.t
ensor].
returning a sampled subgraph in the specified output format: Tuple[
torch.Tensor, torch.T
ensor].
Args:
Args:
node: the seed node index
nodes: the list of seed nodes index
edge_index: edges in the graph
num_nodes: the num of all node in the graph
workers: the number of threads
fanout: the number of max neighbor chosen
fanout: the number of max neighbor chosen
Returns:
Returns:
nodes: the node sampled
sampled_
nodes: the node sampled
edge_index: the edge sampled
sampled_
edge_index: the edge sampled
"""
"""
sample
s
_nodes
=
torch
.
IntTensor
([])
sample
d
_nodes
=
torch
.
IntTensor
([])
row
=
torch
.
IntTensor
([])
row
=
torch
.
IntTensor
([])
col
=
torch
.
IntTensor
([])
col
=
torch
.
IntTensor
([])
assert
self
.
workers
>
0
,
'Workers should be positive integer!!!'
with
mp
.
Pool
(
processes
=
torch
.
get_num_threads
())
as
p
:
with
mp
.
Pool
(
processes
=
torch
.
get_num_threads
())
as
p
:
n
=
len
(
nodes
)
n
=
len
(
nodes
)
if
(
workers
>=
n
):
if
(
self
.
workers
>=
n
):
results
=
[
p
.
apply_async
(
self
.
sample_from_node
,
results
=
[
p
.
apply_async
(
self
.
_sample_one_layer_from_nodes
,
(
node
,
edge_index
,
num_nodes
,
fanout
))
(
torch
.
tensor
([
node
.
item
()]),
fanout
))
for
node
in
nodes
]
for
node
in
nodes
]
else
:
else
:
quotient
=
n
//
workers
quotient
=
n
//
self
.
workers
remainder
=
n
%
workers
remainder
=
n
%
self
.
workers
# 每个batch先分配quotient个nodes,然后将余数remainder平均分配给其中一些batch
# 每个batch先分配quotient个nodes,然后将余数remainder平均分配给其中一些batch
nodes1
=
nodes
[
0
:(
quotient
+
1
)
*
(
remainder
)]
.
resize_
(
remainder
,
quotient
+
1
)
# 分配了余数的batch
nodes1
=
nodes
[
0
:(
quotient
+
1
)
*
(
remainder
)]
.
resize_
(
remainder
,
quotient
+
1
)
# 分配了余数的batch
nodes2
=
nodes
[(
quotient
+
1
)
*
(
remainder
):
n
]
.
resize_
(
workers
-
remainder
,
quotient
)
# 未分配余数的batch
nodes2
=
nodes
[(
quotient
+
1
)
*
(
remainder
):
n
]
.
resize_
(
self
.
workers
-
remainder
,
quotient
)
# 未分配余数的batch
results
=
[
p
.
apply_async
(
self
.
sample
_from_nodes
,
results
=
[
p
.
apply_async
(
self
.
_sample_one_layer
_from_nodes
,
(
nodes1
[
i
],
edge_index
,
num_nodes
,
fanout
))
(
nodes1
[
i
],
fanout
))
for
i
in
range
(
0
,
remainder
)]
for
i
in
range
(
0
,
remainder
)]
results
.
extend
([
p
.
apply_async
(
self
.
sample
_from_nodes
,
results
.
extend
([
p
.
apply_async
(
self
.
_sample_one_layer
_from_nodes
,
(
nodes2
[
i
],
edge_index
,
num_nodes
,
fanout
))
(
nodes2
[
i
],
fanout
))
for
i
in
range
(
0
,
workers
-
remainder
)])
for
i
in
range
(
0
,
self
.
workers
-
remainder
)])
for
result
in
results
:
for
result
in
results
:
sample
s_nodes_i
,
edge_index_i
=
result
.
get
()
sample
d_nodes_i
,
sampled_
edge_index_i
=
result
.
get
()
sample
s_nodes
=
torch
.
unique
(
torch
.
cat
([
samples_nodes
,
samples
_nodes_i
]))
sample
d_nodes
=
torch
.
unique
(
torch
.
cat
([
sampled_nodes
,
sampled
_nodes_i
]))
row
=
torch
.
cat
([
row
,
edge_index_i
[
0
]])
row
=
torch
.
cat
([
row
,
sampled_
edge_index_i
[
0
]])
col
=
torch
.
cat
([
col
,
edge_index_i
[
1
]])
col
=
torch
.
cat
([
col
,
sampled_
edge_index_i
[
1
]])
return
sample
s
_nodes
,
torch
.
stack
([
row
,
col
],
dim
=
0
)
return
sample
d
_nodes
,
torch
.
stack
([
row
,
col
],
dim
=
0
)
# 不使用
sample
_from_node直接取所有点邻居方法:
# 不使用
_sample_one_layer
_from_node直接取所有点邻居方法:
# row, col = edge_index
# row, col = edge_index
# neighbors1=torch.concat([row[row==nodes[i]] for i in range(0, nodes.shape[0])])
# neighbors1=torch.concat([row[row==nodes[i]] for i in range(0, nodes.shape[0])])
# neighbors2=torch.concat([col[row==nodes[i]] for i in range(0, nodes.shape[0])])
# neighbors2=torch.concat([col[row==nodes[i]] for i in range(0, nodes.shape[0])])
...
@@ -198,15 +209,20 @@ class NeighborSampler(BaseSampler):
...
@@ -198,15 +209,20 @@ class NeighborSampler(BaseSampler):
# print('neighbors: \n', neighbors)
# print('neighbors: \n', neighbors)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
edge_index
=
torch
.
tensor
([[
0
,
1
,
1
,
1
,
2
,
2
,
2
,
3
,
3
,
4
,
4
,
4
,
5
],
[
1
,
0
,
2
,
4
,
1
,
3
,
0
,
2
,
5
,
3
,
5
,
0
,
2
]])
edge_index1
=
torch
.
tensor
([[
0
,
1
,
1
,
1
,
2
,
2
,
2
,
4
,
4
,
4
,
5
],
# , 3, 3
num_nodes
=
6
[
1
,
0
,
2
,
4
,
1
,
3
,
0
,
3
,
5
,
0
,
2
]])
# , 2, 5
num_nodes1
=
6
num_neighbors
=
2
num_neighbors
=
2
# Run the neighbor sampling
# Run the neighbor sampling
sampler
=
NeighborSampler
()
sampler
=
NeighborSampler
(
edge_index
=
edge_index1
,
num_nodes
=
num_nodes1
,
num_layers
=
2
,
workers
=
2
,
fanout
=
[
2
,
1
])
# neighbor_nodes, edge_index = sampler.sample_from_node(2, edge_index, num_nodes, num_neighbors)
# neighbor_nodes, edge_index = sampler.sample_from_nodes(torch.tensor([1,2]), edge_index, num_nodes, num_neighbors)
# neighbor_nodes, edge_index = sampler.sample_from_nodes_parallel(torch.tensor([1,2,3]), edge_index, num_nodes, workers=3, fanout=num_neighbors)
neighbor_nodes
,
edge_index
=
sampler
.
sample_from_nodes_parallel
(
torch
.
tensor
([
1
,
2
,
3
,
4
,
5
]),
edge_index
,
num_nodes
,
workers
=
4
,
fanout
=
num_neighbors
)
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_node(node=2, fanout=2)
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes(nodes=torch.tensor([1,3]), fanout=num_neighbors)
# sampler.workers=3
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes_parallel(nodes=torch.tensor([1,2,3]), fanout=num_neighbors)
# sampler.workers=4
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes_parallel(nodes=torch.tensor([1,2,3,4,5]), fanout=num_neighbors)
neighbor_nodes
,
sampled_edge_index
=
sampler
.
sample_from_nodes
(
torch
.
tensor
([
1
,
2
,
3
]))
# Print the result
# Print the result
print
(
'neighbor_nodes_id:
\n
'
,
neighbor_nodes
,
'
\n
edge_index:
\n
'
,
edge_index
)
print
(
'neighbor_nodes_id:
\n
'
,
neighbor_nodes
,
'
\n
edge_index:
\n
'
,
sampled_
edge_index
)
Sample/__init__.py
0 → 100644
View file @
d5b33231
Sample/
main
.py
→
Sample/
demo
.py
View file @
d5b33231
...
@@ -7,15 +7,18 @@ edge_index = torch.tensor([[0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5], [1, 0, 2, 4,
...
@@ -7,15 +7,18 @@ edge_index = torch.tensor([[0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5], [1, 0, 2, 4,
num_nodes
=
6
num_nodes
=
6
num_neighbors
=
2
num_neighbors
=
2
# Run the neighbor sampling
# Run the neighbor sampling
sampler
=
NeighborSampler
()
sampler
=
NeighborSampler
(
edge_index
=
edge_index
,
num_nodes
=
num_nodes
,
num_layers
=
2
,
workers
=
2
,
fanout
=
[
2
,
1
])
# neighbor_nodes, edge_index = sampler.sample_from_node(2, edge_index, num_nodes, num_neighbors)
# neighbor_nodes, edge_index = sampler.sample_from_nodes(torch.tensor([1,2]), edge_index, num_nodes, num_neighbors)
# neighbor_nodes, edge_index = sampler.sample_from_nodes_parallel(torch.tensor([1,2]), edge_index, num_nodes, workers=1, fanout=num_neighbors)
neighbor_nodes
,
edge_index
=
sampler
.
sample_from_nodes_parallel
(
torch
.
tensor
([
1
,
2
,
3
,
4
,
5
]),
edge_index
,
num_nodes
,
workers
=
4
,
fanout
=
num_neighbors
)
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_node(node=2, fanout=2)
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes(nodes=torch.tensor([1,3]), fanout=num_neighbors)
# sampler.workers=3
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes_parallel(nodes=torch.tensor([1,2,3]), fanout=num_neighbors)
# sampler.workers=4
# neighbor_nodes, sampled_edge_index = sampler._sample_one_layer_from_nodes_parallel(nodes=torch.tensor([1,2,3,4,5]), fanout=num_neighbors)
neighbor_nodes
,
sampled_edge_index
=
sampler
.
sample_from_nodes
(
torch
.
tensor
([
1
,
2
,
3
]))
# Print the result
# Print the result
print
(
'neighbor_nodes_id:
\n
'
,
neighbor_nodes
,
'
\n
edge_index:
\n
'
,
edge_index
)
print
(
'neighbor_nodes_id:
\n
'
,
neighbor_nodes
,
'
\n
edge_index:
\n
'
,
sampled_edge_index
)
# import torch_scatter
# import torch_scatter
# nodes=torch.Tensor([1,2])
# nodes=torch.Tensor([1,2])
...
...
Sample/sample_cores.cpp
0 → 100644
View file @
d5b33231
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include <set>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

using namespace std;
namespace py = pybind11;
/// Integer type used for node identifiers throughout this file.
using NodeIDType = int;
// typedef int EdgeIDType;
// typedef float TimeStampType;
/// Convert a std::vector into a 1-D numpy array.
///
/// The input vector is copied once into a heap-allocated vector. That heap
/// copy is handed to a py::capsule whose destructor callback deletes it, and
/// the capsule is passed to the py::array constructor together with the
/// copy's size and data pointer, so the Python side controls when the buffer
/// is released.
/// See https://github.com/pybind/pybind11/issues/1042
///
/// @param vec  values to expose to Python
/// @return     numpy array built over the heap copy of `vec`
template <typename T>
inline py::array vec2npy(const std::vector<T>& vec) {
    // Heap copy: must outlive this call, so it cannot be a local.
    auto* owned = new std::vector<T>(vec);

    // The capsule is the only thing that ever frees `owned`.
    py::capsule owner(owned, [](void* ptr) {
        delete static_cast<std::vector<T>*>(ptr);
    });

    return py::array(owned->size(), owned->data(), owner);
}
/*
* NeighborSampler Utils
*/
/// Adjacency storage indexed by node id.
///
/// `neighbors[id]` points to the neighbor list of node `id` and `deg[id]`
/// holds the degree recorded for that node.
///
/// NOTE(review): the pointed-to vectors are never deleted by this class and
/// copies of the block share the same pointers, so ownership looks unmanaged.
/// Confirm the intended lifetime before adding a destructor.
class TemporalNeighborBlock {
public:
    std::vector<std::vector<NodeIDType>*> neighbors;
    std::vector<int> deg;

    /// Create an empty block (no nodes).
    TemporalNeighborBlock() {}

    /// Create a block from existing neighbor-list pointers and degrees.
    /// Both arguments are copied; the pointers themselves are copied shallowly.
    TemporalNeighborBlock(std::vector<std::vector<NodeIDType>*>& neighbor_lists,
                          std::vector<int>& degrees)
        : neighbors(neighbor_lists), deg(degrees) {}

    /// Return the neighbor list of `node_id` as a numpy array (via vec2npy).
    /// No bounds check is performed on `node_id`.
    py::array get_node_neighbor(int node_id) {
        const std::vector<NodeIDType>& adjacency = *neighbors[node_id];
        return vec2npy(adjacency);
    }

    /// Return the degree recorded for `node_id`.
    /// No bounds check is performed on `node_id`.
    int get_node_deg(int node_id) {
        return deg[node_id];
    }
};
/// Container for the result of a sampling call.
///
/// `row` and `col` are parallel vectors: entry k of each describes the k-th
/// sampled edge. `nodes` is the node-id list filled in by the sampling
/// functions.
class TemporalGraphBlock {
public:
    std::vector<NodeIDType> row;
    std::vector<NodeIDType> col;
    std::vector<NodeIDType> nodes;

    /// Create an empty block.
    TemporalGraphBlock() {}

    /// Create a block by copying the three given vectors.
    TemporalGraphBlock(std::vector<NodeIDType>& row_ids,
                       std::vector<NodeIDType>& col_ids,
                       std::vector<NodeIDType>& node_ids)
        : row(row_ids), col(col_ids), nodes(node_ids) {}
};
/// Build per-node neighbor lists and degrees from an edge list.
///
/// For every edge k, `col[k]` is appended to the neighbor list of `row[k]`
/// and the degree counter of `row[k]` is incremented.
///
/// @param row        first endpoint of each edge; used as the index into the
///                   neighbor/degree tables
/// @param col        second endpoint of each edge; must have at least as many
///                   entries as `row` (extra entries are ignored)
/// @param num_nodes  number of nodes; every id in `row` must lie in
///                   [0, num_nodes)
/// @return block with one heap-allocated neighbor list per node and the
///         matching degree vector
/// @throws std::invalid_argument if `num_nodes` is negative or `col` is
///         shorter than `row`
/// @throws std::out_of_range     if an id in `row` is outside [0, num_nodes)
TemporalNeighborBlock get_neighbors(
        vector<NodeIDType>& row, vector<NodeIDType>& col, int num_nodes){
    if (num_nodes < 0) {
        throw std::invalid_argument(
            "get_neighbors: num_nodes must be non-negative");
    }
    if (col.size() < row.size()) {
        throw std::invalid_argument(
            "get_neighbors: col has fewer entries than row");
    }
    // Validate every index up front: a bad id would otherwise write outside
    // the tables, and checking before allocating means nothing is leaked
    // when we throw.
    for (NodeIDType src : row) {
        if (src < 0 || src >= num_nodes) {
            throw std::out_of_range(
                "get_neighbors: node id " + std::to_string(src) +
                " is outside [0, " + std::to_string(num_nodes) + ")");
        }
    }

    TemporalNeighborBlock tnb;
    tnb.deg.resize(num_nodes, 0);
    tnb.neighbors.reserve(num_nodes);
    for (int i = 0; i < num_nodes; ++i) {
        tnb.neighbors.push_back(new std::vector<NodeIDType>());
    }

    const std::size_t edge_num = row.size();
    for (std::size_t k = 0; k < edge_num; ++k) {
        // Record the neighbor of row[k] ...
        tnb.neighbors[row[k]]->push_back(col[k]);
        // ... and count it towards that node's degree.
        tnb.deg[row[k]]++;
    }
    return tnb;
}
/// Random engine used by neighbor_sample_from_node.
/// One engine per thread, seeded a single time from std::random_device on
/// first use, instead of reseeding from the wall clock on every call.
static std::mt19937& neighbor_sampling_rng() {
    thread_local std::mt19937 engine{std::random_device{}()};
    return engine;
}

/// Sample at most `fanout` neighbors of a single node.
///
/// If the node has no more than `fanout` neighbors they are all kept;
/// otherwise exactly `fanout` of them are chosen uniformly at random without
/// replacement, keeping their original relative order.
///
/// @param node       seed node id; written to every entry of the result's `row`
/// @param neighbors  neighbor list of `node`
/// @param deg        degree of `node`; expected to equal `neighbors.size()`.
///                   The actual list size is used for sampling so that a
///                   stale value cannot cause out-of-range access; the
///                   parameter is kept for interface compatibility.
/// @param fanout     maximum number of neighbors to keep; values <= 0 yield
///                   an empty edge list
/// @return block where `col` holds the kept neighbors, `row` holds `node`
///         repeated once per kept neighbor, and `nodes` holds the distinct
///         ids from `col` plus `node` itself (in unspecified order)
TemporalGraphBlock neighbor_sample_from_node(
        NodeIDType node, vector<NodeIDType>& neighbors, int deg, int fanout){
    (void)deg;  // see parameter documentation

    TemporalGraphBlock tgb;
    const std::size_t available = neighbors.size();
    const std::size_t wanted =
        fanout > 0 ? static_cast<std::size_t>(fanout) : 0;

    if (available <= wanted) {
        // Few enough neighbors: keep them all.
        tgb.col = neighbors;
    } else {
        // More neighbors than the fanout: selection sampling. Walk the list
        // once and keep element idx with probability
        // still_needed / (available - idx). This picks exactly `wanted`
        // elements, uniformly, in a single O(deg) pass.
        tgb.col.reserve(wanted);
        std::mt19937& rng = neighbor_sampling_rng();
        std::size_t still_needed = wanted;
        for (std::size_t idx = 0; idx < available && still_needed > 0; ++idx) {
            std::uniform_int_distribution<std::size_t> pick(
                0, available - idx - 1);
            if (pick(rng) < still_needed) {
                tgb.col.push_back(neighbors[idx]);
                --still_needed;
            }
        }
    }

    tgb.row.assign(tgb.col.size(), node);

    // Deduplicate the sampled nodes (seed included).
    std::unordered_set<int> unique_ids;
    for (int id : tgb.col) {
        unique_ids.insert(id);
    }
    unique_ids.insert(node);
    tgb.nodes.assign(unique_ids.begin(), unique_ids.end());

    return tgb;
}
/// Sample one layer of neighbors for a batch of seed nodes.
///
/// Calls neighbor_sample_from_node for every seed and concatenates the
/// resulting edge lists in seed order.
///
/// @param nodes      seed node ids
/// @param neighbors  neighbor lists indexed by node id
/// @param deg        degrees indexed by node id
/// @param fanout     maximum number of neighbors kept per seed
/// @return block whose `row`/`col` hold all sampled edges and whose `nodes`
///         holds the distinct ids appearing in `col` (unspecified order)
/// @throws std::out_of_range if a seed id has no entry in `neighbors` or `deg`
///
/// NOTE(review): unlike neighbor_sample_from_node, the seed ids themselves
/// are not added to `nodes` unless they also occur in `col`. The original
/// code produced exactly this result, so it is preserved; confirm intent.
TemporalGraphBlock neighbor_sample_from_nodes(
        vector<NodeIDType>& nodes, vector<vector<NodeIDType>>& neighbors,
        vector<NodeIDType>& deg, int fanout){
    TemporalGraphBlock tgb;

    for (NodeIDType node : nodes) {
        // Reject ids that would index outside the lookup tables.
        if (node < 0 ||
            static_cast<std::size_t>(node) >= neighbors.size() ||
            static_cast<std::size_t>(node) >= deg.size()) {
            throw std::out_of_range(
                "neighbor_sample_from_nodes: node id " +
                std::to_string(node) + " has no neighbor/degree entry");
        }

        TemporalGraphBlock tgb_i =
            neighbor_sample_from_node(node, neighbors[node], deg[node], fanout);
        tgb.row.insert(tgb.row.end(), tgb_i.row.begin(), tgb_i.row.end());
        tgb.col.insert(tgb.col.end(), tgb_i.col.begin(), tgb_i.col.end());
        // tgb_i.nodes is intentionally not accumulated: `nodes` is rebuilt
        // from `col` below, so collecting it here would be wasted work.
    }

    // Deduplicate the sampled nodes.
    std::unordered_set<int> unique_ids;
    for (int id : tgb.col) {
        unique_ids.insert(id);
    }
    tgb.nodes.assign(unique_ids.begin(), unique_ids.end());

    return tgb;
}
/// Python bindings for the `sample_cores` extension module.
///
/// Exposes the two free functions `neighbor_sample_from_nodes` and
/// `get_neighbors`, plus the two result classes. On TemporalGraphBlock the
/// `row`, `col` and `nodes` members are exposed as methods that return numpy
/// arrays built by vec2npy; on TemporalNeighborBlock `neighbors` and `deg`
/// are exposed as read-only attributes.
PYBIND11_MODULE(sample_cores, m) {
    using NodeList = std::vector<NodeIDType>;

    // Free functions.
    m.def("neighbor_sample_from_nodes", &neighbor_sample_from_nodes);
    m.def("get_neighbors", &get_neighbors);

    // Sampling result.
    py::class_<TemporalGraphBlock> graph_block(m, "TemporalGraphBlock");
    graph_block.def(py::init<NodeList&, NodeList&, NodeList&>());
    graph_block.def("row", [](const TemporalGraphBlock& block) {
        return vec2npy(block.row);
    });
    graph_block.def("col", [](const TemporalGraphBlock& block) {
        return vec2npy(block.col);
    });
    graph_block.def("nodes", [](const TemporalGraphBlock& block) {
        return vec2npy(block.nodes);
    });

    // Adjacency lists and degrees.
    py::class_<TemporalNeighborBlock> neighbor_block(m, "TemporalNeighborBlock");
    neighbor_block.def(py::init<std::vector<NodeList*>&, std::vector<int>&>());
    neighbor_block.def_readonly("neighbors", &TemporalNeighborBlock::neighbors);
    neighbor_block.def_readonly("deg", &TemporalNeighborBlock::deg);
}
Sample/sample_cores.cpython-37m-x86_64-linux-gnu.so
0 → 100755
View file @
d5b33231
File added
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment