Commit 7f481360 by xxx

Merge branch 'master' into hzq

parents cd3f3cd2 6acf7ed1
install.sh merge=ours
\ No newline at end of file
*.tgz
*.my
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
......@@ -169,8 +171,12 @@ cython_debug/
/third_party
/.vscode
/.history
/.cache
/run_route.py
/dataset
/test_*
/*.ipynb
saved_models/
saved_checkpoints/
\ No newline at end of file
sampling:
- layer: 1
neighbor:
- 10
strategy: 'recent'
prop_time: False
history: 1
duration: 0
num_thread: 32
memory:
- type: 'node'
dim_time: 100
deliver_to: 'self'
mail_combine: 'last'
memory_update: 'gru'
mailbox_size: 1
combine_node_feature: True
dim_out: 100
gnn:
- arch: 'transformer_attention'
use_src_emb: True
use_dst_emb: True
layer: 1
att_head: 2
dim_time: 100
dim_out: 100
train:
- epoch: 50
batch_size: 100
# reorder: 16
lr: 0.0001
dropout: 0.1
att_dropout: 0.2
all_on_gpu: True
\ No newline at end of file
sampling:
- no_sample: True
history: 1
memory:
- type: 'node'
dim_time: 100
deliver_to: 'self'
mail_combine: 'last'
memory_update: 'rnn'
mailbox_size: 1
combine_node_feature: True
dim_out: 100
gnn:
- arch: 'identity'
use_src_emb: False
use_dst_emb: False
time_transform: 'JODIE'
train:
- epoch: 20
batch_size: 200
lr: 0.0001
dropout: 0.1
all_on_gpu: True
\ No newline at end of file
sampling:
- layer: 2
neighbor:
- 10
- 10
strategy: 'uniform'
prop_time: False
history: 1
duration: 0
num_thread: 32
memory:
- type: 'none'
dim_out: 0
gnn:
- arch: 'transformer_attention'
layer: 2
att_head: 2
dim_time: 100
dim_out: 100
train:
- epoch: 100
batch_size: 600
lr: 0.0001
dropout: 0.1
att_dropout: 0.1
all_on_gpu: True
\ No newline at end of file
......@@ -18,13 +18,15 @@ memory:
dim_out: 100
gnn:
- arch: 'transformer_attention'
use_src_emb: False
use_dst_emb: False
layer: 1
att_head: 2
dim_time: 100
dim_out: 100
train:
- epoch: 5
#batch_size: 100
- epoch: 20
batch_size: 200
# reorder: 16
lr: 0.0001
dropout: 0.2
......
sampling:
- layer: 1
neighbor:
- 10
strategy: 'recent'
prop_time: False
history: 1
duration: 0
num_thread: 32
memory:
- type: 'node'
dim_time: 100
deliver_to: 'self'
mail_combine: 'last'
memory_update: 'gru'
mailbox_size: 1
combine_node_feature: True
dim_out: 100
gnn:
- arch: 'transformer_attention'
use_src_emb: True
use_dst_emb: True
layer: 1
att_head: 2
dim_time: 100
dim_out: 100
train:
- epoch: 20
batch_size: 200
# reorder: 16
lr: 0.0001
dropout: 0.2
att_dropout: 0.2
all_on_gpu: True
\ No newline at end of file
#include<head.h>
#include <sampler.h>
#include <tppr.h>
#include <output.h>
#include <neighbors.h>
#include <temporal_utils.h>
......@@ -88,4 +89,22 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
.def("reset", &ParallelSampler::reset)
.def("get_ret", [](const ParallelSampler &ps) { return ps.ret; });
py::class_<ParallelTppRComputer>(m, "ParallelTppRComputer")
.def(py::init<TemporalNeighborBlock &, NodeIDType, EdgeIDType, int,
int, int, int, vector<float>&, vector<float>& >())
.def_readonly("ret", &ParallelTppRComputer::ret, py::return_value_policy::reference)
.def("reset_ret", &ParallelTppRComputer::reset_ret)
.def("reset_tppr", &ParallelTppRComputer::reset_tppr)
.def("reset_val_tppr", &ParallelTppRComputer::reset_val_tppr)
.def("backup_tppr", &ParallelTppRComputer::backup_tppr)
.def("restore_tppr", &ParallelTppRComputer::restore_tppr)
.def("restore_val_tppr", &ParallelTppRComputer::restore_val_tppr)
.def("get_pruned_topk", &ParallelTppRComputer::get_pruned_topk)
.def("extract_streaming_tppr", &ParallelTppRComputer::extract_streaming_tppr)
.def("streaming_topk", &ParallelTppRComputer::streaming_topk)
.def("single_streaming_topk", &ParallelTppRComputer::single_streaming_topk)
.def("streaming_topk_no_fake", &ParallelTppRComputer::streaming_topk_no_fake)
.def("compute_val_tppr", &ParallelTppRComputer::compute_val_tppr)
.def("get_ret", [](const ParallelTppRComputer &ps) { return ps.ret; });
}
\ No newline at end of file
#pragma once
#include <iostream>
#include <algorithm>
#include <torch/extension.h>
#include <omp.h>
#include <time.h>
......@@ -17,6 +18,12 @@ typedef int64_t NodeIDType;
typedef int64_t EdgeIDType;
typedef float WeightType;
typedef float TimeStampType;
typedef tuple<NodeIDType, EdgeIDType, TimeStampType> PPRKeyType;
typedef double PPRValueType;
typedef phmap::parallel_flat_hash_map<PPRKeyType, PPRValueType> PPRDictType;
typedef vector<PPRDictType> PPRListDictType;
typedef vector<vector<PPRDictType>> PPRListListDictType;
typedef vector<vector<double>> NormListType;
class TemporalNeighborBlock;
class TemporalGraphBlock;
......@@ -28,6 +35,7 @@ int nodeIdToInOut(NodeIDType nid, int pid, const vector<NodeIDType>& part_ptr);
int nodeIdToPartId(NodeIDType nid, const vector<NodeIDType>& part_ptr);
vector<th::Tensor> divide_nodes_to_part(th::Tensor nodes, const vector<NodeIDType>& part_ptr, int threads);
NodeIDType sample_multinomial(const vector<WeightType>& weights, default_random_engine& e);
vector<int64_t> sample_max(const vector<WeightType>& weights, int k);
......@@ -173,3 +181,17 @@ NodeIDType sample_multinomial(const vector<WeightType>& weights, default_random_
sample_indice = distance(cumulative_weights.begin(), it);
return sample_indice;
}
vector<int64_t> sample_max(const vector<WeightType>& weights, int k) {
vector<int64_t> indices(weights.size());
for (int i = 0; i < weights.size(); ++i) {
indices[i] = i;
}
// use partial_sort (a selection algorithm) to find the indices of the top-k largest values
partial_sort(indices.begin(), indices.begin() + k, indices.end(),
[&weights](int64_t a, int64_t b) { return weights[a] > weights[b]; });
// return the indices of the top-k largest values
return vector<int64_t>(indices.begin(), indices.begin() + k);
}
\ No newline at end of file
......@@ -287,10 +287,15 @@ void TemporalNeighborBlock::update_edge_weight(
for(int64_t i=0; i<edge_num; i++){
//update the weight of the edge between the node and its neighbor
AT_ASSERTM(this->inverted_index[dst[i]].count(src[i])==1, "Unexist Edge Index: "+to_string(src[i])+", "+to_string(dst[i]));
int index;
if(this->with_eid) index = this->inverted_index[dst[i]][eid_ptr[i]];
else index = this->inverted_index[dst[i]][src[i]];
if(this->with_eid){
AT_ASSERTM(this->inverted_index[dst[i]].count(eid_ptr[i])==1, "Unexist Eid --> Col: "+to_string(eid_ptr[i])+"-->"+to_string(dst[i]));
index = this->inverted_index[dst[i]][eid_ptr[i]];
}
else{
AT_ASSERTM(this->inverted_index[dst[i]].count(src[i])==1, "Unexist Edge Index: "+to_string(src[i])+", "+to_string(dst[i]));
index = this->inverted_index[dst[i]][src[i]];
}
this->edge_weight[dst[i]][index] = ew[i];
}
}
......
......@@ -11,6 +11,7 @@ class TemporalGraphBlock
vector<int64_t> src_index;
vector<NodeIDType> sample_nodes;
vector<TimeStampType> sample_nodes_ts;
vector<WeightType> e_weights;
double sample_time = 0;
double tot_time = 0;
int64_t sample_edge_num = 0;
......
......@@ -105,13 +105,13 @@ void ParallelSampler :: neighbor_sample_from_nodes_static_layer(th::Tensor nodes
// uniform_int_distribution<> u(0, tnb.deg[node]-1);
// while(temp_s.size()!=fanout && temp_s.size()<tnb.neighbors_set[node].size()){
for(int i=0;i<fanout;i++){
//loop to pick fanout neighbors
NodeIDType indice;
if(policy == "weighted"){//consider edge weight information
const vector<WeightType>& ew = tnb.edge_weight[node];
indice = sample_multinomial(ew, e);
}
else if(policy == "uniform"){//uniform sampling
// indice = u(e);
indice = rand_r(&loc_seed) % (nei.size());
}
......@@ -119,7 +119,7 @@ void ParallelSampler :: neighbor_sample_from_nodes_static_layer(th::Tensor nodes
auto chosen_e_iter = edge.begin() + indice;
if(part_unique){
auto rst = temp_s.insert(*chosen_n_iter);
if(rst.second){ //no duplicates
eid_threads[tid].emplace_back(*chosen_e_iter);
node_s_threads[tid].insert(*chosen_n_iter);
if(!tnb.neighbors_set.empty() && temp_s.size()<fanout && temp_s.size()<tnb.neighbors_set[node].size()) fanout++;
......@@ -229,7 +229,7 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer(
}
}
else{
//if the number of candidate neighbor edges exceeds the fanout, randomly pick fanout neighbors
tgb_i[tid].src_index.insert(tgb_i[tid].src_index.end(), fanout, i);
uniform_int_distribution<> u(0, end_index-1);
//cout<<end_index<<endl;
......
......@@ -114,11 +114,15 @@ edge_weight_dict = {}
edge_weight_dict['edata'] = 2*neg_nums
edge_weight_dict['sample_data'] = 1*neg_nums
edge_weight_dict['neg_data'] = 1
partition_save('./dataset/here/'+data_name, data, 1, 'metis_for_tgnn',
edge_weight_dict=edge_weight_dict)
partition_save('./dataset/here/'+data_name, data, 2, 'metis_for_tgnn',
edge_weight_dict=edge_weight_dict)
partition_save('./dataset/here/'+data_name, data, 4, 'metis_for_tgnn',
#partition_save('./dataset/here/'+data_name, data, 1, 'metis_for_tgnn',
# edge_weight_dict=edge_weight_dict)
#partition_save('./dataset/here/'+data_name, data, 2, 'metis_for_tgnn',
# edge_weight_dict=edge_weight_dict)
#partition_save('./dataset/here/'+data_name, data, 4, 'metis_for_tgnn',
# edge_weight_dict=edge_weight_dict)
#partition_save('./dataset/here/'+data_name, data, 8, 'metis_for_tgnn',
# edge_weight_dict=edge_weight_dict)
partition_save('./dataset/here/'+data_name, data, 16, 'metis_for_tgnn',
edge_weight_dict=edge_weight_dict)
#
# partition_save('./dataset/here/'+data_name, data, 4, 'metis_for_tgnn',
......
Advanced Data Preprocessing
===========================
.. note::
Describe in detail the usage of StarryGL's data management classes (e.g. GraphData), the design of their internal index structures, and the underlying operations.
\ No newline at end of file
......@@ -4,4 +4,4 @@ Advanced Concepts
.. toctree::
sampling_parallel/index
partition_parallel/index
timeline_parallel/index
\ No newline at end of file
timeline_parallel/index
Distributed Partition Parallel
==============================
.. note::
This part covers distributed partition-parallel training.
\ No newline at end of file
Distributed Timeline Parallel
=============================
.. note::
Distributed timeline parallelism.
\ No newline at end of file
Distributed Temporal Sampling
=============================
.. note::
Training modes based on distributed temporal graph sampling.
\ No newline at end of file
starrygl.sample.cache.fetch_cache
=================================
.. note::
The cache used in feature fetching
.. currentmodule:: starrygl.sample.cache.fetch_cache
.. autoclass::
FetchFeatureCache
:members:
starrygl.sample.graph_core
==========================
.. note::
Distributed Data Structure used in sampling training
.. currentmodule:: starrygl.sample.graph_core
.. autoclass::
DistributedGraphStore
:members:
.. autoclass::
DataSet
.. autoclass::
TemporalNeighborSampleGraph
\ No newline at end of file
......@@ -5,4 +5,6 @@ Package References
distributed
neighbor_sampler
memory
data_loader
\ No newline at end of file
data_loader
graph_core
cache
Preparing the Temporal Graph Dataset
====================================
.. note::
Covers the data cleaning and preprocessing steps, starting from the raw data, that produce data files usable by StarryGL.
\ No newline at end of file
In this tutorial, we will show the preparation process of a temporal graph dataset that can be used by StarryGL.
Read Raw Data
-------------
Take Wikipedia dataset as an example, the raw data files are as follows:
- `edges.csv`: the temporal edges of the graph
- `node_features.pt`: the node features of the graph
- `edge_features.pt`: the edge features of the graph
Here is an example to read the raw data files:
.. code-block:: python
data_name = args.data_name
df = pd.read_csv('raw_data/'+data_name+'/edges.csv')
if os.path.exists('raw_data/'+data_name+'/node_features.pt'):
n_feat = torch.load('raw_data/'+data_name+'/node_features.pt')
else:
n_feat = None
if os.path.exists('raw_data/'+data_name+'/edge_features.pt'):
e_feat = torch.load('raw_data/'+data_name+'/edge_features.pt')
else:
e_feat = None
src = torch.from_numpy(np.array(df.src.values)).long()
dst = torch.from_numpy(np.array(df.dst.values)).long()
ts = torch.from_numpy(np.array(df.time.values)).long()
neg_nums = args.num_neg_sample
edge_index = torch.cat((src[np.newaxis, :], dst[np.newaxis, :]), 0)
num_nodes = edge_index.view(-1).max().item()+1
num_edges = edge_index.shape[1]
print('the number of nodes in graph is {}, \
the number of edges in graph is {}'.format(num_nodes, num_edges))
Preprocess Data
---------------
After reading the raw data, we need to preprocess the data to get the data format that can be used by StarryGL. The following code shows the preprocessing process:
.. code-block:: python
sample_graph = {}
sample_src = torch.cat([src.view(-1, 1), dst.view(-1, 1)], dim=1)\
.reshape(1, -1)
sample_dst = torch.cat([dst.view(-1, 1), src.view(-1, 1)], dim=1)\
.reshape(1, -1)
sample_ts = torch.cat([ts.view(-1, 1), ts.view(-1, 1)], dim=1).reshape(-1)
sample_eid = torch.arange(num_edges).view(-1, 1).repeat(1, 2).reshape(-1)
sample_graph['edge_index'] = torch.cat([sample_src, sample_dst], dim=0)
sample_graph['ts'] = sample_ts
sample_graph['eids'] = sample_eid
neg_sampler = NegativeSampling('triplet')
neg_src = neg_sampler.sample(edge_index.shape[1]*neg_nums, num_nodes)
neg_sample = neg_src.reshape(-1, neg_nums)
edge_ts = torch.from_numpy(np.array(ts)).float()
data = Data() #torch_geometric.data.Data()
data.num_nodes = num_nodes
data.num_edges = num_edges
data.edge_index = edge_index
data.edge_ts = edge_ts
data.neg_sample = neg_sample
if n_feat is not None:
data.x = n_feat
if e_feat is not None:
data.edge_attr = e_feat
data.train_mask = (torch.from_numpy(np.array(df.ext_roll.values)) == 0)
data.val_mask = (torch.from_numpy(np.array(df.ext_roll.values)) == 1)
data.test_mask = (torch.from_numpy(np.array(df.ext_roll.values)) == 2)
sample_graph['train_mask'] = data.train_mask[sample_eid]
sample_graph['test_mask'] = data.test_mask[sample_eid]
sample_graph['val_mask'] = data.val_mask[sample_eid]
data.sample_graph = sample_graph
data.y = torch.zeros(edge_index.shape[1])
edge_index_dict = {}
edge_index_dict['edata'] = data.edge_index
edge_index_dict['sample_data'] = data.sample_graph['edge_index']
edge_index_dict['neg_data'] = torch.cat([neg_src.view(1, -1),
dst.view(-1, 1).repeat(1, neg_nums).
reshape(1, -1)], dim=0)
data.edge_index_dict = edge_index_dict
edge_weight_dict = {}
edge_weight_dict['edata'] = 2*neg_nums
edge_weight_dict['sample_data'] = 1*neg_nums
edge_weight_dict['neg_data'] = 1
We construct a torch_geometric.data.Data object to store the data. The data object contains the following attributes:
- `num_nodes`: the number of nodes in the graph
- `num_edges`: the number of edges in the graph
- `edge_index`: the edge index of the graph
- `edge_ts`: the timestamp of the edges
- `neg_sample`: the negative samples of the edges
- `x`: the node features of the graph
- `edge_attr`: the edge features of the graph
- `train_mask`: the train mask of the edges
- `val_mask`: the validation mask of the edges
- `test_mask`: the test mask of the edges
- `sample_graph`: the sampled graph
- `edge_index_dict`: the edge index of the sampled graph
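As a quick sanity check, we can inspect the constructed object; this is a minimal sketch that only uses the fields built above:
.. code-block:: python
print(data.num_nodes, data.num_edges)
print(data.edge_index.shape) # torch.Size([2, num_edges])
print(int(data.train_mask.sum()), int(data.val_mask.sum()), int(data.test_mask.sum()))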
Finally, we can partition the graph and save the data:
.. code-block:: python
partition_save('./dataset/here/'+data_name, data, 16, 'metis_for_tgnn',
edge_weight_dict=edge_weight_dict)
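Each trainer can later load its partition back with :code:`partition_load`, mirroring the usage in the sampling and data-loader examples elsewhere in this repository; a minimal sketch, assuming the same path and partitioning algorithm as above:
.. code-block:: python
from starrygl.sample.part_utils.partition_tgnn import partition_load
# load the partition assigned to this rank (path and algo follow the partition_save call above)
pdata = partition_load('./dataset/here/' + data_name, algo='metis_for_tgnn')
print(pdata.ids.shape[0]) # number of nodes owned by this partition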
......@@ -5,5 +5,4 @@ Tutorials
intro
module
dataset
application
distributed
\ No newline at end of file
Introduction to Temporal GNN
==============================================
.. note::
A brief introduction to temporal GNNs, their application scenarios, and the problems they need to solve; this serves as an overall overview.
Many real-world systems can be formulated as temporal interaction graphs, such as social networks and citation networks. In these systems, nodes represent entities and edges represent interactions between entities. The interactions are usually time-stamped, which means the edges are associated with time. Temporal interaction graphs are dynamic: the graph structure changes over time. For example, in a social network, the friendship between two people may be established or broken at different times; in a citation network, papers cite other papers at different times.
To encapsulate the temporal information present in these graphs and learn dynamic representations, researchers have introduced temporal graph neural networks (GNNs). These networks are capable of modeling both structural and temporal dependencies within the graph. Numerous innovative frameworks have been proposed to date, achieving outstanding performance in specific tasks such as link prediction. Based on two different methods to represent temporal graphs, we can divide temporal GNNs into two categories:
1. continuous-time temporal GNNs, which model the temporal graph as a sequence of interactions
2. discrete-time temporal GNNs, which model the temporal graph as a sequence of snapshots
However, as the temporal graph expands—potentially encompassing millions of nodes and billions of edges—it becomes increasingly challenging to scale temporal GNN training to accommodate these larger graphs. The reasons are twofold: first, sampling neighbors from a larger graph demands more time; second, chronological training also incurs a higher time cost. To address these challenges, we introduce StarryGL in this tutorial. StarryGL is a distributed temporal GNN framework designed to efficiently navigate the complexities of training larger temporal graphs.
\ No newline at end of file
Creating Temporal GNN Models
============================
.. note::
Explains how to create GNN models, using the two most classic and concise examples: building **discrete-time dynamic graph models** and **continuous-time dynamic graph models**.
\ No newline at end of file
Continuous-time Temporal GNN Models
-----------------------------------
To create a continuous-time temporal GNN model, we first need to define a configuration file with the :code:`.yml` suffix to specify the model structure and parameters. Here we use the configuration file :code:`TGN.yml` for the TGN model as an example:
.. code-block:: yaml
sampling:
- layer: 1
neighbor:
- 10
strategy: 'recent'
prop_time: False
history: 1
duration: 0
num_thread: 32
memory:
- type: 'node'
dim_time: 100
deliver_to: 'self'
mail_combine: 'last'
memory_update: 'gru'
mailbox_size: 1
combine_node_feature: True
dim_out: 100
gnn:
- arch: 'transformer_attention'
use_src_emb: False
use_dst_emb: False
layer: 1
att_head: 2
dim_time: 100
dim_out: 100
train:
- epoch: 20
batch_size: 200
# reorder: 16
lr: 0.0001
dropout: 0.2
att_dropout: 0.2
all_on_gpu: True
The configuration file is composed of four parts: :code:`sampling`, :code:`memory`, :code:`gnn` and :code:`train`. Here are their meanings:
- :code:`sampling`: This part specifies the sampling strategy for the temporal graph. :code:`layer` field specifies the number of layers in the sampling strategy. The :code:`neighbor` field specifies the number of neighbors to sample for each layer. The :code:`strategy` field specifies the sampling strategy (recent or uniform). The :code:`prop_time` field specifies whether to propagate the time information. The :code:`history` field specifies the number of historical timestamps to use. The :code:`duration` field specifies the duration of the time window. The :code:`num_thread` field specifies the number of threads to use for sampling.
- :code:`memory`: This part specifies the memory module. :code:`type` field specifies the type of memory module (node or none). :code:`dim_time` field specifies the dimension of the time embedding. :code:`deliver_to` field specifies the destination of the message. :code:`mail_combine` field specifies the way to combine the messages. :code:`memory_update` field specifies the way to update the memory. :code:`mailbox_size` field specifies the size of the mailbox. :code:`combine_node_feature` field specifies whether to combine the node features. :code:`dim_out` field specifies the dimension of the output.
- :code:`gnn`: This part specifies the GNN module. :code:`arch` field specifies the architecture of the GNN module. :code:`use_src_emb` field specifies whether to use the source embedding. :code:`use_dst_emb` field specifies whether to use the destination embedding. :code:`layer` field specifies the number of layers in the GNN module. :code:`att_head` field specifies the number of attention heads. :code:`dim_time` field specifies the dimension of the time embedding. :code:`dim_out` field specifies the dimension of the output.
- :code:`train`: This part specifies the training parameters. :code:`epoch` field specifies the number of epochs. :code:`batch_size` field specifies the batch size. :code:`lr` field specifies the learning rate. :code:`dropout` field specifies the dropout rate. :code:`att_dropout` field specifies the attention dropout rate. :code:`all_on_gpu` field specifies whether to put all the data on GPU.
After defining the configuration file, we can first read the parameters from it and create the model by constructing a :code:`GeneralModel` object:
.. code-block:: python
def parse_config(f):
conf = yaml.safe_load(open(f, 'r'))
sample_param = conf['sampling'][0]
memory_param = conf['memory'][0]
gnn_param = conf['gnn'][0]
train_param = conf['train'][0]
return sample_param, memory_param, gnn_param, train_param
sample_param, memory_param, gnn_param, train_param = parse_config('./config/{}.yml'.format(args.model))
model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param).cuda()
model = DDP(model)
Then a :code:`GeneralModel` object is created. If needed, we can adjust the model's parameters by modifying the contents of the configuration file, or by overriding the parsed values in code before constructing the model.
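This is a minimal sketch, assuming the parsed sections behave like the plain Python dictionaries returned by :code:`parse_config`; the overridden keys and the config path are only examples:
.. code-block:: python
sample_param, memory_param, gnn_param, train_param = parse_config('./config/TGN.yml')
gnn_param['att_head'] = 4 # e.g. use more attention heads
train_param['dropout'] = 0.3 # e.g. stronger regularization
model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param).cuda()
Here we provide 5 models for continuous-time temporal GNNs: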
- :code:`TGN`: The TGN model proposed in `Temporal Graph Networks for Deep Learning on Dynamic Graphs <https://arxiv.org/abs/2006.10637>`__.
- :code:`DyRep`: The DyRep model proposed in `Representation Learning over Dynamic Graphs <https://arxiv.org/abs/1803.04051>`__.
- :code:`TIGER`: The TIGER model proposed in `TIGER: Temporal Interaction Graph Embedding with Restarts <https://arxiv.org/abs/2302.06057>`__.
- :code:`Jodie`: The JODIE model proposed in `Predicting Dynamic Embedding Trajectory in Temporal Interaction Networks <https://arxiv.org/abs/1908.01207>`__.
- :code:`TGAT`: The TGAT model proposed in `Inductive Representation Learning on Temporal Graphs <https://arxiv.org/abs/2002.07962>`__.
\ No newline at end of file
......@@ -3,11 +3,17 @@
mkdir -p build && cd build
cmake .. \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_PREFIX_PATH="/home/hwj/.miniconda3/envs/sgl/lib/python3.10/site-packages" \
-DPython3_ROOT_DIR="/home/hwj/.miniconda3/envs/sgl" \
-DCUDA_TOOLKIT_ROOT_DIR="/home/hwj/.local/cuda-11.8" \
-DCMAKE_PREFIX_PATH=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") \
-DPython3_ROOT_DIR=$(python -c "import sys; print(sys.prefix)") \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME:-"$(realpath $(dirname $(which nvcc))/../)"} \
&& make -j32 \
&& rm -rf ../starrygl/lib \
&& mkdir ../starrygl/lib \
&& cp lib*.so ../starrygl/lib/ \
&& patchelf --set-rpath '$ORIGIN:$ORIGIN/lib' --force-rpath ../starrygl/lib/*.so
&& patchelf --set-rpath '$ORIGIN:$ORIGIN/lib' --force-rpath ../starrygl/lib/*.so
\ No newline at end of file
......@@ -3,7 +3,7 @@ torch==2.1.1+cu118
torchvision==0.16.1+cu118
torchaudio==2.1.1+cu118
--extra-index-url https://data.pyg.org/whl/torch-2.1.0+cu118.html
--find-links https://data.pyg.org/whl/torch-2.1.0+cu118.html
torch_geometric==2.4.0
pyg_lib==0.3.1+pt21cu118
torch_scatter==2.1.2+pt21cu118
......@@ -11,6 +11,12 @@ torch_sparse==0.6.18+pt21cu118
torch_cluster==1.6.3+pt21cu118
torch_spline_conv==1.2.2+pt21cu118
--find-links https://data.dgl.ai/wheels/cu118/repo.html
dgl==1.1.3+cu118
--find-links https://data.dgl.ai/wheels-test/repo.html
dglgo==0.0.2
ogb
tqdm
networkx
\ No newline at end of file
networkx
......@@ -294,7 +294,7 @@ class DistributedTensor:
index = dist_index.loc
futs: List[torch.futures.Future] = []
for i in range(self.num_parts()):
for i in range(self.num_parts):
mask = part_idx == i
f = self.accessor.async_index_copy_(0, index[mask], source[mask], self.rrefs[i])
futs.append(f)
......@@ -308,7 +308,7 @@ class DistributedTensor:
index = dist_index.loc
futs: List[torch.futures.Future] = []
for i in range(self.num_parts()):
for i in range(self.num_parts):
mask = part_idx == i
f = self.accessor.async_index_add_(0, index[mask], source[mask], self.rrefs[i])
futs.append(f)
......
import torch
import dgl
from os.path import abspath, join, dirname
import sys
sys.path.insert(0, join(abspath(dirname(__file__))))
......@@ -47,7 +46,7 @@ class GeneralModel(torch.nn.Module):
self.edge_predictor = EdgePredictor(gnn_param['dim_out'])
if 'combine' in gnn_param and gnn_param['combine'] == 'rnn':
self.combiner = torch.nn.RNN(gnn_param['dim_out'], gnn_param['dim_out'])
def forward(self, mfgs, metadata = None,neg_samples=1):
if self.memory_param['type'] == 'node':
......@@ -68,8 +67,14 @@ class GeneralModel(torch.nn.Module):
out = torch.stack(out, dim=0)
out = self.combiner(out)[0][-1, :, :]
# metadata needs to have recorded the ids during the earlier deduplication step
if self.gnn_param['use_src_emb'] or self.gnn_param['use_dst_emb']:
self.embedding = out.detach().clone()
else:
self.embedding = None
if metadata is not None:
#out = torch.cat((out[metadata['dst_pos_pos']],out[metadata['src_id_pos']],out[metadata['dst_neg_pos']]),0)
if self.gnn_param['dyrep']:
out = self.memory_updater.last_updated_memory
out = torch.cat((out[metadata['src_pos_index']],out[metadata['dst_pos_index']],out[metadata['src_neg_index']]),0)
return self.edge_predictor(out, neg_samples=neg_samples)
......
import yaml
import numpy as np
def parse_config(f):
conf = yaml.safe_load(open(f, 'r'))
......@@ -7,4 +7,32 @@ def parse_config(f):
memory_param = conf['memory'][0]
gnn_param = conf['gnn'][0]
train_param = conf['train'][0]
return sample_param, memory_param, gnn_param, train_param
\ No newline at end of file
return sample_param, memory_param, gnn_param, train_param
class EarlyStopMonitor(object):
def __init__(self, max_round=3, higher_better=True, tolerance=1e-10):
self.max_round = max_round
self.num_round = 0
self.epoch_count = 0
self.best_epoch = 0
self.last_best = None
self.higher_better = higher_better
self.tolerance = tolerance
def early_stop_check(self, curr_val):
if not self.higher_better:
curr_val *= -1
if self.last_best is None:
self.last_best = curr_val
elif (curr_val - self.last_best) / np.abs(self.last_best) > self.tolerance:
self.last_best = curr_val
self.num_round = 0
self.best_epoch = self.epoch_count
else:
self.num_round += 1
self.epoch_count += 1
return self.num_round >= self.max_round
\ No newline at end of file
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
from torch import Tensor
from typing import *
from torch_scatter import segment_csr, gather_csr
from torch_sparse import SparseTensor
__all__ = [
"EmmaAttention",
"EmmaSum",
]
class EmmaAttention(nn.Module):
def __init__(self) -> None:
super().__init__()
self.register_buffer(
"his_x",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"his_m",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"inv_w",
torch.empty(0),
persistent=False,
)
self.reset_parameters()
def reset_parameters(self):
self.get_buffer("his_x").zero_()
self.get_buffer("his_m").fill_(-torch.inf)
self.get_buffer("inv_w").zero_()
def forward(self, x: Tensor, max_a: Tensor, agg_n: Tensor):
if self.training:
his_x = self.get_buffer("his_x")
his_m = self.get_buffer("his_m")
inv_w = self.get_buffer("inv_w")
x = EmmaAttentionFunction.apply(
x, max_a, his_x, his_m, agg_n, inv_w)
else:
inv_w = 1.0 / agg_n.data
inv_w[agg_n == 0] = 0.0
self._copy_or_clone("his_x", x)
self._copy_or_clone("his_m", max_a)
self._copy_or_clone("inv_w", inv_w)
return x
def _copy_or_clone(self, name: str, x: Tensor):
_x = self.get_buffer(name)
if _x.size() != x.size():
self.register_buffer(
name, x.data.clone(), persistent=False)
else:
_x.copy_(x.data)
@staticmethod
def softmax_gat(
src_a: Tensor,
dst_a: Tensor,
adj_t: SparseTensor,
negative_slope: float = 0.01,
) -> Tuple[SparseTensor, Tensor]:
assert src_a.dim() in {1, 2}
assert src_a.dim() == dst_a.dim()
ptr, ind, val = adj_t.csr()
a = src_a[ind] + gather_csr(dst_a, ptr)
a = F.leaky_relu(a, negative_slope=negative_slope)
with torch.no_grad():
max_a = torch.full_like(dst_a, -torch.inf)
max_a = segment_csr(a, ptr, reduce="max", out=max_a)
exp_a = torch.exp(a - gather_csr(max_a, ptr))
if val is not None:
assert val.dim() == 1
if exp_a.dim() == 1:
exp_a = exp_a * val
else:
exp_a = exp_a * val.unsqueeze(-1)
sum_exp_a = segment_csr(exp_a, ptr, reduce="sum")
exp_a = exp_a / gather_csr(sum_exp_a, ptr)
with torch.no_grad():
max_a.add_(sum_exp_a.log())
adj_t = SparseTensor(rowptr=ptr, col=ind, value=exp_a)
return adj_t, max_a
@staticmethod
def apply_gat(
x: Tensor,
src_a: Tensor,
dst_a: Tensor,
adj_t: SparseTensor,
negative_slope: float = 0.01,
) -> Tuple[Tensor, Tensor]:
adj_t, max_a = EmmaAttention.softmax_gat(
src_a=src_a, dst_a=dst_a,
adj_t=adj_t, negative_slope=negative_slope,
)
ptr, ind, val = adj_t.csr()
if val.dim() == 1:
assert x.dim() == 2
x = adj_t @ x
elif val.dim() == 2:
assert x.dim() == 3
assert x.size(1) == val.size(1)
xs = []
for i in range(x.size(1)):
xs.append(
SparseTensor(
rowptr=ptr, col=ind, value=val[:,i],
) @ x[:,i,:]
)
x = torch.cat(xs, dim=1).view(-1, *x.shape[1:])
return x, max_a
class EmmaAttentionFunction(autograd.Function):
@staticmethod
def forward(
ctx: autograd.function.FunctionCtx,
x: Tensor,
max_a: Tensor,
his_x: Tensor,
his_m: Tensor,
agg_n: Tensor,
inv_w: Tensor,
):
assert x.dim() in {2, 3}
assert x.dim() == his_x.dim()
assert max_a.dim() == his_m.dim()
beta = (1.0 - inv_w * agg_n).clamp_(0.0, 1.0)
if x.dim() == 2:
assert max_a.dim() == 1
elif x.dim() == 3:
assert max_a.dim() == 2
beta = beta.unsqueeze_(-1)
max_m = torch.max(max_a, his_m)
p = (his_m - max_m).nan_to_num_(0.0).exp_().mul_(beta)
q = (max_a - max_m).nan_to_num_(0.0).exp_()
t = p + q
p.div_(t).unsqueeze_(-1)
q.div_(t).unsqueeze_(-1)
his_x.mul_(p).add_(x * q)
his_m.copy_(max_m).add_(t.log_())
ctx.save_for_backward(q)
return his_x
@staticmethod
def backward(
ctx: autograd.function.FunctionCtx,
grad: Tensor,
):
q, = ctx.saved_tensors
return grad * q, None, None, None, None, None
class EmmaSum(nn.Module):
def __init__(self) -> None:
super().__init__()
self.register_buffer(
"his_x",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"inv_w",
torch.empty(0),
persistent=False,
)
self.reset_parameters()
def reset_parameters(self):
self.get_buffer("his_x").zero_()
self.get_buffer("inv_w").zero_()
def forward(self, x: Tensor, agg_n: Tensor, aggr: str = "sum"):
assert aggr in {"sum", "mean"}
if self.training:
his_x = self.get_buffer("his_x")
inv_w = self.get_buffer("inv_w")
x = EmmaSumFunction.apply(x, his_x, agg_n, inv_w)
else:
inv_w = 1.0 / agg_n.data
inv_w[agg_n == 0] = 0.0
self._copy_or_clone("his_x", x)
self._copy_or_clone("inv_w", inv_w)
if aggr == "mean":
x = x * inv_w[:,None]
return x
def _copy_or_clone(self, name: str, x: Tensor):
_x = self.get_buffer(name)
if _x.size() != x.size():
self.register_buffer(
name, x.data.clone(), persistent=False)
else:
_x.copy_(x.data)
class EmmaSumFunction(autograd.Function):
@staticmethod
def forward(
ctx: autograd.function.FunctionCtx,
x: Tensor,
his_x: Tensor,
agg_n: Tensor,
inv_w: Tensor,
):
assert x.dim() == 2
assert his_x.dim() == x.dim()
beta = (1.0 - inv_w * agg_n) \
.clamp_(0.0, 1.0).unsqueeze_(-1)
his_x.mul_(beta).add_(x)
# ctx.save_for_backward(inv_w)
return his_x
@staticmethod
def backward(
ctx: autograd.function.FunctionCtx,
grad: Tensor,
):
# inv_w, = ctx.saved_tensors
# return grad * inv_w[:,None], None, None, None
return grad, None, None, None
\ No newline at end of file
......@@ -75,6 +75,12 @@ class LayerPipe(ABC):
models.append((key, val))
return tuple(models)
def parameters(self):
params: List[nn.Parameter] = []
for name, m in self.get_model():
params.extend(m.parameters())
return params
def register_route(self, *xs: Tensor):
for t in xs:
t.requires_route = True
......
......@@ -55,6 +55,12 @@ class SequencePipe(ABC):
models.append((key, val))
return tuple(models)
def parameters(self):
params: List[nn.Parameter] = []
for name, m in self.get_model():
params.extend(m.parameters())
return params
def to(self, device: Any):
for _, net in self.get_model():
net.to(device)
......
......@@ -17,12 +17,31 @@ class FetchFeatureCache:
graph: DistributedGraphStore,
mailbox:SharedMailBox = None,
policy = 'lru'):
"""
method to create a fetch cache instance.
Args:
num_nodes: Total number of nodes in the graph.
num_edges: Total number of edges in the graph.
edge_cache_ratio: Proportion of edges to keep in the cache.
node_cache_ratio: Proportion of nodes to keep in the cache.
graph: Distributed graph store.
mailbox: Shared mailbox used for storing node memory and messages.
policy: Caching policy, either 'lru' or 'static'.
"""
global _FetchCache
_FetchCache = FetchFeatureCache(num_nodes, num_edges,
edge_cache_ratio, node_cache_ratio,
graph,mailbox,policy)
@staticmethod
def getFetchCache():
"""
method to get the existing fetch cache instance.
Returns:
FetchFeatureCache: The existing fetch cache instance.
"""
global _FetchCache
return _FetchCache
def __init__(self, num_nodes: int, num_edges: int,
......@@ -31,6 +50,19 @@ class FetchFeatureCache:
mailbox:SharedMailBox = None,
policy = 'lru'
):
"""
Initializes the FetchFeatureCache instance.
Args:
num_nodes: Total number of nodes in the graph.
num_edges: Total number of edges in the graph.
edge_cache_ratio: Proportion of edges to keep in the cache.
node_cache_ratio: Proportion of nodes to keep in the cache.
graph: Distributed graph store.
mailbox: Shared mailbox used for storing node memory and messages.
policy: Caching policy, either 'lru' or 'static'.
"""
if policy == 'lru':
init_fn = LRU_cache.LRUCache
elif policy == 'static':
......@@ -62,7 +94,17 @@ class FetchFeatureCache:
def fetch_feature(self, nid: Optional[torch.Tensor] = None, dist_nid = None,
eid: Optional[torch.Tensor] = None, dist_eid = None
):
):
"""
Fetches node and edge features along with mailbox memory.
Args:
nid: Node indices to fetch features for.
dist_nid: The distributed indices corresponding to nid, used for remote communication.
eid: Edge indices to fetch features for.
dist_eid: The distributed indices corresponding to eid, used for remote communication.
"""
nfeat = None
mem = None
efeat = None
......@@ -147,6 +189,14 @@ class FetchFeatureCache:
return nfeat,efeat,mem
def init_cache_with_presample(self,dataloader, num_epoch:int = 10):
"""
Initializes the cache with pre-sampled data from the provided dataloader.
Args:
dataloader: The data loader we implement, containing the graph data.
num_epoch: Number of epochs to pre-sample the data.
"""
node_size = self.node_cache.capacity if self.node_cache is not None else 0
edge_size = self.edge_cache.capacity if self.edge_cache is not None else 0
node_counts,edge_counts = pre_sample(dataloader=dataloader,
......
......@@ -21,10 +21,54 @@ import math
class DistributedDataLoader:
'''
Args:
data_path: the path of loaded graph ,each part 0 of graph is saved on $path$/rank_0
num_replicas: the num of worker
We will perform feature fetching in the data loader.
You can simply define a data loader for use, while StarryGL assists in fetching node or edge features:
Args:
graph: distributed graph store
data: the graph data
sampler: a parallel sampler like `NeighborSampler` above
sampler_fn: sample type
neg_sampler: negative sampler
batch_size: batch size
mailbox: APAN's mailbox and TGN's memory implemented by starrygl
Examples:
.. code-block:: python
import torch
from starrygl.sample.data_loader import DistributedDataLoader
from starrygl.sample.part_utils.partition_tgnn import partition_load
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.memory.shared_mailbox import SharedMailBox
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
from starrygl.sample.sample_core.base import NegativeSampling
from starrygl.sample.batch_data import SAMPLE_TYPE
pdata = partition_load("PATH/{}".format(dataname), algo="metis_for_tgnn")
graph = DistributedGraphStore(pdata = pdata, uvm_edge = False, uvm_node = False)
sample_graph = TemporalNeighborSampleGraph(sample_graph = pdata.sample_graph,mode = 'full')
mailbox = SharedMailBox(pdata.ids.shape[0], memory_param, dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0)
sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=1, fanout=[10], graph_data=sample_graph, workers=15, policy='recent', graph_name="wiki_train")
neg_sampler = NegativeSampling('triplet')
train_data = torch.masked_select(graph.edge_index, pdata.train_mask.to(graph.edge_index.device)).reshape(2, -1)
trainloader = DistributedDataLoader(graph, train_data, sampler=sampler, sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES, neg_sampler=neg_sampler, batch_size=1000, shuffle=False, drop_last=True, chunk_size=None, train=True, mailbox=mailbox)
In the data loader we call `graph_sample`, sourced from `starrygl.sample.batch_data`, and its `to_block` function performs the feature fetching.
If the cache is not used, node or edge features are fetched directly from the graph data; otherwise `fetch_data` is called to fetch the features.
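A minimal sketch of iterating the loader (the exact contents of each batch depend on the sampler, negative sampler and mailbox configuration):
.. code-block:: python
for batch_data in trainloader:
    # node/edge features (and mailbox memory, if configured) are already fetched here
    ...  # hypothetical training step using batch_data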
'''
def __init__(
......@@ -111,10 +155,10 @@ class DistributedDataLoader:
self.expected_idx = data_size // self.batch_size if self.drop_last is True else int(math.ceil(data_size/self.batch_size))
if dist.get_world_size() > 1:
num_epochs = torch.tensor([self.expected_idx],dtype = torch.long,device=self.device)
print(num_epochs)
dist.all_reduce(num_epochs, op=op)
self.expected_idx = int(num_epochs.item())
num_batchs = torch.tensor([self.expected_idx],dtype = torch.long,device=self.device)
print("num_batchs:", num_batchs)
dist.all_reduce(num_batchs, op=op)
self.expected_idx = int(num_batchs.item())
def _next_data(self):
if self.current_pos >= self.dataset.len:
......@@ -148,6 +192,7 @@ class DistributedDataLoader:
self.device)
self.recv_idxs += 1
assert batch_data is not None
torch.cuda.synchronize()
return batch_data
else :
raise StopIteration
......
import starrygl
from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex, DistributedTensor
from starrygl.sample.graph_core.utils import build_mapper
......@@ -6,8 +7,22 @@ import torch
import torch.distributed as dist
from torch_geometric.data import Data
from starrygl.utils.uvm import cudaMemoryAdvise, uvm_advise, uvm_empty, uvm_prefetch, uvm_share
class DistributedGraphStore:
'''
Initializes the DistributedGraphStore with distributed graph data.
Args:
pdata: Graph data object containing ids, eids, edge_index, edge_ts, sample_graph, x, and edge_attr.
device: Device to which tensors are moved (default is 'cuda').
uvm_node: If True, enables Unified Virtual Memory (UVM) for node data.
uvm_edge: If True, enables Unified Virtual Memory (UVM) for edge data.
'''
def __init__(self, pdata, device = torch.device('cuda'),
uvm_node = False,
uvm_edge = False):
......@@ -36,12 +51,12 @@ class DistributedGraphStore:
x = pdata.x.to(self.device)
else:
if self.device.type == 'cuda':
x = uvm_empty(*pdata.x.size(),
x = starrygl.utils.uvm.uvm_empty(*pdata.x.size(),
dtype=pdata.x.dtype,
device=ctx.device)
uvm_share(x,device = ctx.device)
uvm_advise(x,cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
uvm_prefetch(x)
starrygl.utils.uvm.uvm_share(x,device = ctx.device)
starrygl.utils.uvm.uvm_advise(x,starrygl.utils.uvm.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
starrygl.utils.uvm.uvm_prefetch(x)
if world_size > 1:
self.x = DistributedTensor(pdata.x.to(self.device).to(torch.float))
else:
......@@ -56,12 +71,12 @@ class DistributedGraphStore:
edge_attr = pdata.edge_attr.to(self.device)
else:
if self.device.type == 'cuda':
edge_attr = uvm_empty(*pdata.edge_attr.size(),
edge_attr = starrygl.utils.uvm.uvm_empty(*pdata.edge_attr.size(),
dtype=pdata.edge_attr.dtype,
device=ctx.device)
uvm_share(edge_attr,device = ctx.device)
uvm_advise(edge_attr,cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
uvm_prefetch(edge_attr)
starrygl.utils.uvm.uvm_share(edge_attr,device = ctx.device)
starrygl.utils.uvm.uvm_advise(edge_attr,starrygl.utils.uvm.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
starrygl.utils.uvm.uvm_prefetch(edge_attr)
if world_size > 1:
self.edge_attr = DistributedTensor(edge_attr)
else:
......@@ -70,6 +85,15 @@ class DistributedGraphStore:
self.edge_attr = None
def _get_node_attr(self,ids,asyncOp = False):
'''
Retrieves node attributes for the specified node IDs.
Args:
ids: Node IDs for which to retrieve attributes.
asyncOp: If True, performs asynchronous operation for distributed data.
'''
if self.x is None:
return None
elif dist.get_world_size() == 1:
......@@ -81,6 +105,15 @@ class DistributedGraphStore:
return self.x.index_select(ids)
def _get_edge_attr(self,ids,asyncOp = False):
'''
Retrieves edge attributes for the specified edge IDs.
Args:
ids: Edge IDs for which to retrieve attributes.
asyncOp: If True, performs asynchronous operation for distributed data.
'''
if self.edge_attr is None:
return None
elif dist.get_world_size() == 1:
......@@ -93,9 +126,32 @@ class DistributedGraphStore:
return self.edge_attr.index_select(ids)
def _get_dist_index(self,ind,mapper):
'''
Retrieves the distributed index for the specified local index using the provided mapper.
Args:
ind: Local index for which to retrieve the distributed index.
mapper: Mapper providing the distributed index.
'''
return mapper[ind.to(mapper.device)]
class DataSet:
'''
Args:
nodes: Tensor representing nodes. If not None, it is moved to the specified device.
edges: Tensor representing edges. If not None, it is moved to the specified device.
labels: Optional parameter for labels.
ts: Tensor representing timestamps. If not None, it is moved to the specified device.
device: Device to which tensors are moved (default is 'cuda').
'''
def __init__(self,nodes = None,
edges = None,
labels = None,
......@@ -110,10 +166,15 @@ class DataSet:
if labels is not None:
self.labels = labels
self.len = self.nodes.shape[0] if nodes is not None else self.edges.shape[1]
for k, v in kwargs.items():
assert isinstance(v,torch.Tensor) and v.shape[0]==self.len
setattr(self, k, v.to(device))
def _get_empty(self):
'''
Creates an empty dataset with the same device and data types as the current instance.
'''
nodes = torch.empty([],dtype = self.nodes.dtype,device= self.nodes.device)if hasattr(self,'nodes') else None
edges = torch.empty([[],[]],dtype = self.edges.dtype,device= self.edges.device)if hasattr(self,'edges') else None
d = DataSet(nodes,edges)
......@@ -126,6 +187,13 @@ class DataSet:
#@staticmethod
def get_next(self,indx):
'''
Retrieves the next dataset based on the provided index.
Args:
indx: Index specifying the dataset to retrieve.
'''
nodes = self.nodes[indx] if hasattr(self,'nodes') else None
edges = self.edges[:,indx] if hasattr(self,'edges') else None
d = DataSet(nodes,edges)
......@@ -138,6 +206,10 @@ class DataSet:
#@staticmethod
def shuffle(self):
'''
Shuffles the dataset and returns a new dataset with the same attributes.
'''
indx = torch.randperm(self.len)
nodes = self.nodes[indx] if hasattr(self,'nodes') else None
edges = self.edges[:,indx] if hasattr(self,'edges') else None
......@@ -151,7 +223,7 @@ class DataSet:
class TemporalGraphData(DistributedGraphStore):
def __init__(self,pdata,device):
super(TemporalGraphData,self).__init__(pdata,device)
super(DistributedGraphStore,self).__init__(pdata,device)
def _set_temporal_batch_cache(self,size,pin_size):
pass
def _load_feature_to_cuda(self,ids):
......@@ -161,6 +233,17 @@ class TemporalGraphData(DistributedGraphStore):
class TemporalNeighborSampleGraph(DistributedGraphStore):
'''
Args:
sample_graph: A dictionary containing graph structure information, including 'edge_index', 'ts' (edge timestamp), and 'eids' (edge identifiers).
mode: Specifies the dataset mode ('train', 'val', 'test', or 'full').
eids_mapper: Optional parameter for edge identifiers mapping.
'''
def __init__(self, sample_graph=None, mode='full', eids_mapper=None):
self.edge_index = sample_graph['edge_index']
self.num_edges = self.edge_index.shape[1]
......
import starrygl
from typing import Union
from typing import List
from typing import Optional
......@@ -8,9 +9,41 @@ from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex, DistributedTensor
import torch.distributed as dist
from starrygl.utils.uvm import cudaMemoryAdvise, uvm_advise, uvm_empty, uvm_prefetch, uvm_share
#from starrygl.utils.uvm import cudaMemoryAdvise
class SharedMailBox():
'''
We will first define our mailbox, including the definitions of the mailbox and memory:
.. code-block:: python
from starrygl.sample.memory.shared_mailbox import SharedMailBox
mailbox = SharedMailBox(num_nodes=num_nodes, memory_param=memory_param, dim_edge_feat=dim_edge_feat)
Args:
num_nodes (int): number of nodes
memory_param (dict): the memory parameters in the yaml file; refer to TGL
dim_edge_feat (int): the dim of edge feature
device (torch.device): the device used to store MailBox
uvm (bool): 1-use uvm, 0-don't use uvm
Examples:
.. code-block:: python
from starrygl.sample.part_utils.partition_tgnn import partition_load
from starrygl.sample.memory.shared_mailbox import SharedMailBox
pdata = partition_load("PATH/{}".format(dataname), algo="metis_for_tgnn")
mailbox = SharedMailBox(pdata.ids.shape[0], memory_param, dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0)
We then need to hand over the mailbox to the data loader as in the above example, so that the relevant memory/mailbox can be directly loaded during training.
During training, we constantly call the `get_update_memory`/`get_update_mail` functions to update the relevant storage, following the idea of TGN.
'''
def __init__(self,
num_nodes,
memory_param,
......@@ -47,18 +80,18 @@ class SharedMailBox():
if uvm is True:
ctx = DistributedContext.get_default_context()
node_memory = uvm_empty(*node_memory.shape,
node_memory = starrygl.utils.uvm.uvm_empty(*node_memory.shape,
dtype=node_memory.dtype,
device=ctx.device)
uvm_share(node_memory,device = ctx.device)
uvm_advise(node_memory,cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
uvm_prefetch(node_memory)
mailbox = uvm_empty(*mailbox.shape,
starrygl.utils.uvm.uvm_share(node_memory,device = ctx.device)
starrygl.utils.uvm.uvm_advise(node_memory,starrygl.utils.uvm.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
starrygl.utils.uvm.uvm_prefetch(node_memory)
mailbox = starrygl.utils.uvm.uvm_empty(*mailbox.shape,
dtype=mailbox.dtype,
device=ctx.device)
uvm_share(mailbox,device = ctx.device)
uvm_advise(mailbox,cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
uvm_prefetch(mailbox)
starrygl.utils.uvm.uvm_share(mailbox,device = ctx.device)
starrygl.utils.uvm.uvm_advise(mailbox,starrygl.utils.uvm.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy)
starrygl.utils.uvm.uvm_prefetch(mailbox)
self.node_memory = DistributedTensor(node_memory)
self.node_memory_ts = DistributedTensor(node_memory_ts)
self.mailbox = DistributedTensor(mailbox)
......@@ -266,7 +299,7 @@ class SharedMailBox():
def get_update_mail(self,dist_indx_mapper,
src,dst,ts,edge_feats,
memory):
memory,embedding=None,use_src_emb=False,use_dst_emb=False):
if edge_feats is not None:
edge_feats = edge_feats.to(self.device).to(self.mailbox.dtype)
src = src.to(self.device)
......@@ -276,12 +309,14 @@ class SharedMailBox():
mem_src = memory[src]
mem_dst = memory[dst]
if embedding is not None:
emb_src = embedding[src]
emb_dst = embedding[dst]
src_mail = torch.cat([emb_src if use_src_emb else mem_src, emb_dst if use_dst_emb else mem_dst], dim=1)
dst_mail = torch.cat([emb_dst if use_src_emb else mem_dst, emb_src if use_dst_emb else mem_src], dim=1)
if edge_feats is not None:
src_mail = torch.cat([mem_src, mem_dst, edge_feats], dim=1)
dst_mail = torch.cat([mem_dst, mem_src, edge_feats], dim=1)
else:
src_mail = torch.cat([mem_src, mem_dst], dim=1)
dst_mail = torch.cat([mem_dst, mem_src], dim=1)
src_mail = torch.cat([src_mail, edge_feats], dim=1)
dst_mail = torch.cat([dst_mail, edge_feats], dim=1)
mail = torch.cat([src_mail, dst_mail], dim=1).reshape(-1, src_mail.shape[1])
mail_ts = torch.cat((ts,ts),-1).to(self.device).to(self.mailbox_ts.dtype)
unq_index,inv = torch.unique(index,return_inverse = True)
......@@ -291,7 +326,6 @@ class SharedMailBox():
index = unq_index
return index,mail,mail_ts
def get_update_memory(self,index,memory,memory_ts):
unq_index,inv = torch.unique(index,return_inverse = True)
max_ts,idx = torch_scatter.scatter_max(memory_ts,inv,0)
......
from torch_sparse import SparseTensor
from torch_geometric.data import Data
from torch_geometric.utils import degree
import os.path as osp
import os
import shutil
......
import os.path as osp
import torch
class GraphData():
def __init__(self, path):
assert path is not None and osp.exists(path),'path does not exist'
id,edge_index,data,partptr =torch.load(path)
# index of the current partition
self.partition_id = id
# total number of partitions
self.partitions = partptr.numel() - 1
# full-graph structure data
self.num_nodes = partptr[self.partitions]
self.num_edges = edge_index[0].numel()
self.edge_index = edge_index
# data of this partition (feature vectors and subgraph structure), a PyG Data object
self.data = data
# partition mapping
self.partptr = partptr
self.eid = [i for i in range(self.num_edges)]
def __init__(self, id, edge_index, data, partptr, timestamp=None):
# index of the current partition
self.partition_id = id
# total number of partitions
self.partitions = partptr.numel() - 1
# full-graph structure data
self.num_nodes = partptr[self.partitions]
if edge_index is not None:
self.num_edges = edge_index[0].numel()
self.edge_index = edge_index
self.edge_ts = timestamp
# data of this partition (feature vectors and subgraph structure), a PyG Data object
self.data = data
# partition mapping
self.partptr = partptr
# edge id
self.eid = torch.tensor([i for i in range(0, self.num_edges)])
def select_attr(self,index):
return torch.index_select(self.data.x,0,index)
# return the partition that a global node id belongs to
def get_part_num(self):
return self.data.x.size()[0]
def select_attr(self,index):
return torch.index_select(self.data.x,0,index)
def select_y(self,index):
return torch.index_select(self.data.y,0,index)
# return the partition that a global node id belongs to
def get_localId_by_partitionId(self,id,index):
#print(index)
if(id == -1 or id == 0):
return index
else:
return torch.add(index,-self.partptr[id])
def get_globalId_by_partitionId(self,id,index):
if(id == -1 or id == 0):
return index
else:
return torch.add(index,self.partptr[id])
def get_node_num(self):
return self.num_nodes
def localId_to_globalId(self,id,partitionId:int = -1):
'''
Map a node id within partition partitionId to its global id.
'''
if partitionId == -1:
partitionId = self.partition_id
assert id >=self.partptr[self.partition_id] and id < self.partptr[self.partition_id+1]
ids_before = 0
if self.partition_id>0:
ids_before = self.partptr[self.partition_id-1]
return id+ids_before
def get_partitionId_by_globalId(self,id):
'''
Get the partition index corresponding to a global id.
'''
partitionId = -1
assert id>=0 and id<self.num_nodes,'id out of range'
for i in range(self.partitions):
if id>=self.partptr[i] and id<self.partptr[i+1]:
partitionId = i
break
assert partitionId>=0, 'no partition found for this id'
return partitionId
def get_nodes_by_partitionId(self,id):
'''
Return the number of nodes in the partition given by partitionId.
'''
assert id>=0 and id<self.partitions,'invalid partitionId'
return (int)(self.partptr[id+1]-self.partptr[id])
def __repr__(self):
return (f'{self.__class__.__name__}(\n'
f' partition_id={self.partition_id}\n'
f' data={self.data},\n'
f' global_info('
f'num_nodes={self.num_nodes},'
f' num_edges={self.num_edges},'
f' num_parts={self.partitions},'
f' edge_index=[2,{self.edge_index[0].numel()}])\n'
f')')
import starrygl
import sys
from os.path import abspath, join, dirname
sys.path.insert(0, join(abspath(dirname(__file__))))
import math
import torch
......@@ -9,16 +9,72 @@ from typing import Optional, Tuple
from .base import BaseSampler, NegativeSampling, SampleOutput, SampleType
# from sample_cores import ParallelSampler, get_neighbors, heads_unique
from starrygl.lib.libstarrygl_sampler import ParallelSampler, get_neighbors
from torch.distributed.rpc import rpc_async
# def outer_sample(graph_name, nodes, ts, fanout_index, with_outer_sample = SampleType.Outer): # by default, continue sampling outward here
# local_sampler = get_local_sampler(graph_name)
# assert local_sampler is not None, 'Local_sampler is None!!!'
# out = local_sampler.sample_from_nodes(nodes, with_outer_sample, ts, fanout_index)
# return out
from torch.distributed.rpc import rpc_async
class NeighborSampler(BaseSampler):
r'''
Parallel sampling is crucial for scaling model training to large amounts of data. Due to the large scale and complexity of graph data, traditional serial sampling can waste significant computing and storage resources. Parallel sampling improves the efficiency and overall speed of sampling by sampling from multiple nodes or neighbors simultaneously.
This helps to accelerate the training and inference process of the model, making it more scalable and practical when dealing with large-scale graph data.
Our parallel sampling adopts a hybrid CPU/GPU approach: the entire graph structure is stored on the CPU, the sampling is performed on the CPU, and the sampled results are then uploaded to the GPU. Each trainer has a separate sampler for parallel training.
We have encapsulated the functions for parallel sampling, and you can easily use them in the following ways:
.. code-block:: python
# First, you need to import the Python packages
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
# Then, you can use our parallel sampler
sampler = NeighborSampler(num_nodes=num_nodes, num_layers=num_layers, fanout=fanout, graph_data=graph_data,
workers=workers, is_distinct = is_distinct, policy = policy, edge_weight= edge_weight, graph_name = graph_name)
Args:
num_nodes (int): the num of all nodes in the graph
num_layers (int): the num of layers to be sampled
fanout (list): the maximum number of neighbors chosen for each layer
graph_data (:class: starrygl.sample.sample_core.neighbor_sampler): the graph data you want to sample
workers (int): the number of threads, default value is 1
is_distinct (bool): 1 - need distinct multi-edges, 0 - don't need distinct multi-edges
policy (str): "uniform" or "recent" or "weighted"
edge_weight (torch.Tensor,Optional): the initial weights of edges
graph_name (str): the name of the graph; either edge_index or (neighbors, deg) should be provided
Examples:
.. code-block:: python
from starrygl.sample.part_utils.partition_tgnn import partition_load
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
pdata = partition_load("PATH/{}".format(dataname), algo="metis_for_tgnn")
graph = DistributedGraphStore(pdata = pdata,uvm_edge = False,uvm_node = False)
sample_graph = TemporalNeighborSampleGraph(sample_graph = pdata.sample_graph,mode = 'full')
sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=1, fanout=[10],
graph_data=sample_graph, workers=15, policy = 'recent', graph_name = "wiki_train")
If you want to directly call parallel sampling functions, use the following methods:
.. code-block:: python
# the parameter meaning is the same as the `Args` above
from starrygl.lib.libstarrygl_sampler import ParallelSampler, get_neighbors
# get the neighbor information table; row and col come from graph_data.edge_index = (row, col)
tnb = get_neighbors(graph_name, row.contiguous(), col.contiguous(), num_nodes, is_distinct, graph_data.eid, edge_weight, timestamp)
# call parallel sampler
p_sampler = ParallelSampler(self.tnb, num_nodes, graph_data.num_edges, workers, fanout, num_layers, policy)
For complete usage and more details, please refer to `~starrygl.sample.sample_core.neighbor_sampler`
'''
def __init__(
self,
num_nodes: int,
......@@ -68,11 +124,11 @@ class NeighborSampler(BaseSampler):
row, col = graph_data.edge_index
if(edge_weight is not None):
edge_weight = edge_weight.float().contiguous()
self.tnb = get_neighbors(graph_name, row.contiguous(), col.contiguous(), num_nodes, is_distinct, eid, edge_weight, timestamp)
self.tnb = starrygl.sampler_ops.get_neighbors(graph_name, row.contiguous(), col.contiguous(), num_nodes, is_distinct, eid, edge_weight, timestamp)
else:
assert tnb is not None
self.tnb = tnb
self.p_sampler = ParallelSampler(self.tnb, num_nodes, graph_data.num_edges, workers,
self.p_sampler = starrygl.sampler_ops.ParallelSampler(self.tnb, num_nodes, graph_data.num_edges, workers,
fanout, num_layers, policy)
def _get_sample_info(self):
......