reorganize c++ sampler

c71ae1f5 · xxx · 1fa9b9fa · c71ae1f5 · c71ae1f5 · c71ae1f5
Commit c71ae1f5 authored Dec 20, 2023 by xxx
7 changed files
--- a/csrc/sampler/export.cpp
+++ b/csrc/sampler/export.cpp
--- a/csrc/sampler/include/head.h
+++ b/csrc/sampler/include/head.h
+#pragma once
+#include <iostream>
+#include <torch/extension.h>
+#include <omp.h>
+#include <time.h>
+#include <random>
+#include <parallel_hashmap/phmap.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/stl.h>
+
+using namespace std;
+namespace py = pybind11;
+namespace th = torch;
+
+typedef int64_t NodeIDType;
+typedef int64_t EdgeIDType;
+typedef float WeightType;
+typedef float TimeStampType;
+
+class TemporalNeighborBlock;
+class TemporalGraphBlock;
+class ParallelSampler;
+
+TemporalNeighborBlock& get_neighbors(string graph_name, th::Tensor row, th::Tensor col, int64_t num_nodes, int is_distinct, optional<th::Tensor> eid, optional<th::Tensor> edge_weight, optional<th::Tensor> time);
+th::Tensor heads_unique(th::Tensor array, th::Tensor heads, int threads);
+int nodeIdToInOut(NodeIDType nid, int pid, const vector<NodeIDType>& part_ptr);
+int nodeIdToPartId(NodeIDType nid, const vector<NodeIDType>& part_ptr);
+vector<th::Tensor> divide_nodes_to_part(th::Tensor nodes, const vector<NodeIDType>& part_ptr, int threads);
+NodeIDType sample_multinomial(const vector<WeightType>& weights, default_random_engine& e);
+
+
+
+// 辅助函数
+template<typename T>
+inline py::array vec2npy(const std::vector<T> &vec)
+{
+    // need to let python garbage collector handle C++ vector memory 
+    // see https://github.com/pybind/pybind11/issues/1042
+    // non-copy value transfer
+    auto v = new std::vector<T>(vec);
+    auto capsule = py::capsule(v, [](void *v)
+                               { delete reinterpret_cast<std::vector<T> *>(v); });
+    return py::array(v->size(), v->data(), capsule);
+    // return py::array(vec.size(), vec.data());
+}
+
+template <typename T>
+T* get_data_ptr(const th::Tensor& tensor) {
+    AT_ASSERTM(tensor.is_contiguous(), "Offset tensor must be contiguous");
+    AT_ASSERTM(tensor.dim() == 1, "Offset tensor must be one-dimensional");
+    return tensor.data_ptr<T>();
+}
+
+template <typename T>
+torch::Tensor vecToTensor(const std::vector<T>& vec) {
+    // 确定数据类型
+    torch::ScalarType dtype;
+    if (std::is_same<T, int64_t>::value) {
+        dtype = torch::kInt64;
+    } else if (std::is_same<T, float>::value) {
+        dtype = torch::kFloat32;
+    } else {
+        throw std::runtime_error("Unsupported data type");
+    }
+
+    // 创建Tensor
+    torch::Tensor tensor = torch::from_blob(
+        const_cast<T*>(vec.data()), /* 数据指针 */
+        {static_cast<long>(vec.size())}, /* 尺寸 */
+        dtype /* 数据类型 */
+    );
+
+    return tensor;//.clone(); // 克隆Tensor以拷贝数据
+}
+
+/*-------------------------------------------------------------------------------------**
+**------------Utils--------------------------------------------------------------------**
+**-------------------------------------------------------------------------------------*/
+
+th::Tensor heads_unique(th::Tensor array, th::Tensor heads, int threads){
+    auto array_ptr = array.data_ptr<NodeIDType>();
+    phmap::parallel_flat_hash_set<NodeIDType> s(array_ptr, array_ptr+array.numel());
+    if(heads.numel()==0) return th::tensor(vector<NodeIDType>(s.begin(), s.end()));
+    AT_ASSERTM(heads.is_contiguous(), "Offset tensor must be contiguous");
+    AT_ASSERTM(heads.dim() == 1, "0ffset tensor must be one-dimensional");
+    auto heads_ptr = heads.data_ptr<NodeIDType>();
+#pragma omp parallel for num_threads(threads)
+    for(int64_t i=0; i<heads.size(0); i++){
+        if(s.count(heads_ptr[i])==1){
+        #pragma omp critical(erase)
+            s.erase(heads_ptr[i]);
+        }
+    }
+    vector<NodeIDType> ret;
+    ret.reserve(s.size()+heads.numel());
+    ret.assign(heads_ptr, heads_ptr+heads.numel());
+    ret.insert(ret.end(), s.begin(), s.end());
+    // cout<<"s: "<<s.size()<<" array: "<<array.size()<<endl;
+    return th::tensor(ret);
+}
+
+int nodeIdToPartId(NodeIDType nid, const vector<NodeIDType>& part_ptr){
+    int partitionId = -1;
+    for(int i=0;i<part_ptr.size()-1;i++){
+            if(nid>=part_ptr[i]&&nid<part_ptr[i+1]){
+                partitionId = i;
+                break;
+            }
+    }
+    if(partitionId<0) throw "nid 不存在对应的分区";
+    return partitionId;
+}
+//0:inner; 1:outer
+int nodeIdToInOut(NodeIDType nid, int pid, const vector<NodeIDType>& part_ptr){
+    if(nid>=part_ptr[pid]&&nid<part_ptr[pid+1]){
+        return 0;
+    }
+    return 1;
+}
+
+vector<th::Tensor> divide_nodes_to_part(
+        th::Tensor nodes, const vector<NodeIDType>& part_ptr, int threads){
+    double start_time = omp_get_wtime();
+    AT_ASSERTM(nodes.is_contiguous(), "Offset tensor must be contiguous");
+    AT_ASSERTM(nodes.dim() == 1, "0ffset tensor must be one-dimensional");
+    auto nodes_id = nodes.data_ptr<NodeIDType>();
+    vector<vector<vector<NodeIDType>>> node_part_threads;
+    vector<th::Tensor> result(part_ptr.size()-1);
+    //初始化点的分区，每个分区按线程划分避免冲突
+    for(int i = 0; i<threads; i++){
+        vector<vector<NodeIDType>> node_parts;
+        for(int j=0;j<part_ptr.size()-1;j++){
+            node_parts.push_back(vector<NodeIDType>());
+        }
+        node_part_threads.push_back(node_parts);
+    }
+#pragma omp parallel for num_threads(threads) default(shared)
+    for(int64_t i=0; i<nodes.size(0); i++){
+        int tid = omp_get_thread_num();
+        int pid = nodeIdToPartId(nodes_id[i], part_ptr);
+        node_part_threads[tid][pid].emplace_back(nodes_id[i]);
+    }
+#pragma omp parallel for num_threads(part_ptr.size()-1) default(shared)
+    for(int i = 0; i<part_ptr.size()-1; i++){
+        vector<NodeIDType> temp;
+        for(int j=0;j<threads;j++){
+            temp.insert(temp.end(), node_part_threads[j][i].begin(), node_part_threads[j][i].end());
+        }
+        result[i]=th::tensor(temp);
+    }
+    double end_time = omp_get_wtime();
+    // cout<<"end divide consume: "<<end_time-start_time<<"s"<<endl;
+    return result;
+}
+
+float getRandomFloat(unsigned int *seed, float min, float max) {
+    float scale = rand_r(seed) / (float) RAND_MAX; // 转换为0到1之间的浮点数
+    return min + scale * (max - min); // 调整到min到max之间
+}
+
+NodeIDType sample_multinomial(const vector<WeightType>& weights, default_random_engine& e){
+    NodeIDType sample_indice;
+    vector<WeightType> cumulative_weights;
+    partial_sum(weights.begin(), weights.end(), back_inserter(cumulative_weights));
+    AT_ASSERTM(cumulative_weights.back() > 0, "Edge weight sum should be greater than 0.");
+    
+    // uniform_real_distribution<WeightType> distribution(0.0, cumulative_weights.back());
+    // WeightType random_value = distribution(e);
+    unsigned int loc_seed = omp_get_thread_num();
+    WeightType random_value = getRandomFloat(&loc_seed, 0.0, cumulative_weights.back());
+    auto it = lower_bound(cumulative_weights.begin(), cumulative_weights.end(), random_value);
+    sample_indice = distance(cumulative_weights.begin(), it);
+    return sample_indice;
+}
--- a/csrc/sampler/include/neighbors.h
+++ b/csrc/sampler/include/neighbors.h
--- a/csrc/sampler/include/output.h
+++ b/csrc/sampler/include/output.h
+#pragma once
+#include <head.h>
+
+class TemporalGraphBlock
+{
+    public:
+        vector<NodeIDType> row;
+        vector<NodeIDType> col;
+        vector<EdgeIDType> eid;
+        vector<TimeStampType> delta_ts;
+        vector<int64_t> src_index;
+        vector<NodeIDType> sample_nodes;
+        vector<TimeStampType> sample_nodes_ts;
+        double sample_time = 0;
+        double tot_time = 0;
+        int64_t sample_edge_num = 0;
+
+        TemporalGraphBlock(){}
+        // TemporalGraphBlock(const TemporalGraphBlock &tgb);
+        TemporalGraphBlock(vector<NodeIDType> &_row, vector<NodeIDType> &_col,
+                           vector<NodeIDType> &_sample_nodes):
+                           row(_row), col(_col), sample_nodes(_sample_nodes){}
+        TemporalGraphBlock(vector<NodeIDType> &_row, vector<NodeIDType> &_col,
+                           vector<NodeIDType> &_sample_nodes,
+                           vector<TimeStampType> &_sample_nodes_ts):
+                           row(_row), col(_col), sample_nodes(_sample_nodes),
+                           sample_nodes_ts(_sample_nodes_ts){}
+};
+
+class T_TemporalGraphBlock
+{
+    public:
+        th::Tensor row;
+        th::Tensor col;
+        th::Tensor eid;
+        th::Tensor delta_ts;
+        th::Tensor src_index;
+        th::Tensor sample_nodes;
+        th::Tensor sample_nodes_ts;
+        double sample_time = 0;
+        double tot_time = 0;
+        int64_t sample_edge_num = 0;
+
+        T_TemporalGraphBlock(){}
+        T_TemporalGraphBlock(th::Tensor &_row, th::Tensor &_col,
+                           th::Tensor &_sample_nodes):
+                           row(_row), col(_col), sample_nodes(_sample_nodes){}
+};
\ No newline at end of file
--- a/csrc/sampler/include/sampler.h
+++ b/csrc/sampler/include/sampler.h
--- a/starrygl/sample/sample_core/neighbor_sampler.py
+++ b/starrygl/sample/sample_core/neighbor_sampler.py
@@ -10,7 +10,8 @@ from typing import Optional, Tuple
 import graph_store
 from distparser import SampleType, NUM_SAMPLER
 from base import BaseSampler, NegativeSampling, SampleOutput
-from sample_cores import ParallelSampler, get_neighbors, heads_unique
+# from sample_cores import ParallelSampler, get_neighbors, heads_unique
+from starrygl.lib.libstarrygl_ops_sampler import ParallelSampler, get_neighbors
 from torch.distributed.rpc import rpc_async

 def outer_sample(graph_name, nodes, ts, fanout_index, with_outer_sample = SampleType.Outer):# 默认此时继续向外采样

--- a/starrygl/sample/sample_core/reddit_demo_before.py
+++ b/starrygl/sample/sample_core/reddit_demo_before.py
@@ -8,7 +8,7 @@ from torch_geometric.datasets import Reddit
 import time

 from tqdm import tqdm
-from Utils import GraphData
+from .Utils import GraphData

 class NegLinkSampler:

@@ -26,7 +26,7 @@ class NegLinkInductiveSampler:
        return np.random.choice(self.nodes, size=n)

 def load_reddit_dataset(data_path):
-    df = pd.read_csv('/home/zlj/hzq/project/code/TGL/DATA/{}/edges.csv'.format("REDDIT"))
+    df = pd.read_csv('/mnt/data/hzq/DATA/{}/edges.csv'.format("REDDIT"))
    num_nodes = max(int(df['src'].max()), int(df['dst'].max())) + 1
    src = torch.tensor(df['src'].to_numpy(dtype=int))
    dst = torch.tensor(df['dst'].to_numpy(dtype=int))
@@ -36,94 +36,94 @@ def load_reddit_dataset(data_path):
    return g, df


-
-parser=argparse.ArgumentParser()
-parser.add_argument('--data', type=str, help='dataset name',default="REDDIT")
-parser.add_argument('--config', type=str, help='path to config file',default="/home/zlj/hzq/project/code/TGL/config/TGN.yml")
-parser.add_argument('--batch_size', type=int, default=600, help='path to config file')
-parser.add_argument('--num_thread', type=int, default=64, help='number of thread')
-args=parser.parse_args()
-
-seed=10
-torch.manual_seed(seed) # 为CPU设置随机种子
-torch.cuda.manual_seed(seed) # 为当前GPU设置随机种子
-torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU，为所有GPU设置随机种子
-np.random.seed(seed)  # Numpy module.
-random.seed(seed)  # Python random module.	
-torch.backends.cudnn.benchmark = False
-torch.backends.cudnn.deterministic = True
-
-g_data, df = load_reddit_dataset("/home/zlj/hzq/code/dataset/reddit")
-print(g_data)
-# for worker in [1,2,3,4,5,6,7,8,9,10,20,30]:
-# import random
-# timestamp = [random.randint(1, 5) for i in range(0, g.num_edges)]
-# timestamp = torch.FloatTensor(timestamp)
-
-# print('begin load')
-# pre = time.time()
-# # timestamp = torch.load('/home/zlj/hzq/code/gnn/my_sampler/TemporalSample/timestamp.my')
-# tnb = torch.load("tnb_reddit_before.my")
-# end = time.time()
-# print("load time:", end-pre)
-# row, col = g.edge_index
-edge_weight=None
-# g_data = GraphData(id=1, edge_index=g.edge_index, timestamp=timestamp, data=g, partptr=torch.tensor([0, g.num_nodes//4, g.num_nodes//4*2, g.num_nodes//4*3, g.num_nodes]))
-
-from neighbor_sampler import NeighborSampler, SampleType, get_neighbors
-
-print('begin tnb')
-row, col = g_data.edge_index
-row = torch.cat([row, col])
-col = torch.cat([col, row])
-eid = torch.cat([g_data.eid, g_data.eid])
-timestamp = torch.cat([g_data.edge_ts, g_data.edge_ts])
-timestamp,ind = timestamp.sort()
-timestamp = timestamp.float().contiguous()
-eid  = eid[ind].contiguous()
-row = row[ind]
-col = col[ind]
-print(row, col)
-pre = time.time()
-tnb = get_neighbors("reddit", row.contiguous(), col.contiguous(), g_data.num_nodes, 0, eid, edge_weight, timestamp)
-end = time.time()
-print("init tnb time:", end-pre)
-torch.save(tnb, "tnb_reddit_before.my")
-
-
-pre = time.time()
-sampler = NeighborSampler(g_data.num_nodes, 
+def test():
+    parser=argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, help='dataset name',default="REDDIT")
+    parser.add_argument('--config', type=str, help='path to config file',default="/home/zlj/hzq/project/code/TGL/config/TGN.yml")
+    parser.add_argument('--batch_size', type=int, default=600, help='path to config file')
+    parser.add_argument('--num_thread', type=int, default=64, help='number of thread')
+    args=parser.parse_args()
+
+    seed=10
+    torch.manual_seed(seed) # 为CPU设置随机种子
+    torch.cuda.manual_seed(seed) # 为当前GPU设置随机种子
+    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU，为所有GPU设置随机种子
+    np.random.seed(seed)  # Numpy module.
+    random.seed(seed)  # Python random module.	
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+    g_data, df = load_reddit_dataset("/mnt/data/hzq/DATA/REDDIT")
+    print(g_data)
+    # for worker in [1,2,3,4,5,6,7,8,9,10,20,30]:
+    # import random
+    # timestamp = [random.randint(1, 5) for i in range(0, g.num_edges)]
+    # timestamp = torch.FloatTensor(timestamp)
+
+    # print('begin load')
+    # pre = time.time()
+    # # timestamp = torch.load('/home/zlj/hzq/code/gnn/my_sampler/TemporalSample/timestamp.my')
+    # tnb = torch.load("tnb_reddit_before.my")
+    # end = time.time()
+    # print("load time:", end-pre)
+    # row, col = g.edge_index
+    edge_weight=None
+    # g_data = GraphData(id=1, edge_index=g.edge_index, timestamp=timestamp, data=g, partptr=torch.tensor([0, g.num_nodes//4, g.num_nodes//4*2, g.num_nodes//4*3, g.num_nodes]))
+
+    from .neighbor_sampler import NeighborSampler, SampleType, get_neighbors
+
+    print('begin tnb')
+    row, col = g_data.edge_index
+    row = torch.cat([row, col])
+    col = torch.cat([col, row])
+    eid = torch.cat([g_data.eid, g_data.eid])
+    timestamp = torch.cat([g_data.edge_ts, g_data.edge_ts])
+    timestamp,ind = timestamp.sort()
+    timestamp = timestamp.float().contiguous()
+    eid  = eid[ind].contiguous()
+    row = row[ind]
+    col = col[ind]
+    print(row, col)
+    pre = time.time()
+    tnb = get_neighbors("reddit", row.contiguous(), col.contiguous(), g_data.num_nodes, 0, eid, edge_weight, timestamp)
+    end = time.time()
+    print("init tnb time:", end-pre)
+    torch.save(tnb, "tnb_reddit_before.my")
+
+
+    pre = time.time()
+    sampler = NeighborSampler(g_data.num_nodes, 
                            tnb=tnb,
                            num_layers=1, 
                            fanout=[10], 
                            graph_data=g_data, 
-                          workers=10, 
+                            workers=32, 
                            policy="recent", 
                            graph_name='a')
-end = time.time()
-print("init time:", end-pre)
+    end = time.time()
+    print("init time:", end-pre)

-# neg_link_sampler = NegLinkSampler(g_data.num_nodes)
-from base import NegativeSampling, NegativeSamplingMode
-neg_link_sampler = NegativeSampling(NegativeSamplingMode.triplet)
+    # neg_link_sampler = NegLinkSampler(g_data.num_nodes)
+    from .base import NegativeSampling, NegativeSamplingMode
+    neg_link_sampler = NegativeSampling(NegativeSamplingMode.triplet)

-# from torch_geometric.sampler import NeighborSampler, NumNeighbors, NodeSamplerInput, SamplerOutput
-# pre = time.time()
-# num_nei = NumNeighbors([100, 100])
-# node_idx = NodeSamplerInput(input_id=None, node=torch.tensor(range(g.num_nodes//4, g.num_nodes//4+600000)))# (input_id=None, node=torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
-# sampler = NeighborSampler(g, num_nei)
-# end = time.time()
-# print("init time:", end-pre)
+    # from torch_geometric.sampler import NeighborSampler, NumNeighbors, NodeSamplerInput, SamplerOutput
+    # pre = time.time()
+    # num_nei = NumNeighbors([100, 100])
+    # node_idx = NodeSamplerInput(input_id=None, node=torch.tensor(range(g.num_nodes//4, g.num_nodes//4+600000)))# (input_id=None, node=torch.masked_select(torch.arange(g.num_nodes),node_data['train_mask']))
+    # sampler = NeighborSampler(g, num_nei)
+    # end = time.time()
+    # print("init time:", end-pre)


-out = []
-tot_time = 0
-sam_time = 0
-sam_edge = 0
-pre = time.time()
+    out = []
+    tot_time = 0
+    sam_time = 0
+    sam_edge = 0
+    pre = time.time()


-for _, rows in tqdm(df.groupby(df.index // args.batch_size), total=len(df) // args.batch_size):
+    for _, rows in tqdm(df.groupby(df.index // args.batch_size), total=len(df) // args.batch_size):
            # root_nodes = torch.tensor(np.concatenate([rows.src.values, rows.dst.values, neg_link_sampler.sample(len(rows))])).long()
            # ts = torch.tensor(np.concatenate([rows.time.values, rows.time.values, rows.time.values]).astype(np.float32))
            # outi = sampler.sample_from_nodes(root_nodes, ts=ts)
@@ -135,16 +135,20 @@ for _, rows in tqdm(df.groupby(df.index // args.batch_size), total=len(df) // ar
            sam_edge += outi[0].sample_edge_num
            out.append(outi)

-end = time.time()
-print("sample time", end-pre)
-print("tot_time", tot_time)
-print("sam_time", sam_time)
-print("sam_edge", sam_edge)
-print('eid_list:', out[23][0].eid())
-# print('delta_ts_list:', out[10][0].delta_ts)
-print('node:', out[23][0].sample_nodes())
-# print('node_ts:', out[23][0].sample_nodes_ts)
-# print('eid_list:', out[23][1].eid)
-# print('node:', out[23][1].sample_nodes)
-# print('node_ts:', out[23][1].sample_nodes_ts)
-# print('edge_index_list:', out[0][0].edge_index)
\ No newline at end of file
+    end = time.time()
+    print("row", out[23][0].row())
+    print("sample time", end-pre)
+    print("tot_time", tot_time)
+    print("sam_time", sam_time)
+    print("sam_edge", sam_edge)
+    print('eid_list:', out[23][0].eid())
+    # print('delta_ts_list:', out[10][0].delta_ts)
+    print('node:', out[23][0].sample_nodes())
+    # print('node_ts:', out[23][0].sample_nodes_ts)
+    # print('eid_list:', out[23][1].eid)
+    # print('node:', out[23][1].sample_nodes)
+    # print('node_ts:', out[23][1].sample_nodes_ts)
+    # print('edge_index_list:', out[0][0].edge_index)
+    
+if __name__ == "__main__":
+    test()
\ No newline at end of file