Commit 71cb0206 by zlj

add examples

parent 16cff1d2
LOCAL RANK 0, RANK0
use cuda on 0
9228
get_neighbors consume: 0.0103759s
Epoch 0:
train loss:377.5712 train ap:0.903848 val ap:0.886584 val auc:0.904656
total time:11.40s prep time:9.88s
fetch time:0.00s write back time:0.00s
Epoch 1:
train loss:329.1190 train ap:0.920000 val ap:0.885216 val auc:0.904735
total time:11.32s prep time:9.79s
fetch time:0.00s write back time:0.00s
Epoch 2:
train loss:316.1359 train ap:0.924376 val ap:0.895123 val auc:0.912622
total time:11.49s prep time:9.95s
fetch time:0.00s write back time:0.00s
Epoch 3:
train loss:311.4889 train ap:0.926138 val ap:0.893922 val auc:0.912589
total time:11.50s prep time:9.97s
fetch time:0.00s write back time:0.00s
Epoch 4:
train loss:302.2057 train ap:0.929684 val ap:0.889695 val auc:0.909766
total time:11.48s prep time:9.95s
fetch time:0.00s write back time:0.00s
Epoch 5:
train loss:300.2464 train ap:0.931034 val ap:0.897774 val auc:0.916421
total time:11.48s prep time:9.95s
fetch time:0.00s write back time:0.00s
Epoch 6:
train loss:293.5465 train ap:0.934657 val ap:0.896159 val auc:0.914983
total time:11.55s prep time:10.02s
fetch time:0.00s write back time:0.00s
Epoch 7:
train loss:285.9396 train ap:0.937834 val ap:0.905351 val auc:0.922268
total time:11.52s prep time:9.99s
fetch time:0.00s write back time:0.00s
Epoch 8:
train loss:281.7048 train ap:0.941035 val ap:0.909690 val auc:0.924262
total time:11.51s prep time:9.98s
fetch time:0.00s write back time:0.00s
Epoch 9:
train loss:273.8330 train ap:0.945250 val ap:0.913860 val auc:0.928068
total time:11.56s prep time:10.00s
fetch time:0.00s write back time:0.00s
Epoch 10:
train loss:268.6164 train ap:0.947141 val ap:0.917379 val auc:0.930309
total time:11.77s prep time:10.19s
fetch time:0.00s write back time:0.00s
Epoch 11:
train loss:265.0121 train ap:0.949457 val ap:0.918648 val auc:0.931452
total time:11.62s prep time:10.08s
fetch time:0.00s write back time:0.00s
Epoch 12:
train loss:255.6320 train ap:0.953506 val ap:0.919272 val auc:0.932783
total time:11.50s prep time:9.98s
fetch time:0.00s write back time:0.00s
Epoch 13:
train loss:252.6296 train ap:0.954798 val ap:0.924649 val auc:0.936515
total time:11.50s prep time:9.96s
fetch time:0.00s write back time:0.00s
Epoch 14:
train loss:248.4476 train ap:0.956243 val ap:0.925952 val auc:0.938199
total time:11.53s prep time:10.00s
fetch time:0.00s write back time:0.00s
Epoch 15:
train loss:243.4459 train ap:0.958749 val ap:0.929440 val auc:0.940865
total time:11.54s prep time:10.01s
fetch time:0.00s write back time:0.00s
Epoch 16:
train loss:238.6286 train ap:0.960667 val ap:0.936339 val auc:0.946161
total time:17.48s prep time:15.12s
fetch time:0.00s write back time:0.00s
Epoch 17:
train loss:234.5283 train ap:0.961787 val ap:0.933828 val auc:0.944680
total time:18.09s prep time:15.69s
fetch time:0.00s write back time:0.00s
Epoch 18:
train loss:227.3527 train ap:0.964591 val ap:0.932110 val auc:0.943765
total time:18.17s prep time:15.46s
fetch time:0.00s write back time:0.00s
Epoch 19:
train loss:223.7772 train ap:0.965486 val ap:0.937780 val auc:0.947312
total time:17.80s prep time:15.43s
fetch time:0.00s write back time:0.00s
Epoch 20:
train loss:221.9428 train ap:0.966139 val ap:0.938104 val auc:0.948022
total time:18.31s prep time:15.82s
fetch time:0.00s write back time:0.00s
Epoch 21:
train loss:216.8870 train ap:0.968285 val ap:0.942088 val auc:0.950660
total time:18.14s prep time:15.48s
fetch time:0.00s write back time:0.00s
Epoch 22:
train loss:213.5077 train ap:0.968911 val ap:0.944023 val auc:0.951869
total time:18.09s prep time:15.56s
fetch time:0.00s write back time:0.00s
Epoch 23:
train loss:210.1412 train ap:0.970743 val ap:0.944840 val auc:0.952554
total time:17.74s prep time:15.47s
fetch time:0.00s write back time:0.00s
Epoch 24:
train loss:208.9109 train ap:0.971101 val ap:0.944029 val auc:0.952720
total time:18.47s prep time:15.73s
fetch time:0.00s write back time:0.00s
Epoch 25:
train loss:207.5198 train ap:0.970606 val ap:0.944518 val auc:0.952912
total time:17.97s prep time:15.66s
fetch time:0.00s write back time:0.00s
Epoch 26:
train loss:203.6585 train ap:0.971611 val ap:0.940218 val auc:0.949371
total time:17.70s prep time:15.42s
fetch time:0.00s write back time:0.00s
Epoch 27:
train loss:203.3531 train ap:0.972317 val ap:0.949000 val auc:0.956595
total time:18.01s prep time:15.33s
fetch time:0.00s write back time:0.00s
Epoch 28:
train loss:198.1525 train ap:0.973525 val ap:0.948420 val auc:0.955604
total time:17.78s prep time:15.31s
fetch time:0.00s write back time:0.00s
Epoch 29:
train loss:197.6365 train ap:0.973818 val ap:0.944911 val auc:0.953313
total time:17.74s prep time:15.49s
fetch time:0.00s write back time:0.00s
Epoch 30:
train loss:197.7800 train ap:0.973573 val ap:0.950356 val auc:0.958595
total time:18.24s prep time:15.60s
fetch time:0.00s write back time:0.00s
Epoch 31:
train loss:194.4391 train ap:0.974730 val ap:0.952775 val auc:0.959729
total time:17.84s prep time:15.23s
fetch time:0.00s write back time:0.00s
Epoch 32:
train loss:190.1150 train ap:0.976038 val ap:0.953111 val auc:0.959360
total time:17.72s prep time:15.46s
fetch time:0.00s write back time:0.00s
Epoch 33:
train loss:185.7417 train ap:0.976925 val ap:0.954769 val auc:0.961057
total time:18.04s prep time:15.56s
fetch time:0.00s write back time:0.00s
Epoch 34:
train loss:189.0004 train ap:0.976267 val ap:0.954641 val auc:0.961198
total time:17.89s prep time:15.12s
fetch time:0.00s write back time:0.00s
Epoch 35:
train loss:185.4487 train ap:0.977420 val ap:0.954675 val auc:0.960969
total time:17.65s prep time:15.13s
fetch time:0.00s write back time:0.00s
Epoch 36:
train loss:185.9187 train ap:0.977260 val ap:0.955284 val auc:0.961039
total time:17.67s prep time:15.36s
fetch time:0.00s write back time:0.00s
Epoch 37:
train loss:184.6686 train ap:0.977626 val ap:0.955124 val auc:0.961923
total time:17.90s prep time:15.42s
fetch time:0.00s write back time:0.00s
Epoch 38:
train loss:183.1190 train ap:0.977930 val ap:0.956069 val auc:0.962114
total time:18.10s prep time:15.26s
fetch time:0.00s write back time:0.00s
Epoch 39:
train loss:179.3445 train ap:0.978350 val ap:0.958382 val auc:0.963833
total time:18.05s prep time:15.60s
fetch time:0.00s write back time:0.00s
Epoch 40:
train loss:174.6380 train ap:0.980014 val ap:0.956793 val auc:0.963013
total time:18.28s prep time:15.77s
fetch time:0.00s write back time:0.00s
Epoch 41:
train loss:178.2737 train ap:0.979067 val ap:0.958580 val auc:0.964227
total time:18.24s prep time:15.51s
fetch time:0.00s write back time:0.00s
Epoch 42:
train loss:175.7294 train ap:0.979611 val ap:0.960288 val auc:0.965754
total time:17.98s prep time:15.62s
fetch time:0.00s write back time:0.00s
Epoch 43:
train loss:173.2326 train ap:0.980324 val ap:0.960428 val auc:0.965867
total time:18.21s prep time:15.60s
fetch time:0.00s write back time:0.00s
Epoch 44:
train loss:172.3492 train ap:0.980196 val ap:0.962143 val auc:0.966774
total time:18.35s prep time:15.80s
fetch time:0.00s write back time:0.00s
Epoch 45:
train loss:168.8601 train ap:0.981180 val ap:0.963014 val auc:0.968132
total time:17.73s prep time:15.50s
fetch time:0.00s write back time:0.00s
Epoch 46:
train loss:169.5997 train ap:0.981473 val ap:0.961124 val auc:0.966405
total time:13.20s prep time:11.67s
fetch time:0.00s write back time:0.00s
Epoch 47:
train loss:167.5232 train ap:0.981394 val ap:0.961333 val auc:0.966534
total time:11.49s prep time:9.96s
fetch time:0.00s write back time:0.00s
Epoch 48:
train loss:165.6863 train ap:0.981684 val ap:0.960024 val auc:0.965201
total time:11.50s prep time:9.97s
fetch time:0.00s write back time:0.00s
Epoch 49:
train loss:165.3790 train ap:0.981795 val ap:0.962299 val auc:0.967019
total time:11.54s prep time:9.98s
fetch time:0.00s write back time:0.00s
Loading the best model at epoch 45
test AP:0.946485 test AUC:0.954197
test_dataset 23621 avg_time 13.31522078514099
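For reference, a minimal sketch for pulling the per-epoch validation AP back out of a log in the format above (the log filename is hypothetical); on this run it recovers the best epoch, 45:

import re

# Match "val ap:<float>" on each epoch line of the training log.
pattern = re.compile(r"val ap:([0-9.]+)")
with open("train.out") as f:  # hypothetical filename
    val_aps = [float(m.group(1)) for m in pattern.finditer(f.read())]
best = max(range(len(val_aps)), key=lambda e: val_aps[e])
print(f"best val ap {val_aps[best]:.6f} at epoch {best}")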
import argparse
import os
import sys
import time
from os.path import abspath, join, dirname
from pathlib import Path
from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex
from starrygl.evaluation.get_evalute_data import get_link_prediction_data
from starrygl.module.modules import GeneralModel
from starrygl.module.utils import parse_config, EarlyStopMonitor
from starrygl.sample.cache.fetch_cache import FetchFeatureCache
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.memory.shared_mailbox import SharedMailBox
from starrygl.sample.sample_core.EvaluateNegativeSampling import EvaluateNegativeSampling
from starrygl.sample.sample_core.base import NegativeSampling
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
from starrygl.sample.part_utils.partition_tgnn import partition_load
import torch
import torch.nn.functional as F
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
from starrygl.sample.data_loader import DistributedDataLoader
from starrygl.sample.batch_data import SAMPLE_TYPE
from starrygl.sample.stream_manager import getPipelineManger
parser = argparse.ArgumentParser(
    description="RPC Reinforcement Learning Example",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('--rank', default=0, type=int, metavar='W',
                    help='rank of this process')
parser.add_argument('--patience', type=int, default=5,
                    help='patience for early stopping')
parser.add_argument('--world_size', default=1, type=int, metavar='W',
                    help='number of processes')
parser.add_argument('--dataname', default='WIKI', type=str, metavar='W',
                    help='name of dataset')
parser.add_argument('--model', default='TGN', type=str, metavar='W',
                    help='name of model')
parser.add_argument('--negative_sample_strategy', default='random', type=str, metavar='W',
                    help='name of negative sample strategy')
args = parser.parse_args()
import random
import dgl
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
#os.environ['CUDA_VISIBLE_DEVICES'] = str(args.rank)
#os.environ["RANK"] = str(args.rank)
#os.environ["WORLD_SIZE"] = str(args.world_size)
#os.environ["LOCAL_RANK"] = str(0)
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
os.environ["MASTER_ADDR"] = '10.214.211.187'
os.environ["MASTER_PORT"] = '9337'
def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(1234)
def main():
    print('main')
    use_cuda = True
    sample_param, memory_param, gnn_param, train_param = parse_config('./config/{}.yml'.format(args.model))
    torch.set_num_threads(12)
    ctx = DistributedContext.init(backend="nccl", use_gpu=True)
    device_id = torch.cuda.current_device()
    pdata = partition_load("/mnt/data/part_data/evaluate/{}".format(args.dataname), algo="metis_for_tgnn")
    graph = DistributedGraphStore(pdata=pdata, uvm_edge=False)
    gnn_param['dyrep'] = True if args.model == 'DyRep' else False
    use_src_emb = gnn_param['use_src_emb'] if 'use_src_emb' in gnn_param else False
    use_dst_emb = gnn_param['use_dst_emb'] if 'use_dst_emb' in gnn_param else False
    gnn_dim_node = 0 if graph.x is None else pdata.x.shape[1]
    gnn_dim_edge = 0 if graph.edge_attr is None else pdata.edge_attr.shape[1]
    print(gnn_dim_node, gnn_dim_edge)
    avg_time = 0
    MODEL_SAVE_PATH = f'./saved_models/{args.model}-{args.dataname}.pth'
    if use_cuda:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param).cuda()
        device = torch.device('cuda')
    else:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param)
        device = torch.device('cpu')
    model.load_state_dict(torch.load(MODEL_SAVE_PATH))
    sample_graph = TemporalNeighborSampleGraph(sample_graph=pdata.sample_graph, mode='full')
    if memory_param['type'] != 'none':
        mailbox = SharedMailBox(pdata.ids.shape[0], memory_param,
                                dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0)
    else:
        mailbox = None
    num_layers = sample_param['layer'] if 'layer' in sample_param else 1
    fanout = sample_param['neighbor'] if 'neighbor' in sample_param else [10]
    policy = sample_param['strategy'] if 'strategy' in sample_param else 'recent'
    sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=num_layers, fanout=fanout,
                              graph_data=sample_graph, workers=10, policy=policy, graph_name="wiki_train")
    train_data = torch.masked_select(graph.edge_index, pdata.train_mask.to(device)).reshape(2, -1)
    train_ts = torch.masked_select(graph.edge_ts, pdata.train_mask.to(device))
    val_data = torch.masked_select(graph.edge_index, pdata.val_mask.to(device)).reshape(2, -1)
    val_ts = torch.masked_select(graph.edge_ts, pdata.val_mask.to(device))
    test_data = torch.masked_select(graph.edge_index, pdata.test_mask.to(device)).reshape(2, -1)
    test_ts = torch.masked_select(graph.edge_ts, pdata.test_mask.to(device))
    #print(train_data.shape[1],val_data.shape[1],test_data.shape[1])
    train_data = DataSet(edges=train_data, ts=train_ts, eids=torch.nonzero(pdata.train_mask).view(-1))
    test_data = DataSet(edges=test_data, ts=test_ts, eids=torch.nonzero(pdata.test_mask).view(-1))
    val_data = DataSet(edges=val_data, ts=val_ts, eids=torch.nonzero(pdata.val_mask).view(-1))
    new_node_val_data = torch.masked_select(graph.edge_index, pdata.new_node_val_mask.to(device)).reshape(2, -1)
    new_node_val_ts = torch.masked_select(graph.edge_ts, pdata.new_node_val_mask.to(device))
    new_node_test_data = torch.masked_select(graph.edge_index, pdata.new_node_test_mask.to(device)).reshape(2, -1)
    new_node_test_ts = torch.masked_select(graph.edge_ts, pdata.new_node_test_mask.to(device))
    new_node_val_data = DataSet(edges=new_node_val_data, ts=new_node_val_ts, eids=torch.nonzero(pdata.new_node_val_mask).view(-1))
    new_node_test_data = DataSet(edges=new_node_test_data, ts=new_node_test_ts, eids=torch.nonzero(pdata.new_node_test_mask).view(-1))
    if args.negative_sample_strategy != 'random':
        val_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=graph.edge_index[0, :], dst_node_ids=graph.edge_index[1, :],
                                                        interact_times=graph.edge_ts, last_observed_time=train_data.ts[-1],
                                                        negative_sample_strategy=args.negative_sample_strategy, seed=0)
        new_node_val_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=new_node_val_data.edges[0, :], dst_node_ids=new_node_val_data.edges[1, :],
                                                                 interact_times=new_node_val_data.ts, last_observed_time=train_data.ts[-1],
                                                                 negative_sample_strategy=args.negative_sample_strategy, seed=1)
        test_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=graph.edge_index[0, :], dst_node_ids=graph.edge_index[1, :],
                                                         interact_times=graph.edge_ts, last_observed_time=val_data.ts[-1],
                                                         negative_sample_strategy=args.negative_sample_strategy, seed=2)
        new_node_test_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=new_node_test_data.edges[0, :], dst_node_ids=new_node_test_data.edges[1, :],
                                                                  interact_times=new_node_test_data.ts, last_observed_time=val_data.ts[-1],
                                                                  negative_sample_strategy=args.negative_sample_strategy, seed=3)
    else:
        val_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=graph.edge_index[0, :], dst_node_ids=graph.edge_index[1, :], seed=0)
        new_node_val_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=new_node_val_data.edges[0, :], dst_node_ids=new_node_val_data.edges[1, :], seed=1)
        test_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=graph.edge_index[0, :], dst_node_ids=graph.edge_index[1, :], seed=2)
        new_node_test_neg_edge_sampler = EvaluateNegativeSampling(src_node_ids=new_node_test_data.edges[0, :], dst_node_ids=new_node_test_data.edges[1, :], seed=3)
import argparse
import os
import sys
import time
current_path = os.path.dirname(os.path.abspath(__file__))
parent_path = os.path.abspath(os.path.join(current_path, os.pardir))
sys.path.append(parent_path)
from os.path import abspath, join, dirname
from pathlib import Path
from starrygl.evaluation.evaluator import Evaluator
from starrygl.sample.sample_core.EvaluateNegativeSampling import TgbNegativeSampling
from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex
from starrygl.module.modules import GeneralModel
from starrygl.module.utils import parse_config, EarlyStopMonitor
from starrygl.sample.cache.fetch_cache import FetchFeatureCache
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.memory.shared_mailbox import SharedMailBox
from starrygl.sample.sample_core.base import NegativeSampling
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
from starrygl.sample.part_utils.partition_tgnn import partition_load
import torch
import torch.nn.functional as F
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
from starrygl.sample.data_loader import DistributedDataLoader
from starrygl.sample.batch_data import SAMPLE_TYPE
from starrygl.sample.stream_manager import getPipelineManger
from starrygl.sample.sample_core.LocalNegSampling import LocalNegativeSampling
parser = argparse.ArgumentParser(
    description="RPC Reinforcement Learning Example",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('--rank', default=0, type=int, metavar='W',
                    help='rank of this process')
parser.add_argument('--local_rank', default=0, type=int, metavar='W',
                    help='local rank of this process')
parser.add_argument('--patience', type=int, default=5,
                    help='patience for early stopping')
parser.add_argument('--world_size', default=1, type=int, metavar='W',
                    help='number of processes')
parser.add_argument('--dataname', default="WIKI", type=str, metavar='W',
                    help='name of dataset')
parser.add_argument('--model', default='TGN', type=str, metavar='W',
                    help='name of model')
parser.add_argument('--num_neg_sampler', default=999, type=int, metavar='W',
                    help='number of negative samples per positive edge')
parser.add_argument('--inductive', default=True, type=bool, metavar='W',
                    help='whether to reset memory for inductive evaluation')
parser.add_argument('--train_world_size', default=1, type=int, metavar='W',
                    help='world size used when the model was trained')
args = parser.parse_args()
import random
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
from tgb.linkproppred.dataset_pyg import PyGLinkPropPredDataset
#os.environ['CUDA_VISIBLE_DEVICES'] = str(args.rank)
if 'WORLD_SIZE' not in os.environ:
    os.environ["RANK"] = str(0)  # str(args.rank)
    os.environ["WORLD_SIZE"] = str(1)  # str(args.world_size)
    os.environ["LOCAL_RANK"] = str(0)  # str(args.local_rank)
if 'MASTER_ADDR' not in os.environ:
    os.environ["MASTER_ADDR"] = '192.168.2.107'
if 'MASTER_PORT' not in os.environ:
    os.environ["MASTER_PORT"] = '9337'
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(34)
def main():
    print('LOCAL RANK {}, RANK{}'.format(os.environ["LOCAL_RANK"], os.environ["RANK"]))
    use_cuda = True
    sample_param, memory_param, gnn_param, train_param = parse_config('../config/{}.yml'.format(args.model))
    ctx = DistributedContext.init(backend="nccl", use_gpu=True)
    print('initialize distributed')
    torch.set_num_threads(10)
    device_id = torch.cuda.current_device()
    print('use cuda on', device_id)
    pdata = partition_load('/mnt/data/part_data/evaluate/tgbl/{}'.format(args.dataname), algo="metis_for_tgnn")
    graph = DistributedGraphStore(pdata=pdata)
    print(graph.num_nodes)
    Path("../saved_models/").mkdir(parents=True, exist_ok=True)
    Path("../saved_checkpoints/").mkdir(parents=True, exist_ok=True)
    get_checkpoint_path = lambda epoch: f'../saved_checkpoints/{args.model}-{args.dataname}-{epoch}.pth'
    MODEL_SAVE_PATH = f'../saved_models/{args.model}-{args.dataname}-{args.train_world_size}.pth'
    gnn_param['dyrep'] = True if args.model == 'DyRep' else False
    use_src_emb = gnn_param['use_src_emb'] if 'use_src_emb' in gnn_param else False
    use_dst_emb = gnn_param['use_dst_emb'] if 'use_dst_emb' in gnn_param else False
    sample_graph = TemporalNeighborSampleGraph(sample_graph=pdata.sample_graph, mode='full')
    if memory_param['type'] != 'none':
        mailbox = SharedMailBox(pdata.ids.shape[0], memory_param,
                                dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0,
                                ts_dtye=graph.edge_ts.dtype)
    else:
        mailbox = None
    num_layers = sample_param['layer'] if 'layer' in sample_param else 1
    fanout = sample_param['neighbor'] if 'neighbor' in sample_param else [10]
    policy = sample_param['strategy'] if 'strategy' in sample_param else 'recent'
    sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=num_layers, fanout=fanout,
                              graph_data=sample_graph, workers=10, policy=policy, graph_name="wiki_train")
    train_data = torch.masked_select(graph.edge_index, pdata.train_mask.to(graph.edge_index.device)).reshape(2, -1)
    train_ts = torch.masked_select(graph.edge_ts, pdata.train_mask.to(graph.edge_index.device))
    test_data = torch.masked_select(graph.edge_index, pdata.test_mask.to(graph.edge_index.device)).reshape(2, -1)
    test_ts = torch.masked_select(graph.edge_ts, pdata.test_mask.to(graph.edge_index.device))
    val_data = torch.masked_select(graph.edge_index, pdata.val_mask.to(graph.edge_index.device)).reshape(2, -1)
    val_ts = torch.masked_select(graph.edge_ts, pdata.val_mask.to(graph.edge_index.device))
    train_data = DataSet(edges=train_data, ts=train_ts, eids=torch.nonzero(pdata.train_mask).view(-1))
    test_data = DataSet(edges=test_data, ts=test_ts, eids=torch.nonzero(pdata.test_mask).view(-1))
    val_data = DataSet(edges=val_data, ts=val_ts, eids=torch.nonzero(pdata.val_mask).view(-1))
    dataset = PyGLinkPropPredDataset(name=args.dataname, root="datasets")
    print(graph.edge_ts.to(torch.long), dataset.get_TemporalData().t)
    #neg_sampler = LocalNegativeSampling('triplet',args.num_neg_sampler,dst_node_list = graph.edge_index[1,:])
    neg_sampler = TgbNegativeSampling('tgbtriplet', args.num_neg_sampler, dataset.negative_sampler, dataset.dataset.meta_dict)
    train_sampler = NegativeSampling('triplet')
    trainloader = DistributedDataLoader(graph, train_data, sampler=sampler,
                                        sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                        neg_sampler=train_sampler,
                                        batch_size=train_param['batch_size'],
                                        shuffle=False,
                                        drop_last=True,
                                        chunk_size=None,
                                        train=True,
                                        queue_size=200,
                                        mailbox=mailbox)
    testloader = DistributedDataLoader(graph, test_data, sampler=sampler,
                                       sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                       neg_sampler=neg_sampler,
                                       batch_size=train_param['batch_size'],
                                       shuffle=False,
                                       drop_last=False,
                                       chunk_size=None,
                                       train=False,
                                       queue_size=100,
                                       mailbox=mailbox)
    valloader = DistributedDataLoader(graph, val_data, sampler=sampler,
                                      sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                      neg_sampler=neg_sampler,
                                      batch_size=train_param['batch_size'],
                                      shuffle=False,
                                      drop_last=False,
                                      chunk_size=None,
                                      train=False,
                                      queue_size=100,
                                      mailbox=mailbox)
    print(val_data.ts.to(torch.long)[:100], test_data.ts, train_data.ts)
    gnn_dim_node = 0 if graph.x is None else pdata.x.shape[1]
    gnn_dim_edge = 0 if graph.edge_attr is None else pdata.edge_attr.shape[1]
    avg_time = 0
    if use_cuda:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param).cuda()
        device = torch.device('cuda')
    else:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param)
        device = torch.device('cpu')
    model = DDP(model, find_unused_parameters=True)
    model.load_state_dict(torch.load(MODEL_SAVE_PATH))
    train_stream = torch.cuda.Stream()
    evaluator = Evaluator()
    def eval(mode='val'):
        model.eval()
        metric = list()
        if mode == 'val':
            loader = valloader
            if isinstance(neg_sampler, TgbNegativeSampling):
                neg_sampler.load_ns(mode)
        elif mode == 'train':
            loader = trainloader
            neg_num = 1
        elif mode == 'test':
            loader = testloader
            if isinstance(neg_sampler, TgbNegativeSampling):
                neg_sampler.load_ns(mode)
        with torch.no_grad():
            total_loss = 0
            signal = torch.tensor([0], dtype=int, device=device)
            with torch.cuda.stream(train_stream):
                for roots, mfgs, metadata in loader:
                    if mode == 'train':
                        neg_num = 1
                    else:
                        neg_num = neg_sampler.amount
                    pred_pos, pred_neg = model(mfgs, metadata, neg_num, 'triplet')
                    if mode != 'train':
                        #print(pred_pos,pred_neg)
                        eval_metric = evaluator.eval(pred_pos, pred_neg)
                        metric.append(list(eval_metric.values()))
                        #print(eval_metric)
                    if mailbox is not None:
                        src = metadata['src_pos_index']
                        dst = metadata['dst_pos_index']
                        ts = roots.ts
                        if graph.edge_attr is None:
                            edge_feats = None
                        elif graph.edge_attr.device == torch.device('cpu'):
                            edge_feats = graph.edge_attr[roots.eids.to('cpu')].to('cuda')
                        else:
                            edge_feats = graph.edge_attr[roots.eids]
                        dist_index_mapper = mfgs[0][0].srcdata['ID']
                        root_index = torch.cat((src, dst))
                        last_updated_nid = model.module.memory_updater.last_updated_nid[root_index]
                        last_updated_memory = model.module.memory_updater.last_updated_memory[root_index]
                        last_updated_ts = model.module.memory_updater.last_updated_ts[root_index]
                        index, memory, memory_ts = mailbox.get_update_memory(last_updated_nid,
                                                                             last_updated_memory,
                                                                             last_updated_ts)
                        index, mail, mail_ts = mailbox.get_update_mail(dist_index_mapper,
                                                                       src, dst, ts, edge_feats,
                                                                       model.module.memory_updater.last_updated_memory,
                                                                       model.module.embedding, use_src_emb,
                                                                       use_dst_emb)
                        mailbox.set_mailbox_all_to_all(index, memory, memory_ts, mail, mail_ts, reduce_Op='max')
        if mode != 'train':
            print('{} metric\n'.format(mode))
            metric = torch.tensor(metric)
            metric_val = metric.mean(dim=0)
            for i, key in enumerate(evaluator.valid_metric_list):
                print('metric {} is {}\n'.format(key, metric_val[i]))
            print('\n')
    model.eval()
    if mailbox is not None and args.inductive is True:
        mailbox.reset()
        model.module.memory_updater.last_updated_nid = None
    eval('train')
    eval('val')
    eval('test')
    print('test_dataset {} avg_time {} \n'.format(test_data.edges.shape[1], avg_time / train_param['epoch']))
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    ctx.shutdown()
if __name__ == "__main__":
    main()
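For reference, this evaluation script is launched the same way as the torchrun jobs in the shell script further below, e.g. single node with one process:

torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-wiki --model TGN --train_world_size 1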
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn
from dgl.nn import SAGEConv
import numpy as np
class TimeEncode(torch.nn.Module):
    def __init__(self, dim):
        super(TimeEncode, self).__init__()
        self.dim = dim
        self.w = torch.nn.Linear(1, dim)
        self.w.weight = torch.nn.Parameter((torch.from_numpy(1 / 10 ** np.linspace(0, 9, dim, dtype=np.float32))).reshape(dim, -1))
        self.w.bias = torch.nn.Parameter(torch.zeros(dim))
    def forward(self, t):
        output = torch.cos(self.w(t.float().reshape((-1, 1))))
        return output
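# The weights above are initialized to fixed frequencies 1/10**k, with k evenly
# spaced over [0, 9], so output dimension k is cos(t / 10**k): the Fourier-style
# functional time encoding used by TGAT/TGN-family models. The parameters remain
# trainable here unless the caller freezes them.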
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, "mean")
        self.conv2 = SAGEConv(h_feats, h_feats, "mean")
    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h
class TSAGELayer(nn.Module):
    def __init__(self, in_dim=0, edge_dim=0, time_dim=0, h_feats=0):
        super(TSAGELayer, self).__init__()
        assert in_dim + time_dim != 0 and h_feats != 0
        self.time_dim = time_dim
        self.time_enc = TimeEncode(time_dim)
        # The layer input is the concatenation of node, edge, and time features.
        self.sage = SAGEConv(in_dim + edge_dim + time_dim, h_feats, "mean")
    def forward(self, b):
        time_f = self.time_enc(b.edata['dt'])
        # Destination nodes carry no incoming-edge time delta: pad with zero rows so
        # the per-edge rows align with the (dst nodes + sampled neighbors) source layout.
        time_f = torch.cat((torch.zeros(b.num_dst_nodes(), self.time_dim,
                                        dtype=time_f.dtype, device=time_f.device),
                            time_f), dim=0)
        if 'f' in b.edata:
            edge_f = torch.cat((torch.zeros(b.num_dst_nodes(), b.edata['f'].shape[1],
                                            dtype=b.edata['f'].dtype, device=b.edata['f'].device),
                                b.edata['f']), dim=0)
            if 'h' in b.srcdata:
                b.srcdata['h'] = torch.cat((b.srcdata['h'], edge_f, time_f), dim=1)
            else:
                b.srcdata['h'] = torch.cat((edge_f, time_f), dim=1)
        else:
            if 'h' in b.srcdata:
                b.srcdata['h'] = torch.cat((b.srcdata['h'], time_f), dim=1)
            else:
                b.srcdata['h'] = time_f
        return F.relu(self.sage(b, b.srcdata['h']))
class TSAGEModel(nn.Module):
    def __init__(self, num_layer, node_dim, edge_dim, time_dim, h_dim):
        super(TSAGEModel, self).__init__()
        self.num_layer = num_layer
        layers = []
        for i in range(num_layer):
            if i != 0:
                layers.append(TSAGELayer(h_dim, edge_dim, time_dim, h_dim))
            else:
                layers.append(TSAGELayer(node_dim, edge_dim, time_dim, h_dim))
        # Register the layers as submodules so their parameters are tracked.
        self.layers = nn.ModuleList(layers)
    def forward(self, mfgs):
        for l in range(len(mfgs)):
            for h in range(len(mfgs[l])):
                if l < self.num_layer - 1:
                    mfgs[l + 1][h].srcdata['h'] = self.layers[l](mfgs[l][h])
                else:
                    return self.layers[l](mfgs[l][h])
class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
            g.apply_edges(fn.u_dot_v("h", "h", "score"))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            return g.edata["score"][:, 0]
# Thumbnail credits: Link Prediction with Neo4j, Mark Needham
# sphinx_gallery_thumbnail_path = '_static/blitz_4_link_predict.png'
\ No newline at end of file
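A minimal smoke test for the modules above (an added sketch, not part of the original file; the tiny cycle graph and feature sizes are arbitrary):

import dgl
import torch

if __name__ == "__main__":
    # Three nodes in a cycle with random 8-dim features; score each edge by a dot product.
    g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])))
    h = torch.randn(3, 8)
    scores = DotPredictor()(g, h)                       # one score per edge: shape (3,)
    t_feat = TimeEncode(16)(torch.tensor([0.0, 1.0, 10.0]))
    print(scores.shape, t_feat.shape)                   # torch.Size([3]) torch.Size([3, 16])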
import argparse
import os
import sys
import time
from os.path import abspath, join, dirname
current_path = os.path.dirname(os.path.abspath(__file__))
parent_path = os.path.abspath(os.path.join(current_path, os.pardir))
parent_path = os.path.abspath(os.path.join(parent_path, os.pardir))
sys.path.append(parent_path)
from pathlib import Path
from pre_train import DotPredictor, TSAGEModel
from starrygl.module.layers import EdgePredictor
from starrygl.sample.sample_core.LocalNegSampling import LocalNegativeSampling
from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex
from starrygl.module.modules import GeneralModel
from starrygl.module.utils import parse_config, EarlyStopMonitor
from starrygl.sample.cache.fetch_cache import FetchFeatureCache
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.memory.shared_mailbox import SharedMailBox
from starrygl.sample.sample_core.base import NegativeSampling
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
from starrygl.sample.part_utils.partition_tgnn import partition_load
import torch
import torch.nn.functional as F
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
from starrygl.sample.data_loader import DistributedDataLoader
from starrygl.sample.batch_data import SAMPLE_TYPE
from starrygl.sample.stream_manager import getPipelineManger
parser = argparse.ArgumentParser(
    description="RPC Reinforcement Learning Example",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('--rank', default=0, type=int, metavar='W',
                    help='rank of this process')
parser.add_argument('--local_rank', default=0, type=int, metavar='W',
                    help='local rank of this process')
parser.add_argument('--patience', type=int, default=5,
                    help='patience for early stopping')
parser.add_argument('--world_size', default=1, type=int, metavar='W',
                    help='number of processes')
parser.add_argument('--dataname', default="WIKI", type=str, metavar='W',
                    help='name of dataset')
parser.add_argument('--model', default='pre', type=str, metavar='W',
                    help='name of model')
args = parser.parse_args()
import random
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
#os.environ['CUDA_VISIBLE_DEVICES'] = str(args.rank)
if 'WORLD_SIZE' not in os.environ:
    os.environ["RANK"] = str(args.rank)
    os.environ["WORLD_SIZE"] = str(args.world_size)
    os.environ["LOCAL_RANK"] = str(args.local_rank)
if 'MASTER_ADDR' not in os.environ:
    os.environ["MASTER_ADDR"] = '192.168.2.107'
if 'MASTER_PORT' not in os.environ:
    os.environ["MASTER_PORT"] = '9337'
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
local_rank = int(os.environ["LOCAL_RANK"])
def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(34)
def main():
    print('LOCAL RANK {}, RANK{}'.format(os.environ["LOCAL_RANK"], os.environ["RANK"]))
    use_cuda = True
    sample_param, memory_param, gnn_param, train_param = parse_config('../../config/{}.yml'.format(args.model))
    ctx = DistributedContext.init(backend="nccl", use_gpu=True)
    torch.set_num_threads(int(40 / torch.distributed.get_world_size()))
    device_id = torch.cuda.current_device()
    print('use cuda on', device_id)
    pdata = partition_load('/mnt/data/part_data/evaluate/tgbl/{}'.format(args.dataname), algo="metis_for_tgnn")
    #pdata = partition_load("/mnt/data/part_data/v2/here/{}".format(args.dataname), algo="metis_for_tgnn")
    graph = DistributedGraphStore(pdata=pdata)
    print(graph.num_nodes)
    Path("../../saved_models/").mkdir(parents=True, exist_ok=True)
    Path("../../saved_checkpoints/").mkdir(parents=True, exist_ok=True)
    get_checkpoint_path = lambda epoch: f'../../saved_checkpoints/{args.model}-{args.dataname}-{epoch}.pth'
    gnn_param['dyrep'] = True if args.model == 'DyRep' else False
    use_src_emb = gnn_param['use_src_emb'] if 'use_src_emb' in gnn_param else False
    use_dst_emb = gnn_param['use_dst_emb'] if 'use_dst_emb' in gnn_param else False
    sample_graph = TemporalNeighborSampleGraph(sample_graph=pdata.sample_graph, mode='full')
    if memory_param['type'] != 'none':
        mailbox = SharedMailBox(pdata.ids.shape[0], memory_param,
                                dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0)
    else:
        mailbox = None
    num_layers = sample_param['layer'] if 'layer' in sample_param else 1
    fanout = sample_param['neighbor'] if 'neighbor' in sample_param else [10]
    policy = sample_param['strategy'] if 'strategy' in sample_param else 'recent'
    sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=num_layers, fanout=fanout,
                              graph_data=sample_graph, workers=int(40 / torch.distributed.get_world_size()),
                              policy=policy, graph_name="wiki_train")
    train_data = torch.masked_select(graph.edge_index, pdata.train_mask.to(graph.edge_index.device)).reshape(2, -1)
    train_ts = torch.masked_select(graph.edge_ts, pdata.train_mask.to(graph.edge_index.device))
    test_data = torch.masked_select(graph.edge_index, pdata.test_mask.to(graph.edge_index.device)).reshape(2, -1)
    test_ts = torch.masked_select(graph.edge_ts, pdata.test_mask.to(graph.edge_index.device))
    val_data = torch.masked_select(graph.edge_index, pdata.val_mask.to(graph.edge_index.device)).reshape(2, -1)
    val_ts = torch.masked_select(graph.edge_ts, pdata.val_mask.to(graph.edge_index.device))
    train_data = DataSet(edges=train_data, ts=train_ts, eids=torch.nonzero(pdata.train_mask).view(-1))
    test_data = DataSet(edges=test_data, ts=test_ts, eids=torch.nonzero(pdata.test_mask).view(-1))
    val_data = DataSet(edges=val_data, ts=val_ts, eids=torch.nonzero(pdata.val_mask).view(-1))
    neg_sampler = LocalNegativeSampling('triplet', dst_node_list=graph.edge_index[1, :].unique())
    trainloader = DistributedDataLoader(graph, train_data, sampler=sampler,
                                        sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                        neg_sampler=neg_sampler,
                                        batch_size=train_param['batch_size'],
                                        shuffle=False,
                                        drop_last=True,
                                        chunk_size=None,
                                        train=True,
                                        queue_size=200,
                                        mailbox=mailbox)
    testloader = DistributedDataLoader(graph, test_data, sampler=sampler,
                                       sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                       neg_sampler=neg_sampler,
                                       batch_size=train_param['batch_size'],
                                       shuffle=False,
                                       drop_last=False,
                                       chunk_size=None,
                                       train=False,
                                       queue_size=100,
                                       mailbox=mailbox)
    valloader = DistributedDataLoader(graph, val_data, sampler=sampler,
                                      sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                      neg_sampler=neg_sampler,
                                      batch_size=train_param['batch_size'],
                                      shuffle=False,
                                      drop_last=False,
                                      chunk_size=None,
                                      train=False,
                                      queue_size=100,
                                      mailbox=mailbox)
    gnn_dim_node = 0 if graph.x is None else pdata.x.shape[1]
    gnn_dim_edge = 0 if graph.edge_attr is None else pdata.edge_attr.shape[1]
    avg_time = 0
    model = TSAGEModel(2, gnn_dim_node, gnn_dim_edge, gnn_param['dim_time'], gnn_param['dim_out'])
    edge_model = EdgePredictor(gnn_param['dim_out'])  # DotPredictor()
    device = torch.device('cuda')
    model = DDP(model, find_unused_parameters=True)
    train_stream = torch.cuda.Stream()
    val_losses = list()
    def eval(mode='val'):
        neg_samples = 1
        model.eval()
        aps = list()
        aucs_mrrs = list()
        if mode == 'val':
            loader = valloader
        elif mode == 'test':
            loader = testloader
        elif mode == 'train':
            loader = trainloader
        with torch.no_grad():
            total_loss = 0
            with torch.cuda.stream(train_stream):
                for roots, mfgs, metadata in loader:
                    embedding = model(mfgs)
                    src_embedding = embedding[metadata['src_pos_index']]
                    dst_embedding = embedding[metadata['dst_pos_index']]
                    dst_neg_embedding = embedding[metadata['dst_neg_index']]
                    pred_pos, pred_neg = edge_model(src_embedding, dst_embedding,
                                                    h_neg_dst=dst_neg_embedding)
                    total_loss += criterion(pred_pos, torch.ones_like(pred_pos))
                    total_loss += criterion(pred_neg, torch.zeros_like(pred_neg))
                    y_pred = torch.cat([pred_pos, pred_neg], dim=0).sigmoid().cpu()
                    y_true = torch.cat([torch.ones(pred_pos.size(0)), torch.zeros(pred_neg.size(0))], dim=0)
                    aps.append(average_precision_score(y_true, y_pred.detach().numpy()))
                    aucs_mrrs.append(roc_auc_score(y_true, y_pred))
            world_size = dist.get_world_size()
            # Gather every rank's per-batch metrics so all ranks report the same mean.
            apc = torch.empty([loader.expected_idx * world_size], dtype=torch.float, device='cuda')
            auc_mrr = torch.empty([loader.expected_idx * world_size], dtype=torch.float, device='cuda')
            dist.all_gather_into_tensor(apc, torch.tensor(aps, device='cuda', dtype=torch.float))
            dist.all_gather_into_tensor(auc_mrr, torch.tensor(aucs_mrrs, device='cuda', dtype=torch.float))
            ap = float(apc.clone().mean())
            auc_mrr = float(auc_mrr.clone().mean())
            return ap, auc_mrr
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=train_param['lr'])
    early_stopper = EarlyStopMonitor(max_round=args.patience)
    MODEL_SAVE_PATH = f'../../saved_models/{args.model}-{args.dataname}-{dist.get_world_size()}.pth'
    for e in range(train_param['epoch']):
        torch.cuda.synchronize()
        write_back_time = 0
        fetch_time = 0
        epoch_start_time = time.time()
        train_aps = list()
        print('Epoch {:d}:'.format(e))
        time_prep = 0
        total_loss = 0
        model.train()
        for roots, mfgs, metadata in trainloader:
            #print(e,mfgs)
            #fetch_time += sample_time/1000
            t_prep_s = time.time()
            with torch.cuda.stream(train_stream):
                optimizer.zero_grad()
                embedding = model(mfgs)
                src_embedding = embedding[metadata['src_pos_index']]
                dst_embedding = embedding[metadata['dst_pos_index']]
                dst_neg_embedding = embedding[metadata['dst_neg_index']]
                pred_pos, pred_neg = edge_model(src_embedding, dst_embedding,
                                                h_neg_dst=dst_neg_embedding)
                loss = criterion(pred_pos, torch.ones_like(pred_pos))
                loss += criterion(pred_neg, torch.zeros_like(pred_neg))
                total_loss += float(loss)
                loss.backward()
                optimizer.step()
                t_prep_s = time.time()
                y_pred = torch.cat([pred_pos, pred_neg], dim=0).sigmoid().cpu()
                y_true = torch.cat([torch.ones(pred_pos.size(0)), torch.zeros(pred_neg.size(0))], dim=0)
                train_aps.append(average_precision_score(y_true, y_pred.detach().numpy()))
        print('val')
        time_prep = time.time() - epoch_start_time
        avg_time += time.time() - epoch_start_time
        train_ap = float(torch.tensor(train_aps).mean())
        ap, auc = eval('val')
        early_stop = early_stopper.early_stop_check(ap)
        if early_stop:
            print("Early stopping at epoch {:d}\n".format(e))
            print(f"Loading the best model at epoch {early_stopper.best_epoch}\n")
            best_model_path = get_checkpoint_path(early_stopper.best_epoch)
            model.load_state_dict(torch.load(best_model_path))
            break
        else:
            print('\ttrain loss:{:.4f} train ap:{:4f} val ap:{:4f} val auc:{:4f}\n'.format(total_loss, train_ap, ap, auc))
            print('\ttotal time:{:.2f}s prep time:{:.2f}s\n'.format(time.time() - epoch_start_time, time_prep))
            print('\tfetch time:{:.2f}s write back time:{:.2f}s\n'.format(fetch_time, write_back_time))
            if local_rank == 0:
                torch.save(model.state_dict(), get_checkpoint_path(e))
    if not early_stop:
        print(f"Loading the best model at epoch {early_stopper.best_epoch}")
        best_model_path = get_checkpoint_path(early_stopper.best_epoch)
        model.load_state_dict(torch.load(best_model_path))
    model.eval()
    ap, auc = eval('test')
    eval_neg_samples = 1
    if eval_neg_samples > 1:
        print('\ttest AP:{:4f} test MRR:{:4f}\n'.format(ap, auc))
    else:
        print('\ttest AP:{:4f} test AUC:{:4f}\n'.format(ap, auc))
    print('test_dataset {} avg_time {} \n'.format(test_data.edges.shape[1], avg_time / train_param['epoch']))
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    ctx.shutdown()
if __name__ == "__main__":
    main()
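As a standalone illustration of the metric-averaging pattern used in eval() above, a minimal sketch (assumes an initialized default process group and the same number of per-batch values on every rank, which is what all_gather_into_tensor requires):

import torch
import torch.distributed as dist

def all_rank_mean(values):
    # Gather every rank's per-batch metrics into one flat tensor, then average,
    # so all ranks report the identical mean.
    world_size = dist.get_world_size()
    local = torch.tensor(values, device='cuda', dtype=torch.float)
    gathered = torch.empty(local.numel() * world_size, device='cuda', dtype=torch.float)
    dist.all_gather_into_tensor(gathered, local)
    return float(gathered.mean())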
#torchrun --standalone --nproc-per-node 1 train_tgnn.py --dataname tgbl-wiki --model TGN > tgbl_wiki_train.out &
#wait
#torchrun --standalone --nproc-per-node 4 train_tgnn.py --dataname tgbl-wiki --model TGN > tgbl_wiki_train_4.out &
#wait
#torchrun --standalone --nproc-per-node 1 train_tgnn.py --dataname tgbl-review --model TGN > tgbl_review_train.out &
#wait
#torchrun --standalone --nproc-per-node 4 train_tgnn.py --dataname tgbl-review --model TGN > tgbl_review_train_4.out &
#wait
torchrun --standalone --nproc-per-node 1 train_tgnn.py --dataname tgbl-coin --model TGN_600 > tgbl_coin_train.out &
wait
torchrun --standalone --nproc-per-node 4 train_tgnn.py --dataname tgbl-coin --model TGN_600 > tgbl_coin_train_4.out &
wait
torchrun --standalone --nproc-per-node 1 train_tgnn.py --dataname tgbl-comment --model TGN_600 > tgbl_comment_train.out &
wait
torchrun --standalone --nproc-per-node 4 train_tgnn.py --dataname tgbl-comment --model TGN_600 > tgbl_comment_train_4.out &
wait
torchrun --standalone --nproc-per-node 1 train_tgnn.py --dataname tgbl-flight --model TGN_600 > tgbl_flight_train.out &
wait
torchrun --standalone --nproc-per-node 4 train_tgnn.py --dataname tgbl-flight --model TGN_600 > tgbl_flight_train_4.out &
wait
#torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-wiki --model TGN --train_world_size 1 > tgbl_wiki_4.out &
#wait
#torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-wiki --model TGN --train_world_size 4 > tgbl_wiki.out &
#wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-review --model TGN --train_world_size 1 > tgbl_review.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-review --model TGN --train_world_size 4 > tgbl_review_4.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-coin --model TGN_600 --train_world_size 1 > tgbl_coin.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-coin --model TGN_600 --train_world_size 4 > tgbl_coin_4.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-comment --model TGN_600 --train_world_size 1 > tgbl_comment.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-comment --model TGN_600 --train_world_size 4 > tgbl_comment_4.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-flight --model TGN_600 --train_world_size 1 > tgbl_flight.out &
wait
torchrun --standalone --nproc-per-node 1 evaluate_tgbl_predict.py --dataname tgbl-flight --model TGN_600 --train_world_size 4 > tgbl_flight_4.out &
wait
\ No newline at end of file
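Note on the launch pattern above: each torchrun job is backgrounded so its output can be redirected to a log file, then immediately waited on, so the runs still execute one at a time; the commented-out lines are earlier runs of the same 1-process and 4-process pattern.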
LOCAL RANK 0, RANK0
initialize distributed
use cuda on 0
638486
get_neighbors consume: 4.52747s
raw file found, skipping download
Dataset directory is /home/zlj/.miniconda3/envs/dgnn-3.10/lib/python3.10/site-packages/tgb/datasets/tgbl_coin
loading processed file
tensor([1648811421, 1648811421, 1648811424, ..., 1667278439, 1667278439,
1667278439], device='cuda:0') tensor([1648811421, 1648811421, 1648811424, ..., 1667278439, 1667278439,
1667278439])
tensor([1662096249, 1662096249, 1662096249, 1662096249, 1662096249, 1662096249,
1662096249, 1662096249, 1662096249, 1662096249, 1662096254, 1662096254,
1662096254, 1662096254, 1662096254, 1662096254, 1662096254, 1662096254,
1662096254, 1662096254, 1662096254, 1662096254, 1662096254, 1662096254,
1662096254, 1662096254, 1662096254, 1662096254, 1662096254, 1662096254,
1662096254, 1662096254, 1662096254, 1662096254, 1662096254, 1662096254,
1662096276, 1662096276, 1662096276, 1662096276, 1662096276, 1662096286,
1662096290, 1662096290, 1662096290, 1662096290, 1662096290, 1662096290,
1662096290, 1662096290, 1662096293, 1662096293, 1662096293, 1662096293,
1662096293, 1662096293, 1662096297, 1662096297, 1662096297, 1662096297,
1662096297, 1662096297, 1662096297, 1662096297, 1662096297, 1662096297,
1662096297, 1662096297, 1662096297, 1662096297, 1662096297, 1662096297,
1662096297, 1662096297, 1662096297, 1662096297, 1662096297, 1662096297,
1662096297, 1662096297, 1662096297, 1662096297, 1662096297, 1662096297,
1662096297, 1662096297, 1662096325, 1662096325, 1662096325, 1662096325,
1662096325, 1662096325, 1662096325, 1662096325, 1662096325, 1662096325,
1662096325, 1662096325, 1662096325, 1662096325], device='cuda:0') tensor([1664482319, 1664482319, 1664482319, ..., 1667278439, 1667278439,
1667278439], device='cuda:0') tensor([1648811421, 1648811421, 1648811424, ..., 1662096217, 1662096217,
1662096217], device='cuda:0')
LOCAL RANK 0, RANK0
use cuda on 0
638486
get_neighbors consume: 4.12395s
Epoch 0:
LOCAL RANK 0, RANK0
LOCAL RANK 2, RANK2
LOCAL RANK 1, RANK1
LOCAL RANK 3, RANK3
use cuda on 3
use cuda on 0
use cuda on 2
use cuda on 1
638486
638486
638486
638486
get_neighbors consume: 3.42567s
get_neighbors consume: 3.42812s
num_batchs: tensor([7069], device='cuda:2')
num_batchs: tensor([6015], device='cuda:0')
get_neighbors consume: 3.68743s
num_batchs: tensor([6948], device='cuda:1')
get_neighbors consume: 4.58464s
num_batchs: tensor([6576], device='cuda:3')
num_batchs: tensor([1254], device='cuda:0')
num_batchs: tensor([1642], device='cuda:3')
num_batchs: tensor([1331], device='cuda:2')
num_batchs: tensor([1478], device='cuda:1')
num_batchs: tensor([1227], device='cuda:0')
num_batchs: tensor([1625], device='cuda:3')
num_batchs: tensor([1412], device='cuda:1')
num_batchs: tensor([1440], device='cuda:2')
Epoch 0:
Epoch 0:
Epoch 0:
Epoch 0:
train loss:3025.4560 train ap:0.964935 val ap:0.973583 val auc:0.969748
train loss:2842.4385 train ap:0.968786 val ap:0.973583 val auc:0.969748
train loss:3149.4863 train ap:0.960053 val ap:0.973583 val auc:0.969748
train loss:2905.2378 train ap:0.966912 val ap:0.973583 val auc:0.969748
total time:109.11s prep time:90.95s
total time:109.11s prep time:90.95s
total time:109.11s prep time:90.95s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:109.10s prep time:90.95s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 1:
Epoch 1:
Epoch 1:
Epoch 1:
train loss:2756.1783 train ap:0.969414 val ap:0.976812 val auc:0.973716
train loss:2786.7234 train ap:0.970014 val ap:0.976812 val auc:0.973716
train loss:2600.9249 train ap:0.973055 val ap:0.976812 val auc:0.973716
train loss:2561.2065 train ap:0.974347 val ap:0.976812 val auc:0.973716
total time:107.65s prep time:89.41s
total time:107.65s prep time:89.41s
total time:107.65s prep time:89.41s
total time:107.65s prep time:89.41s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 2:
Epoch 2:
Epoch 2:
Epoch 2:
train loss:2430.7610 train ap:0.976709 val ap:0.979544 val auc:0.976893
train loss:2616.8964 train ap:0.972457 val ap:0.979544 val auc:0.976893
train loss:2666.5888 train ap:0.972383 val ap:0.979544 val auc:0.976893
train loss:2477.5472 train ap:0.975493 val ap:0.979544 val auc:0.976893
total time:107.73s prep time:89.82s
total time:107.73s prep time:89.82s
total time:107.73s prep time:89.82s
total time:107.73s prep time:89.82s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 3:
Epoch 3:
Epoch 3:
Epoch 3:
train loss:2404.6129 train ap:0.977177 val ap:0.979526 val auc:0.976748
train loss:2652.1562 train ap:0.972664 val ap:0.979526 val auc:0.976748
train loss:2561.0276 train ap:0.973517 val ap:0.979526 val auc:0.976748
train loss:2431.4974 train ap:0.976369 val ap:0.979526 val auc:0.976748
total time:107.16s prep time:89.15s
total time:107.16s prep time:89.15s
total time:107.16s prep time:89.15s
total time:107.16s prep time:89.15s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 4:
Epoch 4:
Epoch 4:
Epoch 4:
train loss:2599.0614 train ap:0.973655 val ap:0.980024 val auc:0.977120
train loss:2343.8141 train ap:0.978188 val ap:0.980024 val auc:0.977120
train loss:2382.7643 train ap:0.977246 val ap:0.980024 val auc:0.977120
total time:107.70s prep time:89.60s
total time:107.70s prep time:89.60s
fetch time:0.00s write back time:0.00s
total time:107.70s prep time:89.60s
train loss:2503.4472 train ap:0.974597 val ap:0.980024 val auc:0.977120
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:107.70s prep time:89.60s
fetch time:0.00s write back time:0.00s
Epoch 5:
Epoch 5:
Epoch 5:
Epoch 5:
train loss:2377.6717 train ap:0.977300 val ap:0.981272 val auc:0.978760
total time:108.36s prep time:89.99s
train loss:2586.4221 train ap:0.973873 val ap:0.981272 val auc:0.978760
train loss:2510.5564 train ap:0.974502 val ap:0.981272 val auc:0.978760
train loss:2345.5698 train ap:0.978154 val ap:0.981272 val auc:0.978760
fetch time:0.00s write back time:0.00s
total time:108.36s prep time:89.99s
total time:108.36s prep time:89.99s
total time:108.36s prep time:89.99s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 6:
Epoch 6:
Epoch 6:
Epoch 6:
train loss:2287.1365 train ap:0.979113 val ap:0.981768 val auc:0.979250
train loss:2541.0882 train ap:0.974732 val ap:0.981768 val auc:0.979250
train loss:2441.7481 train ap:0.975795 val ap:0.981768 val auc:0.979250
total time:108.29s prep time:90.22s
train loss:2313.8948 train ap:0.978471 val ap:0.981768 val auc:0.979250
total time:108.29s prep time:90.22s
total time:108.29s prep time:90.22s
fetch time:0.00s write back time:0.00s
total time:108.29s prep time:90.22s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 7:
Epoch 7:
Epoch 7:
Epoch 7:
train loss:2321.0527 train ap:0.978335 val ap:0.980500 val auc:0.978016
train loss:2558.9959 train ap:0.974414 val ap:0.980500 val auc:0.978016
train loss:2289.0225 train ap:0.979144 val ap:0.980500 val auc:0.978016
train loss:2436.1819 train ap:0.975923 val ap:0.980500 val auc:0.978016
total time:107.98s prep time:90.08s
total time:107.98s prep time:90.08s
total time:107.98s prep time:90.08s
total time:107.98s prep time:90.08s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 8:
Epoch 8:
Epoch 8:
Epoch 8:
train loss:2422.3653 train ap:0.976156 val ap:0.982765 val auc:0.980566
train loss:2250.0465 train ap:0.979720 val ap:0.982765 val auc:0.980566
total time:107.98s prep time:89.73s
train loss:2517.5717 train ap:0.975174 val ap:0.982765 val auc:0.980566
total time:107.98s prep time:89.73s
fetch time:0.00s write back time:0.00s
train loss:2284.2223 train ap:0.978957 val ap:0.982765 val auc:0.980566
fetch time:0.00s write back time:0.00s
total time:107.98s prep time:89.73s
total time:107.98s prep time:89.73s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 9:
Epoch 9:
Epoch 9:
Epoch 9:
train loss:2495.3455 train ap:0.975555 val ap:0.980162 val auc:0.977624
train loss:2268.7504 train ap:0.979202 val ap:0.980162 val auc:0.977624
train loss:2243.5499 train ap:0.979831 val ap:0.980162 val auc:0.977624
train loss:2392.5389 train ap:0.976669 val ap:0.980162 val auc:0.977624
total time:108.06s prep time:89.87s
total time:108.06s prep time:89.87s
total time:108.06s prep time:89.87s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:108.06s prep time:89.87s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 10:
Epoch 10:
Epoch 10:
Epoch 10:
train loss:2356.5620 train ap:0.977277 val ap:0.983905 val auc:0.981905
train loss:2475.2578 train ap:0.975923 val ap:0.983905 val auc:0.981905
total time:108.50s prep time:90.58s
train loss:2218.5262 train ap:0.980230 val ap:0.983905 val auc:0.981905
train loss:2249.7741 train ap:0.979533 val ap:0.983905 val auc:0.981905
fetch time:0.00s write back time:0.00s
total time:108.50s prep time:90.58s
total time:108.50s prep time:90.58s
total time:108.50s prep time:90.58s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 11:
Epoch 11:
Epoch 11:
Epoch 11:
train loss:2371.3061 train ap:0.977063 val ap:0.981130 val auc:0.978457
train loss:2215.4943 train ap:0.980281 val ap:0.981130 val auc:0.978457
train loss:2469.7190 train ap:0.975983 val ap:0.981130 val auc:0.978457
train loss:2243.8975 train ap:0.979617 val ap:0.981130 val auc:0.978457
total time:107.79s prep time:89.81s
total time:107.79s prep time:89.81s
total time:107.79s prep time:89.81s
total time:107.79s prep time:89.81s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 12:
Epoch 12:
Epoch 12:
Epoch 12:
train loss:2454.1705 train ap:0.976276 val ap:0.983270 val auc:0.981183
train loss:2225.2349 train ap:0.979939 val ap:0.983270 val auc:0.981183
train loss:2337.4529 train ap:0.977606 val ap:0.983270 val auc:0.981183
train loss:2189.4448 train ap:0.980683 val ap:0.983270 val auc:0.981183
total time:108.64s prep time:90.63s
total time:108.64s prep time:90.63s
total time:108.64s prep time:90.63s
total time:108.64s prep time:90.63s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 13:
Epoch 13:
Epoch 13:
Epoch 13:
train loss:2374.7256 train ap:0.977024 val ap:0.981550 val auc:0.979260
train loss:2221.0432 train ap:0.980189 val ap:0.981550 val auc:0.979260
train loss:2471.9543 train ap:0.975953 val ap:0.981550 val auc:0.979260
train loss:2241.1903 train ap:0.979649 val ap:0.981550 val auc:0.979260
total time:108.69s prep time:90.62s
total time:108.69s prep time:90.62s
total time:108.69s prep time:90.62s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:108.69s prep time:90.62s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 14:
Epoch 14:
Epoch 14:
Epoch 14:
train loss:2358.8334 train ap:0.977242 val ap:0.981721 val auc:0.979185
train loss:2208.2876 train ap:0.980383 val ap:0.981721 val auc:0.979185
train loss:2227.2542 train ap:0.979885 val ap:0.981721 val auc:0.979185
train loss:2460.0171 train ap:0.976178 val ap:0.981721 val auc:0.979185
total time:107.77s prep time:89.81s
total time:107.77s prep time:89.81s
total time:107.77s prep time:89.81s
total time:107.77s prep time:89.81s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 15:
Epoch 15:
Epoch 15:
Epoch 15:
Early stopping at epoch 15
Early stopping at epoch 15
Early stopping at epoch 15
Early stopping at epoch 15
Loading the best model at epoch 10
Loading the best model at epoch 10
Loading the best model at epoch 10
Loading the best model at epoch 10
0.9546157717704773 0.9452952742576599
0.9546157717704773 0.9452952742576599
0.9546157717704773 0.9452952742576599
0.9546157717704773 0.9452952742576599
0.9489824175834656 0.9380446672439575
0.9489824175834656 0.9380446672439575
0.9489824175834656 0.9380446672439575
0.9489824175834656 0.9380446672439575
test AP:0.944646 test AUC:0.934612
test AP:0.944646 test AUC:0.934612
test AP:0.944646 test AUC:0.934612
test AP:0.944646 test AUC:0.934612
test_dataset 798529 avg_time 28.8176681804657
test_dataset 752056 avg_time 28.81766140937805
test_dataset 984603 avg_time 28.817663559913637
test_dataset 886223 avg_time 28.817657227516175
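The run above halts five epochs after its best validation AP ("Early stopping at epoch 15", "Loading the best model at epoch 10"), i.e. patience-5 early stopping with checkpointing. A minimal sketch of such a stopper, assuming PyTorch; the class name and checkpoint path are illustrative, not taken from the actual training script:

import torch

class EarlyStopper:
    # Patience-based early stopping on validation AP (assumed behavior).
    def __init__(self, patience=5):
        self.patience = patience
        self.best_ap = float("-inf")
        self.best_epoch = -1
        self.bad_epochs = 0

    def step(self, epoch, val_ap, model, best_path="best.pt"):
        """Return True when training should stop."""
        if val_ap > self.best_ap:
            self.best_ap, self.best_epoch, self.bad_epochs = val_ap, epoch, 0
            torch.save(model.state_dict(), best_path)  # checkpoint the best model
            return False
        self.bad_epochs += 1
        return self.bad_epochs >= self.patience

After training, reloading with model.load_state_dict(torch.load(best_path)) reproduces the "Loading the best model at epoch N" step before the test evaluation.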
LOCAL RANK 0, RANK0
use cuda on 0
994790
get_neighbors consume: 6.18508s
Epoch 0:
train loss:12236.7578 train ap:0.986976 val ap:0.934674 val auc:0.946284
total time:630.37s prep time:545.79s
fetch time:0.00s write back time:0.00s
Epoch 1:
train loss:11833.1818 train ap:0.987815 val ap:0.960581 val auc:0.965728
total time:628.44s prep time:542.56s
fetch time:0.00s write back time:0.00s
Epoch 2:
train loss:11622.9559 train ap:0.988244 val ap:0.956752 val auc:0.963083
total time:622.89s prep time:538.77s
fetch time:0.00s write back time:0.00s
Epoch 3:
train loss:11679.1400 train ap:0.988072 val ap:0.929351 val auc:0.943797
total time:681.88s prep time:569.50s
fetch time:0.00s write back time:0.00s
Epoch 4:
train loss:11676.1710 train ap:0.988098 val ap:0.936353 val auc:0.948531
total time:849.98s prep time:741.47s
fetch time:0.00s write back time:0.00s
Epoch 5:
train loss:11745.6001 train ap:0.987897 val ap:0.950828 val auc:0.958958
total time:862.77s prep time:750.90s
fetch time:0.00s write back time:0.00s
Epoch 6:
Early stopping at epoch 6
Loading the best model at epoch 1
0.9248434901237488 0.929413378238678
0.8653780221939087 0.861071765422821
test AP:0.847958 test AUC:0.837159
test_dataset 6647176 avg_time 87.00003329753876
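The single-GPU run above is followed by a four-GPU run. A minimal sketch (an assumption, not the repo's actual entry point) of the torchrun/DDP setup that would produce the "LOCAL RANK i, RANKi" and "use cuda on i" lines below:

import os
import torch
import torch.distributed as dist

# torchrun --nproc_per_node=4 train.py sets LOCAL_RANK/RANK per process.
local_rank = int(os.environ.get("LOCAL_RANK", 0))
rank = int(os.environ.get("RANK", 0))
print(f"LOCAL RANK {local_rank}, RANK{rank}")
torch.cuda.set_device(local_rank)        # bind exactly one GPU per process
print(f"use cuda on {local_rank}")
dist.init_process_group(backend="nccl")  # join the four processes into one job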
LOCAL RANK 0, RANK0
LOCAL RANK 2, RANK2
LOCAL RANK 1, RANK1
LOCAL RANK 3, RANK3
use cuda on 0
use cuda on 1
use cuda on 2
use cuda on 3
994790
994790
994790
994790
get_neighbors consume: 6.11692s
get_neighbors consume: 6.12671s
get_neighbors consume: 6.03983s
get_neighbors consume: 6.05302s
num_batchs: tensor([17384], device='cuda:0')
num_batchs: tensor([3931], device='cuda:2')
num_batchs: tensor([16139], device='cuda:1')
num_batchs: tensor([14244], device='cuda:3')
num_batchs: tensor([1915], device='cuda:3')
num_batchs: tensor([1395], device='cuda:1')
num_batchs: tensor([6920], device='cuda:2')
num_batchs: tensor([850], device='cuda:0')
num_batchs: tensor([5545], device='cuda:2')
num_batchs: tensor([1015], device='cuda:0')
num_batchs: tensor([2785], device='cuda:3')
num_batchs: tensor([1736], device='cuda:1')
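Per-rank prints such as the num_batchs lines above interleave freely because the four processes share one stdout, which is why several lines in this log were captured mid-collision. A common remedy, sketched under the assumption of an initialized process group, is to take turns by rank:

import torch.distributed as dist

def print_in_rank_order(msg):
    """Serialize prints so each rank's line lands intact and in rank order."""
    for r in range(dist.get_world_size()):
        if dist.get_rank() == r:
            print(msg, flush=True)
        dist.barrier()  # everyone waits until rank r has printed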
Epoch 0:
Epoch 0:
Epoch 0:
Epoch 0:
train loss:1331.9399 train ap:0.977517 val ap:0.961959 val auc:0.965566
train loss:1162.0666 train ap:0.981900 val ap:0.961959 val auc:0.965566
train loss:1244.0312 train ap:0.978548 val ap:0.961959 val auc:0.965566
train loss:1308.8701 train ap:0.979221 val ap:0.961959 val auc:0.965566
total time:125.18s prep time:60.37s
total time:125.18s prep time:60.37s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:125.18s prep time:60.37s
total time:125.18s prep time:60.37s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 1:
Epoch 1:
Epoch 1:
Epoch 1:
train loss:1169.3326 train ap:0.981283 val ap:0.965914 val auc:0.968475
train loss:1227.6728 train ap:0.981686 val ap:0.965914 val auc:0.968475
train loss:1226.7282 train ap:0.980509 val ap:0.965914 val auc:0.968475
train loss:1078.3342 train ap:0.984551 val ap:0.965914 val auc:0.968475
total time:125.97s prep time:61.45s
total time:125.97s prep time:61.45s
total time:125.97s prep time:61.45s
total time:125.97s prep time:61.45s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 2:
Epoch 2:
Epoch 2:
Epoch 2:
train loss:1157.2600 train ap:0.981707 val ap:0.967135 val auc:0.969484
train loss:1227.8567 train ap:0.981577 val ap:0.967135 val auc:0.969484
train loss:1224.2131 train ap:0.980388 val ap:0.967135 val auc:0.969484
total time:125.33s prep time:60.54s
total time:125.33s prep time:60.54s
total time:125.33s prep time:60.54s
fetch time:0.00s write back time:0.00s
train loss:1071.5106 train ap:0.984690 val ap:0.967135 val auc:0.969484
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:125.33s prep time:60.54s
fetch time:0.00s write back time:0.00s
Epoch 3:
Epoch 3:
Epoch 3:
Epoch 3:
train loss:1154.4245 train ap:0.981666 val ap:0.939874 val auc:0.947249
train loss:1221.6654 train ap:0.981759 val ap:0.939874 val auc:0.947249
train loss:1217.4941 train ap:0.980394 val ap:0.939874 val auc:0.947249
train loss:1064.5069 train ap:0.984769 val ap:0.939874 val auc:0.947249
total time:124.82s prep time:60.42s
total time:124.82s prep time:60.42s
total time:124.82s prep time:60.42s
fetch time:0.00s write back time:0.00s
total time:124.82s prep time:60.42s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 4:
Epoch 4:
Epoch 4:
Epoch 4:
train loss:1206.6170 train ap:0.980821 val ap:0.958336 val auc:0.962538
train loss:1058.5493 train ap:0.984994 val ap:0.958336 val auc:0.962538
train loss:1153.8455 train ap:0.981657 val ap:0.958336 val auc:0.962538
train loss:1214.2795 train ap:0.981938 val ap:0.958336 val auc:0.962538
total time:124.91s prep time:60.22s
total time:124.91s prep time:60.22s
total time:124.91s prep time:60.22s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:124.91s prep time:60.22s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 5:
Epoch 5:
Epoch 5:
Epoch 5:
train loss:1211.5216 train ap:0.982009 val ap:0.949435 val auc:0.953953
train loss:1140.1948 train ap:0.982149 val ap:0.949435 val auc:0.953953
train loss:1050.7336 train ap:0.985195 val ap:0.949435 val auc:0.953953
total time:124.92s prep time:60.40s
train loss:1205.8990 train ap:0.980842 val ap:0.949435 val auc:0.953953
total time:124.92s prep time:60.40s
fetch time:0.00s write back time:0.00s
total time:124.92s prep time:60.40s
total time:124.92s prep time:60.40s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 6:
Epoch 6:
Epoch 6:
Epoch 6:
train loss:1180.1657 train ap:0.981860 val ap:0.960236 val auc:0.963957
train loss:1120.2221 train ap:0.982891 val ap:0.960236 val auc:0.963957
train loss:1180.3385 train ap:0.983051 val ap:0.960236 val auc:0.963957
total time:124.69s prep time:60.01s
total time:124.69s prep time:60.01s
fetch time:0.00s write back time:0.00s
total time:124.69s prep time:60.01s
fetch time:0.00s write back time:0.00s
train loss:1026.1377 train ap:0.985808 val ap:0.960236 val auc:0.963957
fetch time:0.00s write back time:0.00s
total time:124.69s prep time:60.01s
fetch time:0.00s write back time:0.00s
Epoch 7:
Epoch 7:
Epoch 7:
Epoch 7:
Early stopping at epoch 7
Early stopping at epoch 7
Early stopping at epoch 7
Early stopping at epoch 7
Loading the best model at epoch 2
Loading the best model at epoch 2
Loading the best model at epoch 2
Loading the best model at epoch 2
0.9759191870689392 0.977138340473175
0.9759191870689392 0.977138340473175
0.9759191870689392 0.977138340473175
0.9759191870689392 0.977138340473175
0.9553558826446533 0.9581618309020996
0.9553558826446533 0.9581618309020996
0.9553558826446533 0.9581618309020996
0.9553558826446533 0.9581618309020996
test AP:0.940169 test AUC:0.942460
test AP:0.940169 test AUC:0.942460
test AP:0.940169 test AUC:0.942460
test AP:0.940169 test AUC:0.942460
test_dataset 836763 avg_time 9.689588661193847
test_dataset 509738 avg_time 9.689606781005859
test_dataset 1148929 avg_time 9.689572229385377
test_dataset 4151746 avg_time 9.689626097679138
LOCAL RANK 0, RANK0
initialize distributed
use cuda on 0
18143
get_neighbors consume: 5.92616s
raw file found, skipping download
Dataset directory is /home/zlj/.miniconda3/envs/dgnn-3.10/lib/python3.10/site-packages/tgb/datasets/tgbl_flight
loading processed file
tensor([1546318800, 1546318800, 1546318800, ..., 1667188800, 1667188800, 1667188800], device='cuda:0') tensor([1546318800, 1546318800, 1546318800, ..., 1667188800, 1667188800, 1667188800])
tensor([1638162000, 1638162000, 1638162000, ..., 1638162000, 1638162000, 1638162000], device='cuda:0')  (100 repeated values elided)
tensor([1653796800, 1653796800, 1653796800, ..., 1667188800, 1667188800, 1667188800], device='cuda:0') tensor([1546318800, 1546318800, 1546318800, ..., 1638075600, 1638075600, 1638075600], device='cuda:0')
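The tensors above are Unix-epoch edge timestamps printed around the dataset's chronological split boundaries (the repeated 1638162000 block is one such boundary for tgbl_flight). A sketch of a time-ordered split, assuming a sorted 1-D timestamp tensor ts; the ratios are illustrative, not the values actually used:

import torch

def chrono_split(ts: torch.Tensor, val_ratio=0.15, test_ratio=0.15):
    # Split events chronologically: earliest for train, latest for test.
    n = ts.numel()
    train_end = int(n * (1 - val_ratio - test_ratio))
    val_end = int(n * (1 - test_ratio))
    return ts[:train_end], ts[train_end:val_end], ts[val_end:]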
LOCAL RANK 0, RANK0
use cuda on 0
18143
get_neighbors consume: 5.32513s
Epoch 0:
train loss:17328.4542 train ap:0.989251 val ap:0.991792 val auc:0.992826
total time:985.43s prep time:855.49s
fetch time:0.00s write back time:0.00s
Epoch 1:
train loss:14294.4074 train ap:0.992597 val ap:0.987955 val auc:0.988882
total time:981.59s prep time:851.99s
fetch time:0.00s write back time:0.00s
Epoch 2:
train loss:14297.0658 train ap:0.992655 val ap:0.991572 val auc:0.992562
total time:980.14s prep time:851.62s
fetch time:0.00s write back time:0.00s
Epoch 3:
train loss:14622.0160 train ap:0.992315 val ap:0.989207 val auc:0.990117
total time:1115.17s prep time:951.63s
fetch time:0.00s write back time:0.00s
Epoch 4:
train loss:14551.7185 train ap:0.992456 val ap:0.986963 val auc:0.988173
total time:1225.09s prep time:1063.97s
fetch time:0.00s write back time:0.00s
Epoch 5:
Early stopping at epoch 5
Loading the best model at epoch 0
0.975067675113678 0.9767603874206543
0.9743184447288513 0.9764328598976135
test AP:0.970799 test AUC:0.973111
test_dataset 10026943 avg_time 112.53466749191284
LOCAL RANK 0, RANK0
LOCAL RANK 2, RANK2
LOCAL RANK 1, RANK1
LOCAL RANK 3, RANK3
use cuda on 1
use cuda on 0
use cuda on 3
use cuda on 2
18143
18143
18143
18143
get_neighbors consume: 5.95983s
get_neighbors consume: 6.05613s
get_neighbors consume: 6.14142s
num_batchs: tensor([19750], device='cuda:3')
num_batchs: tensor([17645], device='cuda:2')
num_batchs: tensor([20015], device='cuda:1')
get_neighbors consume: 6.29594s
num_batchs: tensor([20994], device='cuda:0')
num_batchs: tensor([3877], device='cuda:0')
num_batchs: tensor([3605], device='cuda:2')
num_batchs: tensor([4524], device='cuda:3')
num_batchs: tensor([4708], device='cuda:1')
num_batchs: tensor([4149], device='cuda:0')
num_batchs: tensor([4748], device='cuda:3')
num_batchs: tensor([4298], device='cuda:1')
num_batchs: tensor([3638], device='cuda:2')
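The two num_batchs rounds above (one for training, one for validation) show unequal batch counts per rank, yet DDP collectives require every rank to step the same number of times. One plausible reconciliation, an assumption rather than the repo's confirmed code, is to all_reduce the local count so all ranks adopt a common value:

import torch
import torch.distributed as dist

def agree_on_num_batches(local_count: int, device) -> int:
    t = torch.tensor([local_count], device=device)
    print("num_batchs:", t)                   # matches the log lines above
    dist.all_reduce(t, op=dist.ReduceOp.MAX)  # every rank adopts the max count
    return int(t.item())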
Epoch 0:
Epoch 0:
Epoch 0:
Epoch 0:
train loss:8041.7524 train ap:0.963479 val ap:0.976991 val auc:0.979452
train loss:5974.9788 train ap:0.976456 val ap:0.976991 val auc:0.979452
train loss:6391.0674 train ap:0.969010 val ap:0.976991 val auc:0.979452
total time:369.28s prep time:310.18s
total time:369.28s prep time:310.18s
fetch time:0.00s write back time:0.00s
total time:369.28s prep time:310.18s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:6098.5427 train ap:0.979030 val ap:0.976991 val auc:0.979452
total time:369.28s prep time:310.18s
fetch time:0.00s write back time:0.00s
Epoch 1:
Epoch 1:
Epoch 1:
Epoch 1:
train loss:6785.4083 train ap:0.974335 val ap:0.981245 val auc:0.983090
train loss:5226.3762 train ap:0.979351 val ap:0.981245 val auc:0.983090
train loss:4392.9569 train ap:0.986932 val ap:0.981245 val auc:0.983090
total time:369.13s prep time:309.22s
total time:369.13s prep time:309.22s
total time:369.13s prep time:309.22s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:4860.7541 train ap:0.986432 val ap:0.981245 val auc:0.983090
total time:369.13s prep time:309.22s
fetch time:0.00s write back time:0.00s
Epoch 2:
Epoch 2:
Epoch 2:
Epoch 2:
train loss:4129.9429 train ap:0.988314 val ap:0.980251 val auc:0.982577
train loss:6498.5192 train ap:0.976384 val ap:0.980251 val auc:0.982577
train loss:4974.4880 train ap:0.981104 val ap:0.980251 val auc:0.982577
total time:365.46s prep time:306.60s
total time:365.46s prep time:306.60s
total time:365.46s prep time:306.60s
train loss:4656.6260 train ap:0.987497 val ap:0.980251 val auc:0.982577
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:365.46s prep time:306.60s
fetch time:0.00s write back time:0.00s
Epoch 3:
Epoch 3:
Epoch 3:
Epoch 3:
train loss:6346.4929 train ap:0.977303 val ap:0.978576 val auc:0.980634
train loss:4068.8824 train ap:0.988540 val ap:0.978576 val auc:0.980634
train loss:4918.4225 train ap:0.981662 val ap:0.978576 val auc:0.980634
total time:362.13s prep time:303.87s
train loss:4560.1019 train ap:0.987865 val ap:0.978576 val auc:0.980634
total time:362.13s prep time:303.87s
total time:362.13s prep time:303.87s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:362.13s prep time:303.87s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 4:
Epoch 4:
Epoch 4:
Epoch 4:
train loss:6310.2800 train ap:0.977605 val ap:0.974950 val auc:0.978057
train loss:3919.8300 train ap:0.989225 val ap:0.974950 val auc:0.978057
train loss:4854.9052 train ap:0.981859 val ap:0.974950 val auc:0.978057
total time:363.65s prep time:304.71s
total time:363.65s prep time:304.71s
fetch time:0.00s write back time:0.00s
total time:363.65s prep time:304.71s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:4467.4543 train ap:0.988407 val ap:0.974950 val auc:0.978057
total time:363.65s prep time:304.71s
fetch time:0.00s write back time:0.00s
Epoch 5:
Epoch 5:
Epoch 5:
Epoch 5:
train loss:6215.2998 train ap:0.978244 val ap:0.970081 val auc:0.973433
train loss:3909.4435 train ap:0.989068 val ap:0.970081 val auc:0.973433
train loss:4805.6621 train ap:0.982218 val ap:0.970081 val auc:0.973433
total time:367.40s prep time:307.71s
total time:367.40s prep time:307.71s
total time:367.40s prep time:307.71s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:4363.8407 train ap:0.988786 val ap:0.970081 val auc:0.973433
total time:367.40s prep time:307.71s
fetch time:0.00s write back time:0.00s
Epoch 6:
Epoch 6:
Epoch 6:
Epoch 6:
LOCAL RANK 0, RANK0
initialize distributed
use cuda on 0
352637
get_neighbors consume: 1.38667s
raw file found, skipping download
Dataset directory is /home/zlj/.miniconda3/envs/dgnn-3.10/lib/python3.10/site-packages/tgb/datasets/tgbl_review
loading processed file
tensor([ 929232000,  930787200,  931824000, ..., 1538524800, 1538611200, 1538611200], device='cuda:0') tensor([ 929232000,  930787200,  931824000, ..., 1538524800, 1538611200, 1538611200])
tensor([1464912000, 1464912000, 1464912000, ..., 1464912000, 1464912000, 1464912000], device='cuda:0')  (100 repeated values elided)
tensor([1488844800, 1488844800, 1488844800, ..., 1538524800, 1538611200, 1538611200], device='cuda:0') tensor([ 929232000,  930787200,  931824000, ..., 1464825600, 1464825600, 1464825600], device='cuda:0')
val metric
metric hits@ is 0.545051806698618
metric mrr is 0.24419529163892895
metric ap is 0.05492426247254426
metric auc is 0.8336828640087952
test metric
metric hits@ is 0.50441967217847
metric mrr is 0.20610860927523572
metric ap is 0.03974634949228331
metric auc is 0.8243427933946369
test_dataset 728919 avg_time 0.0
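The hits@/mrr/ap/auc block above follows TGB's ranked link-prediction protocol: each positive destination is scored against sampled negatives and the metrics derive from the positive's rank (the log's format string prints "hits@" without its k, which stays as-is here). A minimal sketch of MRR and hits@k from raw scores, with k=10 assumed for illustration:

import torch

def ranking_metrics(pos_score: torch.Tensor, neg_scores: torch.Tensor, k: int = 10):
    """pos_score: (B,) positive scores; neg_scores: (B, N) negatives per query."""
    # 1-based rank of the positive among its negatives (ties count against it).
    rank = 1 + (neg_scores >= pos_score.unsqueeze(1)).sum(dim=1)
    mrr = (1.0 / rank.float()).mean().item()
    hits_at_k = (rank <= k).float().mean().item()
    return mrr, hits_at_k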
LOCAL RANK 0, RANK0
use cuda on 0
352637
get_neighbors consume: 1.137s
Epoch 0:
train loss:13642.4319 train ap:0.904599 val ap:0.877952 val auc:0.875205
total time:237.42s prep time:206.95s
fetch time:0.00s write back time:0.00s
Epoch 1:
train loss:13355.5800 train ap:0.909874 val ap:0.893142 val auc:0.885588
total time:254.75s prep time:224.74s
fetch time:0.00s write back time:0.00s
Epoch 2:
train loss:13198.5205 train ap:0.912843 val ap:0.880398 val auc:0.881145
total time:254.27s prep time:223.80s
fetch time:0.00s write back time:0.00s
Epoch 3:
train loss:13217.6608 train ap:0.912531 val ap:0.900547 val auc:0.891135
total time:246.83s prep time:217.90s
fetch time:0.00s write back time:0.00s
Epoch 4:
train loss:13175.3821 train ap:0.913456 val ap:0.896569 val auc:0.888247
total time:254.71s prep time:222.90s
fetch time:0.00s write back time:0.00s
Epoch 5:
train loss:13129.5668 train ap:0.914303 val ap:0.901932 val auc:0.891772
total time:256.61s prep time:225.98s
fetch time:0.00s write back time:0.00s
Epoch 6:
train loss:13110.9403 train ap:0.914760 val ap:0.899841 val auc:0.890011
total time:258.74s prep time:228.35s
fetch time:0.00s write back time:0.00s
Epoch 7:
train loss:13174.3921 train ap:0.913513 val ap:0.899906 val auc:0.890777
total time:246.85s prep time:217.08s
fetch time:0.00s write back time:0.00s
Epoch 8:
train loss:13082.4202 train ap:0.915001 val ap:0.896229 val auc:0.888280
total time:248.71s prep time:218.14s
fetch time:0.00s write back time:0.00s
Epoch 9:
train loss:13044.7357 train ap:0.915478 val ap:0.900687 val auc:0.891358
total time:251.59s prep time:220.47s
fetch time:0.00s write back time:0.00s
Epoch 10:
Early stopping at epoch 10
Loading the best model at epoch 5
0.7501348853111267 0.711144745349884
0.8450031280517578 0.8455020189285278
test AP:0.803709 test AUC:0.829060
test_dataset 728919 avg_time 48.60917709827423
LOCAL RANK 0, RANK0
LOCAL RANK 2, RANK2
LOCAL RANK 1, RANK1
LOCAL RANK 3, RANK3
use cuda on 1
use cuda on 3
use cuda on 2
use cuda on 0
352637
352637
352637
352637
get_neighbors consume: 1.3085s
get_neighbors consume: 1.3155s
num_batchs: tensor([3922], device='cuda:2')
get_neighbors consume: 1.34791s
num_batchs: tensor([4117], device='cuda:0')
num_batchs: tensor([4586], device='cuda:1')
get_neighbors consume: 1.59381s
num_batchs: tensor([4442], device='cuda:3')
num_batchs: tensor([1101], device='cuda:2')
num_batchs: tensor([961], device='cuda:3')
num_batchs: tensor([725], device='cuda:1')
num_batchs: tensor([860], device='cuda:0')
num_batchs: tensor([954], device='cuda:3')
num_batchs: tensor([804], device='cuda:1')
num_batchs: tensor([845], device='cuda:0')
num_batchs: tensor([1054], device='cuda:2')
Epoch 0:
Epoch 0:
Epoch 0:
Epoch 0:
train loss:3455.0875 train ap:0.881182 val ap:0.881479 val auc:0.873071
train loss:3437.9604 train ap:0.881580 val ap:0.881479 val auc:0.873071
train loss:3072.9170 train ap:0.905032 val ap:0.881479 val auc:0.873071
train loss:3234.8054 train ap:0.896622 val ap:0.881479 val auc:0.873071
total time:82.22s prep time:69.20s
total time:82.22s prep time:69.20s
total time:82.22s prep time:69.20s
fetch time:0.00s write back time:0.00s
total time:82.22s prep time:69.20s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 1:
Epoch 1:
Epoch 1:
Epoch 1:
train loss:3354.9675 train ap:0.889861 val ap:0.851767 val auc:0.862433
train loss:3336.9128 train ap:0.890040 val ap:0.851767 val auc:0.862433
total time:82.42s prep time:69.02s
total time:82.42s prep time:69.02s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:2970.5795 train ap:0.912628 val ap:0.851767 val auc:0.862433
train loss:3119.9811 train ap:0.904879 val ap:0.851767 val auc:0.862433
total time:82.42s prep time:69.02s
total time:82.42s prep time:69.02s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 2:
Epoch 2:
Epoch 2:
Epoch 2:
train loss:3304.0523 train ap:0.895163 val ap:0.850313 val auc:0.854590
train loss:3290.1262 train ap:0.895067 val ap:0.850313 val auc:0.854590
total time:82.76s prep time:69.37s
total time:82.76s prep time:69.37s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:3074.9660 train ap:0.909920 val ap:0.850313 val auc:0.854590
train loss:2921.6779 train ap:0.917627 val ap:0.850313 val auc:0.854590
total time:82.76s prep time:69.37s
total time:82.76s prep time:69.37s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 3:
Epoch 3:
Epoch 3:
Epoch 3:
train loss:3313.2843 train ap:0.894512 val ap:0.881532 val auc:0.874223
train loss:3299.2848 train ap:0.894433 val ap:0.881532 val auc:0.874223
train loss:2926.6350 train ap:0.917671 val ap:0.881532 val auc:0.874223
train loss:3072.5431 train ap:0.909920 val ap:0.881532 val auc:0.874223
total time:81.20s prep time:68.09s
total time:81.20s prep time:68.09s
total time:81.20s prep time:68.09s
total time:81.20s prep time:68.09s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 4:
Epoch 4:
Epoch 4:
Epoch 4:
train loss:3291.2189 train ap:0.896117 val ap:0.884235 val auc:0.877555
train loss:3047.8370 train ap:0.911690 val ap:0.884235 val auc:0.877555
train loss:3280.4566 train ap:0.895788 val ap:0.884235 val auc:0.877555
total time:75.97s prep time:63.78s
train loss:2908.7063 train ap:0.918882 val ap:0.884235 val auc:0.877555
total time:75.97s prep time:63.78s
total time:75.97s prep time:63.78s
fetch time:0.00s write back time:0.00s
total time:75.97s prep time:63.78s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 5:
Epoch 5:
Epoch 5:
Epoch 5:
train loss:3306.4561 train ap:0.895716 val ap:0.890279 val auc:0.881023
train loss:3292.3966 train ap:0.895672 val ap:0.890279 val auc:0.881023
train loss:2916.5336 train ap:0.919027 val ap:0.890279 val auc:0.881023
train loss:3060.5498 train ap:0.911726 val ap:0.890279 val auc:0.881023
total time:93.89s prep time:80.63s
total time:93.89s prep time:80.63s
total time:93.89s prep time:80.63s
fetch time:0.00s write back time:0.00s
total time:93.89s prep time:80.63s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 6:
Epoch 6:
Epoch 6:
Epoch 6:
train loss:3275.2013 train ap:0.897392 val ap:0.895190 val auc:0.884035
total time:80.06s prep time:66.91s
train loss:3259.0587 train ap:0.897736 val ap:0.895190 val auc:0.884035
fetch time:0.00s write back time:0.00s
train loss:2892.8312 train ap:0.920219 val ap:0.895190 val auc:0.884035
train loss:3026.5021 train ap:0.913531 val ap:0.895190 val auc:0.884035
total time:80.06s prep time:66.91s
total time:80.06s prep time:66.91s
total time:80.06s prep time:66.91s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 7:
Epoch 7:
Epoch 7:
Epoch 7:
train loss:3250.9541 train ap:0.898891 val ap:0.891135 val auc:0.882604
train loss:3258.2501 train ap:0.899116 val ap:0.891135 val auc:0.882604
train loss:3009.0201 train ap:0.914832 val ap:0.891135 val auc:0.882604
train loss:2879.3229 train ap:0.921614 val ap:0.891135 val auc:0.882604
total time:79.55s prep time:66.41s
total time:79.55s prep time:66.41s
total time:79.55s prep time:66.41s
total time:79.55s prep time:66.41s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 8:
Epoch 8:
Epoch 8:
Epoch 8:
train loss:3257.2647 train ap:0.899283 val ap:0.895562 val auc:0.885143
total time:79.58s prep time:66.55s
fetch time:0.00s write back time:0.00s
train loss:3250.4541 train ap:0.899202 val ap:0.895562 val auc:0.885143
train loss:2875.2902 train ap:0.921863 val ap:0.895562 val auc:0.885143
train loss:3006.9676 train ap:0.915343 val ap:0.895562 val auc:0.885143
total time:79.58s prep time:66.55s
total time:79.58s prep time:66.55s
fetch time:0.00s write back time:0.00s
total time:79.58s prep time:66.55s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 9:
Epoch 9:
Epoch 9:
Epoch 9:
train loss:3262.0807 train ap:0.899378 val ap:0.895996 val auc:0.885572
train loss:3254.8120 train ap:0.898920 val ap:0.895996 val auc:0.885572
total time:79.91s prep time:66.75s
total time:79.91s prep time:66.75s
fetch time:0.00s write back time:0.00s
train loss:2876.3143 train ap:0.922068 val ap:0.895996 val auc:0.885572
fetch time:0.00s write back time:0.00s
train loss:3010.4703 train ap:0.915422 val ap:0.895996 val auc:0.885572
total time:79.91s prep time:66.75s
fetch time:0.00s write back time:0.00s
total time:79.91s prep time:66.75s
fetch time:0.00s write back time:0.00s
Epoch 10:
Epoch 10:
Epoch 10:
Epoch 10:
train loss:3239.6802 train ap:0.900019 val ap:0.896796 val auc:0.885694
train loss:3253.2570 train ap:0.899919 val ap:0.896796 val auc:0.885694
train loss:2875.6315 train ap:0.922227 val ap:0.896796 val auc:0.885694
train loss:2999.5772 train ap:0.915974 val ap:0.896796 val auc:0.885694
total time:80.80s prep time:67.69s
total time:80.80s prep time:67.69s
total time:80.80s prep time:67.69s
total time:80.80s prep time:67.69s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 11:
Epoch 11:
Epoch 11:
Epoch 11:
train loss:2856.0798 train ap:0.923709 val ap:0.895587 val auc:0.885286
train loss:3242.5730 train ap:0.900728 val ap:0.895587 val auc:0.885286
train loss:3228.4647 train ap:0.901085 val ap:0.895587 val auc:0.885286
train loss:2983.3531 train ap:0.917122 val ap:0.895587 val auc:0.885286
total time:80.81s prep time:67.57s
total time:80.81s prep time:67.57s
total time:80.81s prep time:67.57s
total time:80.81s prep time:67.57s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 12:
Epoch 12:
Epoch 12:
Epoch 12:
train loss:3232.7865 train ap:0.900783 val ap:0.896265 val auc:0.885029
train loss:3242.4190 train ap:0.900813 val ap:0.896265 val auc:0.885029
total time:80.35s prep time:67.14s
train loss:2867.1582 train ap:0.922902 val ap:0.896265 val auc:0.885029
train loss:2991.7870 train ap:0.916520 val ap:0.896265 val auc:0.885029
total time:80.35s prep time:67.14s
fetch time:0.00s write back time:0.00s
total time:80.35s prep time:67.14s
fetch time:0.00s write back time:0.00s
total time:80.35s prep time:67.14s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 13:
Epoch 13:
Epoch 13:
Epoch 13:
train loss:3210.9926 train ap:0.903183 val ap:0.897099 val auc:0.886113
train loss:3204.9153 train ap:0.902872 val ap:0.897099 val auc:0.886113
train loss:2951.0695 train ap:0.919192 val ap:0.897099 val auc:0.886113
total time:79.96s prep time:66.83s
total time:79.96s prep time:66.83s
train loss:2830.1740 train ap:0.925447 val ap:0.897099 val auc:0.886113
total time:79.96s prep time:66.83s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
total time:79.96s prep time:66.83s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 14:
Epoch 14:
Epoch 14:
Epoch 14:
train loss:3214.7974 train ap:0.902729 val ap:0.893782 val auc:0.886151
train loss:2958.8985 train ap:0.918556 val ap:0.893782 val auc:0.886151
train loss:3210.1361 train ap:0.902159 val ap:0.893782 val auc:0.886151
train loss:2834.9212 train ap:0.924887 val ap:0.893782 val auc:0.886151
total time:79.83s prep time:66.74s
total time:79.83s prep time:66.74s
total time:79.83s prep time:66.74s
total time:79.83s prep time:66.74s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 15:
Epoch 15:
Epoch 15:
Epoch 15:
train loss:3197.8651 train ap:0.903439 val ap:0.895787 val auc:0.885230
train loss:3205.1650 train ap:0.903768 val ap:0.895787 val auc:0.885230
train loss:2953.2088 train ap:0.918978 val ap:0.895787 val auc:0.885230
total time:79.47s prep time:66.43s
train loss:2824.2634 train ap:0.925647 val ap:0.895787 val auc:0.885230
total time:79.47s prep time:66.43s
total time:79.47s prep time:66.43s
fetch time:0.00s write back time:0.00s
total time:79.47s prep time:66.43s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 16:
Epoch 16:
Epoch 16:
Epoch 16:
train loss:3199.0289 train ap:0.903872 val ap:0.899565 val auc:0.889018
train loss:2935.8385 train ap:0.920066 val ap:0.899565 val auc:0.889018
train loss:3196.4945 train ap:0.903233 val ap:0.899565 val auc:0.889018
train loss:2820.3423 train ap:0.925820 val ap:0.899565 val auc:0.889018
total time:80.10s prep time:67.15s
total time:80.10s prep time:67.15s
total time:80.10s prep time:67.15s
total time:80.10s prep time:67.15s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 17:
Epoch 17:
Epoch 17:
Epoch 17:
train loss:2945.7744 train ap:0.919293 val ap:0.897282 val auc:0.887294
train loss:2829.3440 train ap:0.924931 val ap:0.897282 val auc:0.887294
train loss:3211.4821 train ap:0.902933 val ap:0.897282 val auc:0.887294
train loss:3199.5261 train ap:0.902971 val ap:0.897282 val auc:0.887294
total time:80.03s prep time:67.12s
total time:80.03s prep time:67.12s
total time:80.03s prep time:67.12s
total time:80.03s prep time:67.12s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 18:
Epoch 18:
Epoch 18:
Epoch 18:
train loss:2944.9082 train ap:0.919570 val ap:0.898754 val auc:0.887811
train loss:3204.4953 train ap:0.903740 val ap:0.898754 val auc:0.887811
train loss:3202.6150 train ap:0.902970 val ap:0.898754 val auc:0.887811
train loss:2829.4401 train ap:0.925407 val ap:0.898754 val auc:0.887811
total time:80.25s prep time:67.17s
total time:80.25s prep time:67.17s
total time:80.25s prep time:67.17s
total time:80.25s prep time:67.17s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 19:
Epoch 19:
Epoch 19:
Epoch 19:
train loss:2832.3596 train ap:0.924859 val ap:0.894558 val auc:0.885486
train loss:3212.2773 train ap:0.902659 val ap:0.894558 val auc:0.885486
train loss:3200.5754 train ap:0.902784 val ap:0.894558 val auc:0.885486
train loss:2947.7225 train ap:0.919080 val ap:0.894558 val auc:0.885486
total time:78.81s prep time:66.04s
total time:78.81s prep time:66.04s
total time:78.81s prep time:66.04s
total time:78.81s prep time:66.04s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 20:
Epoch 20:
Epoch 20:
Epoch 20:
train loss:2947.5665 train ap:0.919538 val ap:0.897194 val auc:0.886452
train loss:3212.2243 train ap:0.902927 val ap:0.897194 val auc:0.886452
train loss:2827.8985 train ap:0.925361 val ap:0.897194 val auc:0.886452
train loss:3201.8063 train ap:0.902923 val ap:0.897194 val auc:0.886452
total time:79.69s prep time:66.20s
total time:79.69s prep time:66.20s
total time:79.69s prep time:66.20s
total time:79.69s prep time:66.20s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 21:
Epoch 21:
Epoch 21:
Epoch 21:
Early stopping at epoch 21
Early stopping at epoch 21
Early stopping at epoch 21
Early stopping at epoch 21
Loading the best model at epoch 16
Loading the best model at epoch 16
Loading the best model at epoch 16
Loading the best model at epoch 16
0.8480101227760315 0.8153982162475586
0.8480101227760315 0.8153982162475586
0.8480101227760315 0.8153982162475586
0.8480101227760315 0.8153982162475586
0.8272088170051575 0.8251734375953674
0.8272088170051575 0.8251734375953674
0.8272088170051575 0.8251734375953674
0.8272088170051575 0.8251734375953674
test AP:0.835010 test AUC:0.848419
test AP:0.835010 test AUC:0.848419
test AP:0.835010 test AUC:0.848419
test AP:0.835010 test AUC:0.848419
test_dataset 171839 avg_time 29.825049695968627
test_dataset 144922 avg_time 29.825043683052062
test_dataset 192122 avg_time 29.825049686431885
test_dataset 220036 avg_time 29.825050010681153
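The four ranks above evaluate test shards of different sizes (171839 to 220036 edges) yet report avg_time values that agree to about a microsecond, which suggests the elapsed time is reduced across ranks before printing. A sketch of that kind of synchronization, as an assumption rather than confirmed behavior of the script:

import torch
import torch.distributed as dist

def synced_avg_time(elapsed_s: float, device) -> float:
    t = torch.tensor([elapsed_s], device=device)
    dist.all_reduce(t, op=dist.ReduceOp.SUM)
    return (t / dist.get_world_size()).item()  # mean wall time over ranks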
LOCAL RANK 0, RANK0
initialize distributed
use cuda on 0
9227
get_neighbors consume: 0.0126036s
raw file found, skipping download
Dataset directory is /home/zlj/.miniconda3/envs/dgnn-3.10/lib/python3.10/site-packages/tgb/datasets/tgbl_wiki
loading processed file
998
998
val metric
metric hits@ is 0.6441952827610548
metric mrr is 0.4429445469178228
metric ap is 0.13983213193432092
metric auc is 0.9535034098327113
998
998
test metric
metric hits@ is 0.5930238971918947
metric mrr is 0.4000823857097686
metric ap is 0.11770197066761524
metric auc is 0.9466253653660359
test_dataset 23621 avg_time 0.0
LOCAL RANK 0, RANK0
initialize distributed
use cuda on 0
9227
get_neighbors consume: 0.0121001s
raw file found, skipping download
Dataset directory is /home/zlj/.miniconda3/envs/dgnn-3.10/lib/python3.10/site-packages/tgb/datasets/tgbl_wiki
loading processed file
998
998
val metric
metric hits@ is 0.5399417177408556
metric mrr is 0.2723149118104061
metric ap is 0.05431445631127528
metric auc is 0.934435814512715
998
998
test metric
metric hits@ is 0.4802390880222562
metric mrr is 0.23669700951704495
metric ap is 0.040643292031104734
metric auc is 0.923300786458167
test_dataset 23621 avg_time 0.0
LOCAL RANK 0, RANK0
use cuda on 0
9227
get_neighbors consume: 0.0119422s
Epoch 0:
train loss:438.1820 train ap:0.905229 val ap:0.929162 val auc:0.928815
total time:10.91s prep time:9.85s
fetch time:0.00s write back time:0.00s
Epoch 1:
train loss:372.5819 train ap:0.926295 val ap:0.929234 val auc:0.926704
total time:8.88s prep time:7.75s
fetch time:0.00s write back time:0.00s
Epoch 2:
train loss:348.3494 train ap:0.939406 val ap:0.951674 val auc:0.948898
total time:9.72s prep time:8.57s
fetch time:0.00s write back time:0.00s
Epoch 3:
train loss:311.1370 train ap:0.952314 val ap:0.959255 val auc:0.956934
total time:9.88s prep time:8.77s
fetch time:0.00s write back time:0.00s
Epoch 4:
train loss:288.5227 train ap:0.959499 val ap:0.967144 val auc:0.964830
total time:9.83s prep time:8.71s
fetch time:0.00s write back time:0.00s
Epoch 5:
train loss:270.2249 train ap:0.964637 val ap:0.971685 val auc:0.969840
total time:9.53s prep time:8.41s
fetch time:0.00s write back time:0.00s
Epoch 6:
train loss:262.1692 train ap:0.966888 val ap:0.972277 val auc:0.970054
total time:9.58s prep time:8.50s
fetch time:0.00s write back time:0.00s
Epoch 7:
train loss:255.6310 train ap:0.968568 val ap:0.972964 val auc:0.971109
total time:9.37s prep time:8.28s
fetch time:0.00s write back time:0.00s
Epoch 8:
train loss:248.8265 train ap:0.970266 val ap:0.975745 val auc:0.974002
total time:9.68s prep time:8.53s
fetch time:0.00s write back time:0.00s
Epoch 9:
train loss:243.9315 train ap:0.971279 val ap:0.976228 val auc:0.974509
total time:9.86s prep time:8.69s
fetch time:0.00s write back time:0.00s
Epoch 10:
train loss:239.1431 train ap:0.972743 val ap:0.977532 val auc:0.975514
total time:9.80s prep time:8.62s
fetch time:0.00s write back time:0.00s
Epoch 11:
train loss:237.8836 train ap:0.972920 val ap:0.977701 val auc:0.976333
total time:9.93s prep time:8.76s
fetch time:0.00s write back time:0.00s
Epoch 12:
train loss:231.7141 train ap:0.973696 val ap:0.978296 val auc:0.976799
total time:9.90s prep time:8.73s
fetch time:0.00s write back time:0.00s
Epoch 13:
train loss:230.5749 train ap:0.974243 val ap:0.978770 val auc:0.977488
total time:9.77s prep time:8.62s
fetch time:0.00s write back time:0.00s
Epoch 14:
train loss:227.9846 train ap:0.974771 val ap:0.978397 val auc:0.977168
total time:9.80s prep time:8.63s
fetch time:0.00s write back time:0.00s
Epoch 15:
train loss:224.3624 train ap:0.975223 val ap:0.980206 val auc:0.979011
total time:9.78s prep time:8.68s
fetch time:0.00s write back time:0.00s
Epoch 16:
train loss:223.1655 train ap:0.975816 val ap:0.981120 val auc:0.979900
total time:9.62s prep time:8.51s
fetch time:0.00s write back time:0.00s
Epoch 17:
train loss:219.1989 train ap:0.976670 val ap:0.981726 val auc:0.980641
total time:9.79s prep time:8.69s
fetch time:0.00s write back time:0.00s
Epoch 18:
train loss:215.7983 train ap:0.977476 val ap:0.981537 val auc:0.980316
total time:10.03s prep time:8.84s
fetch time:0.00s write back time:0.00s
Epoch 19:
train loss:217.2757 train ap:0.976921 val ap:0.981455 val auc:0.980277
total time:9.82s prep time:8.66s
fetch time:0.00s write back time:0.00s
Epoch 20:
train loss:219.2030 train ap:0.976782 val ap:0.981089 val auc:0.980011
total time:10.38s prep time:9.28s
fetch time:0.00s write back time:0.00s
Epoch 21:
train loss:219.9309 train ap:0.976416 val ap:0.981670 val auc:0.980690
total time:9.43s prep time:8.32s
fetch time:0.00s write back time:0.00s
Epoch 22:
train loss:214.2197 train ap:0.977587 val ap:0.982226 val auc:0.981129
total time:9.75s prep time:8.53s
fetch time:0.00s write back time:0.00s
Epoch 23:
train loss:208.9837 train ap:0.978911 val ap:0.982907 val auc:0.981704
total time:11.65s prep time:10.40s
fetch time:0.00s write back time:0.00s
Epoch 24:
train loss:210.4146 train ap:0.978243 val ap:0.982097 val auc:0.980691
total time:10.86s prep time:9.73s
fetch time:0.00s write back time:0.00s
Epoch 25:
train loss:210.4207 train ap:0.978632 val ap:0.982267 val auc:0.981537
total time:9.95s prep time:8.77s
fetch time:0.00s write back time:0.00s
Epoch 26:
train loss:205.5232 train ap:0.979174 val ap:0.983918 val auc:0.982727
total time:9.51s prep time:8.42s
fetch time:0.00s write back time:0.00s
Epoch 27:
train loss:204.8931 train ap:0.979227 val ap:0.983066 val auc:0.982013
total time:9.13s prep time:8.08s
fetch time:0.00s write back time:0.00s
Epoch 28:
train loss:199.6552 train ap:0.980440 val ap:0.982168 val auc:0.981335
total time:9.11s prep time:8.04s
fetch time:0.00s write back time:0.00s
Epoch 29:
train loss:202.9698 train ap:0.979732 val ap:0.981972 val auc:0.981716
total time:9.21s prep time:8.11s
fetch time:0.00s write back time:0.00s
Epoch 30:
train loss:201.1851 train ap:0.980338 val ap:0.983631 val auc:0.982533
total time:9.40s prep time:8.30s
fetch time:0.00s write back time:0.00s
Epoch 31:
train loss:202.0885 train ap:0.979852 val ap:0.984241 val auc:0.983195
total time:9.11s prep time:7.96s
fetch time:0.00s write back time:0.00s
Epoch 32:
train loss:195.8186 train ap:0.981171 val ap:0.985042 val auc:0.984066
total time:9.39s prep time:8.17s
fetch time:0.00s write back time:0.00s
Epoch 33:
train loss:195.5999 train ap:0.980943 val ap:0.984088 val auc:0.983118
total time:9.81s prep time:8.62s
fetch time:0.00s write back time:0.00s
Epoch 34:
train loss:195.3828 train ap:0.981070 val ap:0.984907 val auc:0.983951
total time:10.13s prep time:8.90s
fetch time:0.00s write back time:0.00s
Epoch 35:
train loss:194.4766 train ap:0.981191 val ap:0.985196 val auc:0.984022
total time:10.03s prep time:8.93s
fetch time:0.00s write back time:0.00s
Epoch 36:
train loss:194.5252 train ap:0.981201 val ap:0.984551 val auc:0.983756
total time:9.26s prep time:8.19s
fetch time:0.00s write back time:0.00s
Epoch 37:
train loss:193.6458 train ap:0.981244 val ap:0.985164 val auc:0.984187
total time:9.13s prep time:8.07s
fetch time:0.00s write back time:0.00s
Epoch 38:
train loss:195.7096 train ap:0.981002 val ap:0.983946 val auc:0.982756
total time:9.05s prep time:7.99s
fetch time:0.00s write back time:0.00s
Epoch 39:
train loss:195.2296 train ap:0.981059 val ap:0.985010 val auc:0.983845
total time:9.11s prep time:8.04s
fetch time:0.00s write back time:0.00s
Epoch 40:
train loss:191.4868 train ap:0.981932 val ap:0.985390 val auc:0.984618
total time:9.09s prep time:8.03s
fetch time:0.00s write back time:0.00s
Epoch 41:
train loss:189.6005 train ap:0.981906 val ap:0.984920 val auc:0.984112
total time:9.05s prep time:7.98s
fetch time:0.00s write back time:0.00s
Epoch 42:
train loss:191.7600 train ap:0.981801 val ap:0.984937 val auc:0.984145
total time:9.18s prep time:8.12s
fetch time:0.00s write back time:0.00s
Epoch 43:
train loss:193.1520 train ap:0.981423 val ap:0.984112 val auc:0.983052
total time:9.07s prep time:7.98s
fetch time:0.00s write back time:0.00s
Epoch 44:
train loss:193.6521 train ap:0.981466 val ap:0.984804 val auc:0.983935
total time:9.35s prep time:8.24s
fetch time:0.00s write back time:0.00s
Epoch 45:
Early stopping at epoch 45
Loading the best model at epoch 40
0.9833124876022339 0.9827097654342651
0.9832088351249695 0.982053816318512
test AP:0.978572 test AUC:0.977290
test_dataset 23621 avg_time 7.839827566146851
LOCAL RANK 0, RANK0
LOCAL RANK 2, RANK2
LOCAL RANK 1, RANK1
LOCAL RANK 3, RANK3
use cuda on 2
use cuda on 3
use cuda on 1
use cuda on 0
9227
9227
9227
9227
get_neighbors consume: 0.0114267s
get_neighbors consume: 0.0112016s
get_neighbors consume: 0.0112517s
get_neighbors consume: 0.0118708s
num_batchs: tensor([136], device='cuda:0')
num_batchs: tensor([145], device='cuda:1')
num_batchs: tensor([127], device='cuda:3')
num_batchs: tensor([141], device='cuda:2')
num_batchs: tensor([37], device='cuda:0')
num_batchs: tensor([27], device='cuda:3')
num_batchs: tensor([30], device='cuda:2')
num_batchs: tensor([27], device='cuda:1')
num_batchs: tensor([32], device='cuda:2')
num_batchs: tensor([29], device='cuda:0')
num_batchs: tensor([28], device='cuda:3')
num_batchs: tensor([31], device='cuda:1')
Epoch 0:
Epoch 0:
Epoch 0:
Epoch 0:
train loss:123.3378 train ap:0.853755 val ap:0.914218 val auc:0.912577
train loss:119.7958 train ap:0.864334 val ap:0.914218 val auc:0.912577
train loss:128.1577 train ap:0.833298 val ap:0.914218 val auc:0.912577
train loss:121.0308 train ap:0.855477 val ap:0.914218 val auc:0.912577
total time:4.32s prep time:4.01s
total time:4.32s prep time:4.01s
total time:4.32s prep time:4.01s
fetch time:0.00s write back time:0.00s
total time:4.32s prep time:4.01s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 1:
Epoch 1:
Epoch 1:
Epoch 1:
train loss:102.7750 train ap:0.896443 val ap:0.917845 val auc:0.917570
train loss:86.6295 train ap:0.923560 val ap:0.917845 val auc:0.917570
train loss:92.8748 train ap:0.912647 val ap:0.917845 val auc:0.917570
train loss:90.5898 train ap:0.914378 val ap:0.917845 val auc:0.917570
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 2:
Epoch 2:
Epoch 2:
Epoch 2:
train loss:81.4270 train ap:0.933189 val ap:0.917884 val auc:0.919979
train loss:98.9949 train ap:0.902533 val ap:0.917884 val auc:0.919979
train loss:88.5987 train ap:0.918559 val ap:0.917884 val auc:0.919979
train loss:85.2710 train ap:0.923680 val ap:0.917884 val auc:0.919979
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 3:
Epoch 3:
Epoch 3:
Epoch 3:
train loss:85.9204 train ap:0.923351 val ap:0.919950 val auc:0.919488
train loss:83.3718 train ap:0.928029 val ap:0.919950 val auc:0.919488
train loss:80.2313 train ap:0.931704 val ap:0.919950 val auc:0.919488
train loss:96.2580 train ap:0.909615 val ap:0.919950 val auc:0.919488
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
total time:2.07s prep time:1.76s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 4:
Epoch 4:
Epoch 4:
Epoch 4:
train loss:85.0409 train ap:0.926282 val ap:0.919517 val auc:0.919116
train loss:77.9838 train ap:0.935790 val ap:0.919517 val auc:0.919116
train loss:94.9697 train ap:0.913220 val ap:0.919517 val auc:0.919116
train loss:81.9256 train ap:0.929987 val ap:0.919517 val auc:0.919116
total time:2.06s prep time:1.76s
total time:2.06s prep time:1.76s
total time:2.06s prep time:1.76s
total time:2.06s prep time:1.76s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 5:
Epoch 5:
Epoch 5:
Epoch 5:
train loss:75.5574 train ap:0.938574 val ap:0.923025 val auc:0.923346
train loss:92.2706 train ap:0.916673 val ap:0.923025 val auc:0.923346
train loss:83.1462 train ap:0.928510 val ap:0.923025 val auc:0.923346
train loss:80.5073 train ap:0.932611 val ap:0.923025 val auc:0.923346
total time:2.06s prep time:1.75s
total time:2.06s prep time:1.75s
total time:2.06s prep time:1.75s
total time:2.06s prep time:1.75s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 6:
Epoch 6:
Epoch 6:
Epoch 6:
train loss:81.8060 train ap:0.930951 val ap:0.930009 val auc:0.931360
train loss:90.8795 train ap:0.919864 val ap:0.930009 val auc:0.931360
train loss:79.1179 train ap:0.936566 val ap:0.930009 val auc:0.931360
train loss:74.1278 train ap:0.943166 val ap:0.930009 val auc:0.931360
total time:2.19s prep time:1.85s
total time:2.19s prep time:1.85s
total time:2.19s prep time:1.85s
fetch time:0.00s write back time:0.00s
total time:2.19s prep time:1.85s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 7:
Epoch 7:
Epoch 7:
Epoch 7:
train loss:74.2026 train ap:0.943481 val ap:0.934922 val auc:0.933923
train loss:89.4164 train ap:0.923606 val ap:0.934922 val auc:0.933923
train loss:79.4655 train ap:0.936280 val ap:0.934922 val auc:0.933923
train loss:77.4014 train ap:0.940044 val ap:0.934922 val auc:0.933923
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 8:
Epoch 8:
Epoch 8:
Epoch 8:
train loss:78.0228 train ap:0.938883 val ap:0.938825 val auc:0.938385
train loss:71.4538 train ap:0.947977 val ap:0.938825 val auc:0.938385
train loss:86.4893 train ap:0.928232 val ap:0.938825 val auc:0.938385
train loss:75.1229 train ap:0.943555 val ap:0.938825 val auc:0.938385
total time:2.35s prep time:2.00s
total time:2.35s prep time:2.00s
total time:2.35s prep time:2.00s
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 9:
Epoch 9:
Epoch 9:
Epoch 9:
train loss:69.4881 train ap:0.952006 val ap:0.947348 val auc:0.946217
train loss:75.6891 train ap:0.942633 val ap:0.947348 val auc:0.946217
train loss:84.4704 train ap:0.931431 val ap:0.947348 val auc:0.946217
train loss:72.4057 train ap:0.948850 val ap:0.947348 val auc:0.946217
total time:2.36s prep time:2.01s
total time:2.36s prep time:2.01s
total time:2.36s prep time:2.01s
total time:2.36s prep time:2.01s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 10:
Epoch 10:
Epoch 10:
Epoch 10:
train loss:73.9361 train ap:0.945705 val ap:0.950347 val auc:0.950115
train loss:66.5602 train ap:0.956275 val ap:0.950347 val auc:0.950115
train loss:82.0624 train ap:0.936810 val ap:0.950347 val auc:0.950115
train loss:70.8339 train ap:0.950265 val ap:0.950347 val auc:0.950115
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 11:
Epoch 11:
Epoch 11:
Epoch 11:
train loss:80.2513 train ap:0.939381 val ap:0.951866 val auc:0.951528
train loss:67.3593 train ap:0.953846 val ap:0.951866 val auc:0.951528
train loss:72.1514 train ap:0.949227 val ap:0.951866 val auc:0.951528
train loss:68.3804 train ap:0.954180 val ap:0.951866 val auc:0.951528
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 12:
Epoch 12:
Epoch 12:
Epoch 12:
train loss:77.6837 train ap:0.943483 val ap:0.955151 val auc:0.954360
train loss:64.1101 train ap:0.959728 val ap:0.955151 val auc:0.954360
train loss:71.5212 train ap:0.949585 val ap:0.955151 val auc:0.954360
train loss:68.1205 train ap:0.954258 val ap:0.955151 val auc:0.954360
total time:2.37s prep time:2.02s
total time:2.37s prep time:2.02s
total time:2.37s prep time:2.02s
total time:2.37s prep time:2.02s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 13:
Epoch 13:
Epoch 13:
Epoch 13:
train loss:76.7123 train ap:0.944382 val ap:0.958182 val auc:0.957536
train loss:69.6559 train ap:0.952299 val ap:0.958182 val auc:0.957536
train loss:62.7070 train ap:0.960355 val ap:0.958182 val auc:0.957536
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
total time:2.36s prep time:2.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
train loss:65.7178 train ap:0.958041 val ap:0.958182 val auc:0.957536
total time:2.36s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 14:
Epoch 14:
Epoch 14:
Epoch 14:
train loss:61.2000 train ap:0.963588 val ap:0.956409 val auc:0.955584
train loss:76.0099 train ap:0.944896 val ap:0.956409 val auc:0.955584
train loss:68.6356 train ap:0.953758 val ap:0.956409 val auc:0.955584
train loss:63.9058 train ap:0.961098 val ap:0.956409 val auc:0.955584
total time:2.47s prep time:2.07s
total time:2.47s prep time:2.07s
total time:2.47s prep time:2.07s
total time:2.47s prep time:2.07s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 15:
Epoch 15:
Epoch 15:
Epoch 15:
train loss:76.1507 train ap:0.944705 val ap:0.960281 val auc:0.959706
train loss:63.2278 train ap:0.959601 val ap:0.960281 val auc:0.959706
train loss:69.3052 train ap:0.953616 val ap:0.960281 val auc:0.959706
train loss:65.8970 train ap:0.957695 val ap:0.960281 val auc:0.959706
total time:2.51s prep time:2.13s
total time:2.51s prep time:2.13s
total time:2.51s prep time:2.13s
total time:2.51s prep time:2.13s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 16:
Epoch 16:
Epoch 16:
Epoch 16:
train loss:61.6107 train ap:0.963324 val ap:0.961179 val auc:0.960351
train loss:69.1189 train ap:0.953501 val ap:0.961179 val auc:0.960351
train loss:74.8315 train ap:0.947684 val ap:0.961179 val auc:0.960351
train loss:64.2459 train ap:0.960182 val ap:0.961179 val auc:0.960351
total time:2.42s prep time:2.07s
total time:2.42s prep time:2.07s
total time:2.42s prep time:2.07s
total time:2.42s prep time:2.07s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 17:
Epoch 17:
Epoch 17:
Epoch 17:
train loss:66.9914 train ap:0.957178 val ap:0.963475 val auc:0.962502
train loss:74.3185 train ap:0.947961 val ap:0.963475 val auc:0.962502
train loss:60.8575 train ap:0.962965 val ap:0.963475 val auc:0.962502
train loss:62.1433 train ap:0.962527 val ap:0.963475 val auc:0.962502
total time:2.40s prep time:2.04s
total time:2.40s prep time:2.04s
total time:2.40s prep time:2.04s
total time:2.40s prep time:2.04s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 18:
Epoch 18:
Epoch 18:
Epoch 18:
train loss:66.4025 train ap:0.956987 val ap:0.964559 val auc:0.963546
train loss:60.3496 train ap:0.963946 val ap:0.964559 val auc:0.963546
train loss:72.3370 train ap:0.951568 val ap:0.964559 val auc:0.963546
train loss:61.7511 train ap:0.963632 val ap:0.964559 val auc:0.963546
total time:2.41s prep time:2.05s
total time:2.41s prep time:2.05s
total time:2.41s prep time:2.05s
fetch time:0.00s write back time:0.00s
total time:2.41s prep time:2.05s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 19:
Epoch 19:
Epoch 19:
Epoch 19:
train loss:66.6593 train ap:0.957518 val ap:0.963903 val auc:0.963591
train loss:59.7753 train ap:0.964455 val ap:0.963903 val auc:0.963591
train loss:71.7726 train ap:0.952638 val ap:0.963903 val auc:0.963591
train loss:60.8455 train ap:0.963829 val ap:0.963903 val auc:0.963591
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
total time:2.43s prep time:2.07s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 20:
Epoch 20:
Epoch 20:
Epoch 20:
train loss:57.4701 train ap:0.968381 val ap:0.966703 val auc:0.965856
train loss:65.9609 train ap:0.958111 val ap:0.966703 val auc:0.965856
train loss:71.3897 train ap:0.953114 val ap:0.966703 val auc:0.965856
train loss:59.8048 train ap:0.964998 val ap:0.966703 val auc:0.965856
total time:2.40s prep time:2.03s
total time:2.40s prep time:2.03s
total time:2.40s prep time:2.03s
total time:2.40s prep time:2.03s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
fetch time:0.00s write back time:0.00s
Epoch 21:
train loss:71.6177 train ap:0.952593 val ap:0.966044 val auc:0.965142
train loss:57.6982 train ap:0.968289 val ap:0.966044 val auc:0.965142
train loss:59.9029 train ap:0.965802 val ap:0.966044 val auc:0.965142
train loss:63.2272 train ap:0.962170 val ap:0.966044 val auc:0.965142
total time:2.36s prep time:2.01s
fetch time:0.00s write back time:0.00s
Epoch 22:
train loss:63.2778 train ap:0.961871 val ap:0.968463 val auc:0.967461
train loss:69.6851 train ap:0.954952 val ap:0.968463 val auc:0.967461
train loss:57.2220 train ap:0.968283 val ap:0.968463 val auc:0.967461
train loss:58.6709 train ap:0.967088 val ap:0.968463 val auc:0.967461
total time:2.37s prep time:2.02s
fetch time:0.00s write back time:0.00s
Epoch 23:
train loss:62.9887 train ap:0.962018 val ap:0.968122 val auc:0.967319
train loss:69.3614 train ap:0.955205 val ap:0.968122 val auc:0.967319
train loss:56.8950 train ap:0.968472 val ap:0.968122 val auc:0.967319
train loss:57.1121 train ap:0.969189 val ap:0.968122 val auc:0.967319
total time:2.38s prep time:2.03s
fetch time:0.00s write back time:0.00s
Epoch 24:
train loss:56.3057 train ap:0.970087 val ap:0.967538 val auc:0.966955
train loss:62.6017 train ap:0.962912 val ap:0.967538 val auc:0.966955
train loss:68.6662 train ap:0.956110 val ap:0.967538 val auc:0.966955
train loss:57.1639 train ap:0.968935 val ap:0.967538 val auc:0.966955
total time:2.36s prep time:2.02s
fetch time:0.00s write back time:0.00s
Epoch 25:
train loss:62.5181 train ap:0.962567 val ap:0.966588 val auc:0.966440
train loss:69.3504 train ap:0.956049 val ap:0.966588 val auc:0.966440
train loss:58.6922 train ap:0.966765 val ap:0.966588 val auc:0.966440
train loss:57.1053 train ap:0.968492 val ap:0.966588 val auc:0.966440
total time:2.38s prep time:2.01s
fetch time:0.00s write back time:0.00s
Epoch 26:
train loss:61.4962 train ap:0.963934 val ap:0.969727 val auc:0.969460
train loss:55.8220 train ap:0.969446 val ap:0.969727 val auc:0.969460
train loss:56.4361 train ap:0.969552 val ap:0.969727 val auc:0.969460
train loss:67.9671 train ap:0.957416 val ap:0.969727 val auc:0.969460
total time:2.34s prep time:1.99s
fetch time:0.00s write back time:0.00s
Epoch 27:
train loss:60.9567 train ap:0.964485 val ap:0.969411 val auc:0.969159
train loss:54.8527 train ap:0.970446 val ap:0.969411 val auc:0.969159
train loss:66.7655 train ap:0.958428 val ap:0.969411 val auc:0.969159
train loss:55.8786 train ap:0.968926 val ap:0.969411 val auc:0.969159
total time:2.34s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 28:
train loss:66.5786 train ap:0.959190 val ap:0.970688 val auc:0.970374
train loss:54.0897 train ap:0.972020 val ap:0.970688 val auc:0.970374
train loss:60.7561 train ap:0.964240 val ap:0.970688 val auc:0.970374
train loss:55.3817 train ap:0.970250 val ap:0.970688 val auc:0.970374
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 29:
train loss:59.1135 train ap:0.966342 val ap:0.971187 val auc:0.970594
train loss:54.0108 train ap:0.972241 val ap:0.971187 val auc:0.970594
train loss:55.3218 train ap:0.969804 val ap:0.971187 val auc:0.970594
train loss:65.7909 train ap:0.960222 val ap:0.971187 val auc:0.970594
total time:2.34s prep time:1.99s
fetch time:0.00s write back time:0.00s
Epoch 30:
train loss:53.6740 train ap:0.971874 val ap:0.971368 val auc:0.970488
train loss:59.0456 train ap:0.966357 val ap:0.971368 val auc:0.970488
train loss:54.3334 train ap:0.970578 val ap:0.971368 val auc:0.970488
train loss:65.3920 train ap:0.960561 val ap:0.971368 val auc:0.970488
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 31:
train loss:58.4527 train ap:0.967171 val ap:0.970350 val auc:0.970659
train loss:64.1537 train ap:0.961999 val ap:0.970350 val auc:0.970659
train loss:54.5837 train ap:0.970700 val ap:0.970350 val auc:0.970659
train loss:53.4245 train ap:0.972263 val ap:0.970350 val auc:0.970659
total time:2.38s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 32:
train loss:59.5737 train ap:0.965499 val ap:0.971045 val auc:0.971084
train loss:52.6024 train ap:0.973221 val ap:0.971045 val auc:0.971084
train loss:64.2868 train ap:0.961182 val ap:0.971045 val auc:0.971084
train loss:53.9027 train ap:0.971177 val ap:0.971045 val auc:0.971084
total time:2.34s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 33:
train loss:64.4721 train ap:0.961330 val ap:0.971705 val auc:0.971615
train loss:58.2022 train ap:0.966919 val ap:0.971705 val auc:0.971615
train loss:53.6572 train ap:0.972123 val ap:0.971705 val auc:0.971615
train loss:52.8161 train ap:0.972717 val ap:0.971705 val auc:0.971615
total time:2.34s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 34:
train loss:63.2622 train ap:0.963819 val ap:0.972779 val auc:0.972498
train loss:53.0382 train ap:0.972783 val ap:0.972779 val auc:0.972498
train loss:57.8172 train ap:0.966480 val ap:0.972779 val auc:0.972498
train loss:53.5220 train ap:0.971246 val ap:0.972779 val auc:0.972498
total time:2.38s prep time:2.02s
fetch time:0.00s write back time:0.00s
Epoch 35:
train loss:56.2001 train ap:0.969756 val ap:0.973664 val auc:0.973200
train loss:62.8994 train ap:0.963332 val ap:0.973664 val auc:0.973200
train loss:51.7398 train ap:0.973320 val ap:0.973664 val auc:0.973200
train loss:53.2094 train ap:0.972655 val ap:0.973664 val auc:0.973200
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 36:
train loss:52.4631 train ap:0.972994 val ap:0.972105 val auc:0.972144
train loss:62.6992 train ap:0.963157 val ap:0.972105 val auc:0.972144
train loss:58.7143 train ap:0.966666 val ap:0.972105 val auc:0.972144
train loss:52.6953 train ap:0.972413 val ap:0.972105 val auc:0.972144
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 37:
train loss:51.6406 train ap:0.973430 val ap:0.974538 val auc:0.974165
train loss:56.9392 train ap:0.968403 val ap:0.974538 val auc:0.974165
train loss:63.4455 train ap:0.962197 val ap:0.974538 val auc:0.974165
train loss:52.7715 train ap:0.971992 val ap:0.974538 val auc:0.974165
total time:2.36s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 38:
train loss:50.0356 train ap:0.975320 val ap:0.973131 val auc:0.972900
train loss:56.2640 train ap:0.969440 val ap:0.973131 val auc:0.972900
train loss:61.9657 train ap:0.964427 val ap:0.973131 val auc:0.972900
train loss:53.3090 train ap:0.971174 val ap:0.973131 val auc:0.972900
total time:2.34s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 39:
train loss:51.7537 train ap:0.972938 val ap:0.974730 val auc:0.974496
train loss:62.0245 train ap:0.963711 val ap:0.974730 val auc:0.974496
train loss:57.4291 train ap:0.968386 val ap:0.974730 val auc:0.974496
train loss:53.3602 train ap:0.971910 val ap:0.974730 val auc:0.974496
total time:2.37s prep time:2.02s
fetch time:0.00s write back time:0.00s
Epoch 40:
train loss:56.9792 train ap:0.968267 val ap:0.974983 val auc:0.974454
train loss:50.4307 train ap:0.975214 val ap:0.974983 val auc:0.974454
train loss:62.0409 train ap:0.963881 val ap:0.974983 val auc:0.974454
train loss:52.8885 train ap:0.971870 val ap:0.974983 val auc:0.974454
total time:2.33s prep time:1.98s
fetch time:0.00s write back time:0.00s
Epoch 41:
train loss:54.5205 train ap:0.971014 val ap:0.975489 val auc:0.974803
train loss:51.5671 train ap:0.973705 val ap:0.975489 val auc:0.974803
train loss:49.1804 train ap:0.975497 val ap:0.975489 val auc:0.974803
train loss:61.3572 train ap:0.964603 val ap:0.975489 val auc:0.974803
total time:2.33s prep time:1.98s
fetch time:0.00s write back time:0.00s
Epoch 42:
train loss:55.4220 train ap:0.970446 val ap:0.973951 val auc:0.973813
train loss:62.3554 train ap:0.963702 val ap:0.973951 val auc:0.973813
train loss:52.0832 train ap:0.972311 val ap:0.973951 val auc:0.973813
train loss:49.2889 train ap:0.976735 val ap:0.973951 val auc:0.973813
total time:2.33s prep time:1.97s
fetch time:0.00s write back time:0.00s
Epoch 43:
train loss:48.9256 train ap:0.976293 val ap:0.974996 val auc:0.974853
train loss:60.1842 train ap:0.966549 val ap:0.974996 val auc:0.974853
train loss:53.9976 train ap:0.971387 val ap:0.974996 val auc:0.974853
train loss:50.6925 train ap:0.974185 val ap:0.974996 val auc:0.974853
total time:2.33s prep time:1.98s
fetch time:0.00s write back time:0.00s
Epoch 44:
train loss:55.5751 train ap:0.969093 val ap:0.974097 val auc:0.974263
train loss:51.0575 train ap:0.973668 val ap:0.974097 val auc:0.974263
train loss:60.7870 train ap:0.965938 val ap:0.974097 val auc:0.974263
train loss:48.7321 train ap:0.976971 val ap:0.974097 val auc:0.974263
total time:2.35s prep time:2.00s
fetch time:0.00s write back time:0.00s
Epoch 45:
train loss:54.1609 train ap:0.970995 val ap:0.973614 val auc:0.973329
train loss:60.3872 train ap:0.965370 val ap:0.973614 val auc:0.973329
train loss:48.1635 train ap:0.977290 val ap:0.973614 val auc:0.973329
train loss:50.3630 train ap:0.973891 val ap:0.973614 val auc:0.973329
total time:2.36s prep time:2.01s
fetch time:0.00s write back time:0.00s
Epoch 46:
Early stopping at epoch 46
Loading the best model at epoch 41
0.9695311188697815 0.9686346650123596
0.9625623822212219 0.9607158899307251
test AP:0.959871 test AUC:0.957466
test_dataset 7215 avg_time 1.9041776847839356
test_dataset 5214 avg_time 1.904272723197937
test_dataset 5980 avg_time 1.904249210357666
test_dataset 5212 avg_time 1.9042695140838624
import argparse
import os
import sys
from os.path import abspath, join, dirname
current_path = os.path.dirname(os.path.abspath(__file__))
parent_path = os.path.abspath(os.path.join(current_path, os.pardir))
sys.path.append(parent_path)
from starrygl.sample.sample_core.LocalNegSampling import LocalNegativeSampling
from starrygl.distributed.context import DistributedContext
from starrygl.distributed.utils import DistIndex
from starrygl.module.modules import GeneralModel
from pathlib import Path
from starrygl.module.utils import parse_config, EarlyStopMonitor
from starrygl.sample.cache.fetch_cache import FetchFeatureCache
from starrygl.sample.graph_core import DataSet, DistributedGraphStore, TemporalNeighborSampleGraph
from starrygl.sample.memory.shared_mailbox import SharedMailBox
from starrygl.sample.sample_core.base import NegativeSampling
from starrygl.sample.sample_core.neighbor_sampler import NeighborSampler
from starrygl.sample.part_utils.partition_tgnn import partition_load
import torch
import time
import torch.nn.functional as F
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed import init_process_group, destroy_process_group
from starrygl.sample.data_loader import DistributedDataLoader
from starrygl.sample.batch_data import SAMPLE_TYPE
from starrygl.sample.stream_manager import getPipelineManger
parser = argparse.ArgumentParser(
    description="StarryGL distributed TGNN training example",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('--rank', default=0, type=int, metavar='W',
                    help='global rank of this process')
parser.add_argument('--local_rank', default=0, type=int, metavar='W',
                    help='local rank of this process on its node')
parser.add_argument('--patience', type=int, default=5,
                    help='patience for early stopping')
parser.add_argument('--world_size', default=1, type=int, metavar='W',
                    help='total number of processes')
parser.add_argument('--dataname', default="WIKI", type=str, metavar='W',
                    help='name of dataset')
parser.add_argument('--model', default='TGN', type=str, metavar='W',
                    help='name of model')
args = parser.parse_args()
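# Example launch (added commentary, a sketch only; the script name is an
# assumption): with torchrun, RANK/WORLD_SIZE/LOCAL_RANK are set automatically
# for each process, e.g.
#   torchrun --nproc_per_node=4 train_example.py --dataname WIKI --model TGN
# Without a launcher, the fallback below fills them in from the CLI arguments.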
import random
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
#os.environ['CUDA_VISIBLE_DEVICES'] = str(args.rank)
if 'WORLD_SIZE' not in os.environ:
    os.environ["RANK"] = str(args.rank)
    os.environ["WORLD_SIZE"] = str(args.world_size)
    os.environ["LOCAL_RANK"] = str(args.local_rank)
if 'MASTER_ADDR' not in os.environ:
    os.environ["MASTER_ADDR"] = '192.168.2.107'
if 'MASTER_PORT' not in os.environ:
    os.environ["MASTER_PORT"] = '9337'
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
local_rank = int(os.environ["LOCAL_RANK"])
def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
#seed_everything(34)
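# Note (added commentary): seeding is left disabled in this example, so runs
# are not bit-for-bit reproducible; enabling it also turns on
# cudnn.deterministic, which trades some speed for reproducibility.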
def main():
    print('LOCAL RANK {}, RANK{}'.format(os.environ["LOCAL_RANK"], os.environ["RANK"]))
    use_cuda = True
    sample_param, memory_param, gnn_param, train_param = parse_config('../config/{}.yml'.format(args.model))
    ctx = DistributedContext.init(backend="nccl", use_gpu=True)
    torch.set_num_threads(10)
    device_id = torch.cuda.current_device()
    print('use cuda on', device_id)
    #pdata = partition_load('/mnt/data/part_data/evaluate/tgbl/{}'.format(args.dataname), algo="metis_for_tgnn")
    pdata = partition_load("/mnt/data/part_data/v2/here/{}".format(args.dataname), algo="metis_for_tgnn")
    graph = DistributedGraphStore(pdata=pdata)
    print(graph.num_nodes)
    Path("../saved_models/").mkdir(parents=True, exist_ok=True)
    Path("../saved_checkpoints/").mkdir(parents=True, exist_ok=True)
    get_checkpoint_path = lambda \
        epoch: f'../saved_checkpoints/{args.model}-{args.dataname}-{epoch}.pth'
    gnn_param['dyrep'] = True if args.model == 'DyRep' else False
    use_src_emb = gnn_param['use_src_emb'] if 'use_src_emb' in gnn_param else False
    use_dst_emb = gnn_param['use_dst_emb'] if 'use_dst_emb' in gnn_param else False
    sample_graph = TemporalNeighborSampleGraph(sample_graph=pdata.sample_graph, mode='full')
    if memory_param['type'] != 'none':
        mailbox = SharedMailBox(pdata.ids.shape[0], memory_param,
                                dim_edge_feat=pdata.edge_attr.shape[1] if pdata.edge_attr is not None else 0,
                                ts_dtye=graph.edge_ts.dtype)
    else:
        mailbox = None
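    # Note (added commentary): the mailbox carries the TGN-style per-node
    # memory and mails; it is shared across partitions, and updates are
    # exchanged between ranks later via set_mailbox_all_to_all().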
    num_layers = sample_param['layer'] if 'layer' in sample_param else 1
    fanout = sample_param['neighbor'] if 'neighbor' in sample_param else [10]
    policy = sample_param['strategy'] if 'strategy' in sample_param else 'recent'
    sampler = NeighborSampler(num_nodes=graph.num_nodes, num_layers=num_layers, fanout=fanout,
                              graph_data=sample_graph, workers=10, policy=policy, graph_name="wiki_train")
    train_data = torch.masked_select(graph.edge_index, pdata.train_mask.to(graph.edge_index.device)).reshape(2, -1)
    train_ts = torch.masked_select(graph.edge_ts, pdata.train_mask.to(graph.edge_index.device))
    test_data = torch.masked_select(graph.edge_index, pdata.test_mask.to(graph.edge_index.device)).reshape(2, -1)
    test_ts = torch.masked_select(graph.edge_ts, pdata.test_mask.to(graph.edge_index.device))
    val_data = torch.masked_select(graph.edge_index, pdata.val_mask.to(graph.edge_index.device)).reshape(2, -1)
    val_ts = torch.masked_select(graph.edge_ts, pdata.val_mask.to(graph.edge_index.device))
    train_data = DataSet(edges=train_data, ts=train_ts, eids=torch.nonzero(pdata.train_mask).view(-1))
    test_data = DataSet(edges=test_data, ts=test_ts, eids=torch.nonzero(pdata.test_mask).view(-1))
    val_data = DataSet(edges=val_data, ts=val_ts, eids=torch.nonzero(pdata.val_mask).view(-1))
    #neg_sampler = LocalNegativeSampling('triplet',dst_node_list = graph.edge_index[1,:].unique())
    neg_sampler = NegativeSampling('triplet')
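    # Note (added commentary): torch.masked_select flattens its input, so the
    # selected edge index above is restored to shape [2, E] with reshape(2, -1);
    # the eids keep global edge positions so edge features can be fetched later.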
    trainloader = DistributedDataLoader(graph, train_data, sampler=sampler,
                                        sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                        neg_sampler=neg_sampler,
                                        batch_size=train_param['batch_size'],
                                        shuffle=False,
                                        drop_last=True,
                                        chunk_size=None,
                                        train=True,
                                        queue_size=200,
                                        mailbox=mailbox,
                                        )
    testloader = DistributedDataLoader(graph, test_data, sampler=sampler,
                                       sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                       neg_sampler=neg_sampler,
                                       batch_size=train_param['batch_size'],
                                       shuffle=False,
                                       drop_last=False,
                                       chunk_size=None,
                                       train=False,
                                       queue_size=100,
                                       mailbox=mailbox)
    valloader = DistributedDataLoader(graph, val_data, sampler=sampler,
                                      sampler_fn=SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
                                      neg_sampler=neg_sampler,
                                      batch_size=train_param['batch_size'],
                                      shuffle=False,
                                      drop_last=False,
                                      chunk_size=None,
                                      train=False,
                                      queue_size=100,
                                      mailbox=mailbox)
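    # Note (added commentary): drop_last=True on the training loader keeps the
    # number of batches identical across ranks, which keeps DDP gradient
    # synchronization in lockstep; the val/test loaders keep every edge instead.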
    gnn_dim_node = 0 if graph.x is None else pdata.x.shape[1]
    gnn_dim_edge = 0 if graph.edge_attr is None else pdata.edge_attr.shape[1]
    avg_time = 0
    if use_cuda:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param).cuda()
        device = torch.device('cuda')
    else:
        model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param)
        device = torch.device('cpu')
    model = DDP(model, find_unused_parameters=True)
    train_stream = torch.cuda.Stream()
    send_stream = torch.cuda.Stream()
    scatter_stream = torch.cuda.Stream()
    val_losses = list()
    def eval(mode='val'):
        neg_samples = 1
        model.eval()
        aps = list()
        aucs_mrrs = list()
        if mode == 'val':
            loader = valloader
        elif mode == 'test':
            loader = testloader
        elif mode == 'train':
            loader = trainloader
        with torch.no_grad():
            total_loss = 0
            signal = torch.tensor([0], dtype=int, device=device)
            with torch.cuda.stream(train_stream):
                for roots, mfgs, metadata in loader:
                    pred_pos, pred_neg = model(mfgs, metadata)
                    total_loss += criterion(pred_pos, torch.ones_like(pred_pos))
                    total_loss += criterion(pred_neg, torch.zeros_like(pred_neg))
                    y_pred = torch.cat([pred_pos, pred_neg], dim=0).sigmoid().cpu()
                    y_true = torch.cat([torch.ones(pred_pos.size(0)), torch.zeros(pred_neg.size(0))], dim=0)
                    aps.append(average_precision_score(y_true, y_pred.detach().numpy()))
                    aucs_mrrs.append(roc_auc_score(y_true, y_pred))
                    if mailbox is not None:
                        # write the updated memory and mails back after each batch
                        src = metadata['src_pos_index']
                        dst = metadata['dst_pos_index']
                        ts = roots.ts
                        if graph.edge_attr is None:
                            edge_feats = None
                        elif graph.edge_attr.device == torch.device('cpu'):
                            edge_feats = graph.edge_attr[roots.eids.to('cpu')].to('cuda')
                        else:
                            edge_feats = graph.edge_attr[roots.eids]
                        dist_index_mapper = mfgs[0][0].srcdata['ID']
                        root_index = torch.cat((src, dst))
                        last_updated_nid = model.module.memory_updater.last_updated_nid[root_index]
                        last_updated_memory = model.module.memory_updater.last_updated_memory[root_index]
                        last_updated_ts = model.module.memory_updater.last_updated_ts[root_index]
                        index, memory, memory_ts = mailbox.get_update_memory(last_updated_nid,
                                                                             last_updated_memory,
                                                                             last_updated_ts)
                        index, mail, mail_ts = mailbox.get_update_mail(dist_index_mapper,
                                                                       src, dst, ts, edge_feats,
                                                                       model.module.memory_updater.last_updated_memory,
                                                                       model.module.embedding, use_src_emb,
                                                                       use_dst_emb,
                                                                       )
                        mailbox.set_mailbox_all_to_all(index, memory, memory_ts, mail, mail_ts, reduce_Op='max')
            #ap = float(torch.tensor(aps).mean())
            #if neg_samples > 1:
            #    auc_mrr = float(torch.cat(aucs_mrrs).mean())
            #else:
            #    auc_mrr = float(torch.tensor(aucs_mrrs).mean())
            world_size = dist.get_world_size()
            apc = torch.empty([loader.expected_idx * world_size], dtype=torch.float, device='cuda')
            auc_mrr = torch.empty([loader.expected_idx * world_size], dtype=torch.float, device='cuda')
            dist.all_gather_into_tensor(apc, torch.tensor(aps, device='cuda', dtype=torch.float))
            dist.all_gather_into_tensor(auc_mrr, torch.tensor(aucs_mrrs, device='cuda', dtype=torch.float))
            #ap = float(torch.tensor(apc).mean())
            #auc_mrr = float(torch.tensor(auc_mrr).mean())
            ap = float(apc.clone().mean())
            auc_mrr = float(auc_mrr.clone().mean())
        return ap, auc_mrr
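    # Note (added commentary): all_gather_into_tensor requires every rank to
    # contribute the same number of per-batch scores (loader.expected_idx), and
    # the reported AP/AUC is a mean of per-batch values rather than a metric
    # over the pooled predictions, a close approximation when batches are
    # equally sized.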
    #torch.autograd.set_detect_anomaly(True)
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=train_param['lr'])
    early_stopper = EarlyStopMonitor(max_round=args.patience)
    MODEL_SAVE_PATH = f'../saved_models/{args.model}-{args.dataname}-{dist.get_world_size()}.pth'
    for e in range(train_param['epoch']):
        torch.cuda.synchronize()
        write_back_time = 0
        fetch_time = 0
        epoch_start_time = time.time()
        train_aps = list()
        print('Epoch {:d}:'.format(e))
        time_prep = 0
        total_loss = 0
        model.train()
        if mailbox is not None:
            mailbox.reset()
            model.module.memory_updater.last_updated_nid = None
            model.module.memory_updater.last_updated_memory = None
            model.module.memory_updater.last_updated_ts = None
        for roots, mfgs, metadata in trainloader:
            #print(e,mfgs)
            #fetch_time += sample_time/1000
            t_prep_s = time.time()
            with torch.cuda.stream(train_stream):
                #print(mfgs[0][0].srcdata['ID'].shape[0])
                model.train()
                optimizer.zero_grad()
                pred_pos, pred_neg = model(mfgs, metadata)
                #print('pred_pos has NaN {}'.format(torch.isnan(pred_pos).any().item()))
                #print('pred_neg has NaN {}'.format(torch.isnan(pred_neg).any().item()))
                loss = criterion(pred_pos, torch.ones_like(pred_pos))
                loss += criterion(pred_neg, torch.zeros_like(pred_neg))
                #print('loss has NaN {}'.format(torch.isnan(loss).any().item()))
                total_loss += float(loss)
                loss.backward()
                #torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                #for name,param in model.named_parameters():
                #    print(name,'grad\n',param.grad,'value\n',param.data)
                optimizer.step()
                t_prep_s = time.time()
                y_pred = torch.cat([pred_pos, pred_neg], dim=0).sigmoid().cpu()
                y_true = torch.cat([torch.ones(pred_pos.size(0)), torch.zeros(pred_neg.size(0))], dim=0)
                train_aps.append(average_precision_score(y_true, y_pred.detach().numpy()))
                if mailbox is not None:
                    src = metadata['src_pos_index']
                    dst = metadata['dst_pos_index']
                    ts = roots.ts
                    if graph.edge_attr is None:
                        edge_feats = None
                    elif graph.edge_attr.device == torch.device('cpu'):
                        edge_feats = graph.edge_attr[roots.eids.to('cpu')].to('cuda')
                    else:
                        edge_feats = graph.edge_attr[roots.eids]
                    dist_index_mapper = mfgs[0][0].srcdata['ID']
                    root_index = torch.cat((src, dst))
                    last_updated_nid = model.module.memory_updater.last_updated_nid[root_index]
                    last_updated_memory = model.module.memory_updater.last_updated_memory[root_index]
                    last_updated_ts = model.module.memory_updater.last_updated_ts[root_index]
                    index, memory, memory_ts = mailbox.get_update_memory(last_updated_nid,
                                                                         last_updated_memory,
                                                                         last_updated_ts)
                    index, mail, mail_ts = mailbox.get_update_mail(dist_index_mapper,
                                                                   src, dst, ts, edge_feats,
                                                                   model.module.memory_updater.last_updated_memory,
                                                                   model.module.embedding, use_src_emb, use_dst_emb,
                                                                   )
                    mailbox.set_mailbox_all_to_all(index, memory, memory_ts, mail, mail_ts, reduce_Op='max')
        torch.cuda.synchronize()
        time_prep = time.time() - epoch_start_time
        avg_time += time.time() - epoch_start_time
        train_ap = float(torch.tensor(train_aps).mean())
        ap, auc = eval('val')
        early_stop = early_stopper.early_stop_check(ap)
        if early_stop:
            dist.barrier()
            print("Early stopping at epoch {:d}\n".format(e))
            print(f"Loading the best model at epoch {early_stopper.best_epoch}\n")
            best_model_path = get_checkpoint_path(early_stopper.best_epoch)
            model.load_state_dict(torch.load(best_model_path))
            break
        else:
            print('\ttrain loss:{:.4f} train ap:{:4f} val ap:{:4f} val auc:{:4f}\n'.format(total_loss, train_ap, ap, auc))
            print('\ttotal time:{:.2f}s prep time:{:.2f}s\n'.format(time.time() - epoch_start_time, time_prep))
            print('\t fetch time:{:.2f}s write back time:{:.2f}s\n'.format(fetch_time, write_back_time))
            torch.save(model.state_dict(), get_checkpoint_path(e))
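        # Note (added commentary): time_prep spans the whole training loop, so
        # the "prep time" printed above is loop time rather than pure data
        # preparation; fetch/write-back times print as 0.00s because they are
        # never accumulated in this example.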
    if not early_stop:
        dist.barrier()
        print(f"Loading the best model at epoch {early_stopper.best_epoch}")
        best_model_path = get_checkpoint_path(early_stopper.best_epoch)
        model.load_state_dict(torch.load(best_model_path))
    model.eval()
    if mailbox is not None:
        mailbox.reset()
        model.module.memory_updater.last_updated_nid = None
    ap, auc = eval('train')
    print('{} {} \n'.format(ap, auc))
    ap, auc = eval('val')
    print('{} {} \n'.format(ap, auc))
    ap, auc = eval('test')
    eval_neg_samples = 1
    if eval_neg_samples > 1:
        print('\ttest AP:{:4f} test MRR:{:4f}\n'.format(ap, auc))
    else:
        print('\ttest AP:{:4f} test AUC:{:4f}\n'.format(ap, auc))
    print('test_dataset {} avg_time {} \n'.format(test_data.edges.shape[1], avg_time / train_param['epoch']))
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    ctx.shutdown()


if __name__ == "__main__":
    main()