Commit b305c21a by zhlj

fix bugs and add APAN

parents 29970325 82337762
bound.png

16.4 KB

...@@ -37,6 +37,9 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) ...@@ -37,6 +37,9 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
.def("src_index", [](const TemporalGraphBlock &tgb) { return vecToTensor<EdgeIDType>(tgb.src_index); }) .def("src_index", [](const TemporalGraphBlock &tgb) { return vecToTensor<EdgeIDType>(tgb.src_index); })
.def("sample_nodes", [](const TemporalGraphBlock &tgb) { return vecToTensor<NodeIDType>(tgb.sample_nodes); }) .def("sample_nodes", [](const TemporalGraphBlock &tgb) { return vecToTensor<NodeIDType>(tgb.sample_nodes); })
.def("sample_nodes_ts", [](const TemporalGraphBlock &tgb) { return vecToTensor<TimeStampType>(tgb.sample_nodes_ts); }) .def("sample_nodes_ts", [](const TemporalGraphBlock &tgb) { return vecToTensor<TimeStampType>(tgb.sample_nodes_ts); })
.def("sample_weight",[](const TemporalGraphBlock &tgb){
return vecToTensor<float>(tgb.sample_weight);
})
.def_readonly("sample_time", &TemporalGraphBlock::sample_time, py::return_value_policy::reference) .def_readonly("sample_time", &TemporalGraphBlock::sample_time, py::return_value_policy::reference)
.def_readonly("tot_time", &TemporalGraphBlock::tot_time, py::return_value_policy::reference) .def_readonly("tot_time", &TemporalGraphBlock::tot_time, py::return_value_policy::reference)
.def_readonly("sample_edge_num", &TemporalGraphBlock::sample_edge_num, py::return_value_policy::reference); .def_readonly("sample_edge_num", &TemporalGraphBlock::sample_edge_num, py::return_value_policy::reference);
......
...@@ -11,6 +11,7 @@ class TemporalGraphBlock ...@@ -11,6 +11,7 @@ class TemporalGraphBlock
vector<int64_t> src_index; vector<int64_t> src_index;
vector<NodeIDType> sample_nodes; vector<NodeIDType> sample_nodes;
vector<TimeStampType> sample_nodes_ts; vector<TimeStampType> sample_nodes_ts;
vector<float> sample_weight;
vector<WeightType> e_weights; vector<WeightType> e_weights;
double sample_time = 0; double sample_time = 0;
double tot_time = 0; double tot_time = 0;
......
...@@ -308,9 +308,14 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer( ...@@ -308,9 +308,14 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer(
double p0 = (double)rand_r(&loc_seeds[tid]) / (RAND_MAX + 1.0); double p0 = (double)rand_r(&loc_seeds[tid]) / (RAND_MAX + 1.0);
double ep = boundery_probility*pr[cal_cnt-1]/sum_p*sum_1; double ep = boundery_probility*pr[cal_cnt-1]/sum_p*sum_1;
if(p0 > ep)continue; if(p0 > ep)continue;
tgb_i[tid].sample_weight.emplace_back((float)ep);
} }
else continue; else continue;
//cout<<"in"<<endl; //cout<<"in"<<endl;
}
else{
tgb_i[tid].sample_weight.emplace_back((float)1.0);
} }
tgb_i[tid].src_index.emplace_back(i); tgb_i[tid].src_index.emplace_back(i);
tgb_i[tid].sample_nodes.emplace_back(tnb.neighbors[node][cid]); tgb_i[tid].sample_nodes.emplace_back(tnb.neighbors[node][cid]);
...@@ -358,6 +363,8 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer( ...@@ -358,6 +363,8 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer(
each_begin[i]=size; each_begin[i]=size;
size += s; size += s;
} }
if(policy == "boundery_recent_decay")
ret[cur_layer].sample_weight.resize(size);
ret[cur_layer].eid.resize(size); ret[cur_layer].eid.resize(size);
ret[cur_layer].src_index.resize(size); ret[cur_layer].src_index.resize(size);
ret[cur_layer].delta_ts.resize(size); ret[cur_layer].delta_ts.resize(size);
...@@ -366,6 +373,8 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer( ...@@ -366,6 +373,8 @@ void ParallelSampler :: neighbor_sample_from_nodes_with_before_layer(
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for(int i = 0; i<threads; i++){ for(int i = 0; i<threads; i++){
if(policy == "boundery_recent_decay")
copy(tgb_i[i].sample_weight.begin(), tgb_i[i].sample_weight.end(), ret[cur_layer].sample_weight.begin()+each_begin[i]);
copy(tgb_i[i].eid.begin(), tgb_i[i].eid.end(), ret[cur_layer].eid.begin()+each_begin[i]); copy(tgb_i[i].eid.begin(), tgb_i[i].eid.end(), ret[cur_layer].eid.begin()+each_begin[i]);
copy(tgb_i[i].src_index.begin(), tgb_i[i].src_index.end(), ret[cur_layer].src_index.begin()+each_begin[i]); copy(tgb_i[i].src_index.begin(), tgb_i[i].src_index.end(), ret[cur_layer].src_index.begin()+each_begin[i]);
copy(tgb_i[i].delta_ts.begin(), tgb_i[i].delta_ts.end(), ret[cur_layer].delta_ts.begin()+each_begin[i]); copy(tgb_i[i].delta_ts.begin(), tgb_i[i].delta_ts.end(), ret[cur_layer].delta_ts.begin()+each_begin[i]);
......
# Run test_all.sh once per seed, one run after another, sending each run's
# stdout to "<seed>.out".
for seed in 13357 12347 63377 53473 54763; do
    bash test_all.sh "$seed" > "$seed.out"
    # Kept from the original: this script starts no background jobs itself,
    # so `wait` returns immediately.
    wait
done
\ No newline at end of file
import matplotlib.pyplot as plt
import numpy as np
import torch
# For every seed and dataset, read one training log per sampling probability
# and report the test AP logged on the line with the highest validation AP.
ssim_values = [0, 0.1, 0.2, 0.3, 0.4, 2]  # placeholder ssim parameter values (not used below)
probability_values = [1, 0.1, 0.05, 0.01, 0]
data_values = ['WIKI', 'LASTFM', 'WikiTalk', 'DGraphFin']  # datasets whose logs are read
# Fixed: the last seed used to be ' 54763' (leading space), which built the path
# 'all_ 54763/...' and did not match the 'all_<seed>' naming of the other seeds.
seed = ['13357', '12347', '63377', '53473', '54763']
partition = 'ours_shared'
# Logs are read from files named after this pattern:
#all/"$data"/"$partitions"-ours_shared-0.01-"$mem"-"$ssim"-"$sample".out
partitions = 4
topk = 0.01
mem = 'all_update'  # 'historical'
model = 'TGN'
for sd in seed:
    for data in data_values:
        ap_list = []
        comm_list = []
        for p in probability_values:
            # The two small datasets use the 'TGN' directory, the others 'TGN_large'.
            if data == 'WIKI' or data == 'LASTFM':
                model = 'TGN'
            else:
                model = 'TGN_large'
            # p == 1 selects the plain 'recent' run; any other value selects the
            # 'boundery_recent_decay' run with that probability.
            if p == 1:
                log_path = 'all_{}/{}/{}/{}-{}-{}-{}-recent.out'.format(sd, data, model, partitions, partition, topk, mem)
            else:
                log_path = 'all_{}/{}/{}/{}-{}-{}-{}-boundery_recent_decay-{}.out'.format(sd, data, model, partitions, partition, topk, mem, p)
            prefix = "val ap:"
            test_marker = "test ap "
            max_val_ap = 0
            test_ap = 0
            # Use a separate name for the handle so the path string is not shadowed.
            with open(log_path, 'r') as log_file:
                for line in log_file:
                    start = line.find(prefix)
                    if start == -1:
                        continue
                    # The value runs from just after the marker up to the next space.
                    pos = start + len(prefix)
                    posr = line.find(' ', pos)
                    val_ap = float(line[pos:posr])
                    pos = line.find(test_marker) + len(test_marker)
                    posr = line.find(' ', pos)
                    _test_ap = float(line[pos:posr])
                    # Keep the test AP that belongs to the best validation AP so far.
                    if val_ap > max_val_ap:
                        max_val_ap = val_ap
                        test_ap = _test_ap
            ap_list.append(test_ap)
        print('data {} seed {} ap: {}'.format(data, sd, ap_list))
# prefix = 'best test AP:'
# cnt = 0
# sum = 0
# with open(file, 'r') as file:
# for line in file:
# if line.startswith(prefix):
# ap = float(line.lstrip(prefix).split(' ')[0])
# pos = line.find('remote node number tensor')
# if(pos!=-1):
# posr = line.find(']',pos+2+len('remote node number tensor'),)
# #print(line,line[pos+2+len('remote node number tensor'):posr])
# comm = int(line[pos+2+len('remote node number tensor'):posr])
# #print()
# sum = sum+comm
# cnt = cnt+1
# #print(comm)
# ap_list.append(ap)
# comm_list.append(sum/cnt*4)
# # 绘制柱状图
# print('{} TestAP={}\n'.format(data,ap_list))
# bar_width = 0.4
# #shared comm tensor
# # 设置柱状图的位置
# bars = range(len(probability_values))
# # 绘制柱状图
# plt.bar([b for b in bars], ap_list, width=bar_width)
# # 绘制柱状图
# plt.ylim([0.9,1])
# plt.xticks([b for b in bars], probability_values)
# plt.xlabel('probability')
# plt.ylabel('Test AP')
# plt.title('{}({} partitions)'.format(data,partitions))
# plt.savefig('boundary_AP_{}_{}_{}.png'.format(data,partitions,model))
# plt.clf()
# print(comm_list)
# plt.bar([b for b in bars], comm_list, width=bar_width)
# # 绘制柱状图
# plt.xticks([b for b in bars], probability_values)
# plt.xlabel('probability')
# plt.ylabel('Communication volume')
# plt.title('{}({} partitions)'.format(data,partitions))
# plt.savefig('boundary_comm_{}_{}_{}.png'.format(data,partitions,model))
# plt.clf()
# if partition == 'ours_shared':
# partition0 = 'ours'
# else:
# partition0=partition
# for p in probability_values:
# file = '{}/{}/test_{}_{}_{}_0_boundery_recent_uniform_{}_all_update_2.pt'.format(data,model,partition0,topk,partitions,float(p))
# val_ap = torch.tensor(torch.load(file))[:,0]
# epoch = torch.arange(val_ap.shape[0])
# #绘制曲线图
# plt.plot(epoch,val_ap, label='probability={}'.format(p))
# plt.xlabel('Epoch')
# plt.ylabel('Val AP')
# plt.title('{}({} partitions)'.format(data,partitions))
# # plt.grid(True)
# plt.legend()
# plt.savefig('{}_{}_{}_boundary_Convergence_rate.png'.format(data,partitions,model))
# plt.clf()
import matplotlib.pyplot as plt
import numpy as np
import torch
# For every dataset: parse one training log per sampling probability, then draw
# a test-AP bar chart, a communication-volume bar chart and a convergence plot.
ssim_values = [0, 0.1, 0.2, 0.3, 0.4, 2]  # placeholder ssim parameter values (not used below)
probability_values = [1, 0.5, 0.1, 0.05, 0.01, 0]
data_values = ['WIKI_3', 'LASTFM_3', 'WikiTalk', 'StackOverflow']  # datasets whose logs are read
partition = 'ours'
# Logs are read from files named after this pattern:
#all/"$data"/"$partitions"-ours_shared-0.01-"$mem"-"$ssim"-"$sample".out
partitions = 4
topk = 0
mem = 'all_update'  # 'historical'
model = 'TGN'
for data in data_values:
    ap_list = []
    comm_list = []
    for p in probability_values:
        log_path = '{}/{}/{}-{}-{}-{}-boundery_recent_uniform-{}.out'.format(data, model, partitions, partition, topk, mem, p)
        prefix = 'best test AP:'
        comm_marker = 'remote node number tensor'
        # Start from NaN so a log without a result line cannot silently reuse
        # the value parsed from the previous log.
        ap = float('nan')
        comm_count = 0
        comm_total = 0
        with open(log_path, 'r') as log_file:
            for line in log_file:
                if line.startswith(prefix):
                    # Slice the prefix off: str.lstrip(prefix) strips a *set of
                    # characters*, not a prefix, and only worked by accident.
                    ap = float(line[len(prefix):].split()[0])
                pos = line.find(comm_marker)
                if pos != -1:
                    # Skip the two characters after the marker, read up to ']'.
                    begin = pos + 2 + len(comm_marker)
                    end = line.find(']', begin)
                    comm_total += int(line[begin:end])
                    comm_count += 1
        ap_list.append(ap)
        # Mean over the matching lines, scaled by 4 as in the original; NaN
        # instead of a ZeroDivisionError when no line matched.
        comm_list.append(comm_total / comm_count * 4 if comm_count else float('nan'))
    print('{} TestAP={}\n'.format(data, ap_list))
    bar_width = 0.4
    # Bar positions: one bar per probability value.
    bars = list(range(len(probability_values)))
    # Bar chart of test AP.
    plt.bar(bars, ap_list, width=bar_width)
    plt.ylim([0.9, 1])
    plt.xticks(bars, probability_values)
    plt.xlabel('probability')
    plt.ylabel('Test AP')
    plt.title('{}({} partitions)'.format(data, partitions))
    plt.savefig('boundary_AP_{}_{}_{}.png'.format(data, partitions, model))
    plt.clf()
    print(comm_list)
    # Bar chart of communication volume.
    plt.bar(bars, comm_list, width=bar_width)
    plt.xticks(bars, probability_values)
    plt.xlabel('probability')
    plt.ylabel('Communication volume')
    plt.title('{}({} partitions)'.format(data, partitions))
    plt.savefig('boundary_comm_{}_{}_{}.png'.format(data, partitions, model))
    plt.clf()
    # The saved-tensor file names use 'ours' where the logs use 'ours_shared'.
    if partition == 'ours_shared':
        partition0 = 'ours'
    else:
        partition0 = partition
    for p in probability_values:
        curve_path = '{}/{}/test_{}_{}_{}_0_boundery_recent_uniform_{}_all_update_2.pt'.format(data, model, partition0, topk, partitions, float(p))
        # Column 0 of the saved list is plotted as the per-epoch curve.
        val_ap = torch.tensor(torch.load(curve_path))[:, 0]
        epoch = torch.arange(val_ap.shape[0])
        # Line plot, one curve per probability.
        plt.plot(epoch, val_ap, label='probability={}'.format(p))
    plt.xlabel('Epoch')
    plt.ylabel('Val AP')
    plt.title('{}({} partitions)'.format(data, partitions))
    # plt.grid(True)
    plt.legend()
    plt.savefig('{}_{}_{}_boundary_Convergence_rate.png'.format(data, partitions, model))
    plt.clf()
import matplotlib.pyplot as plt
import numpy as np
import torch
# For every dataset: parse one training log per ssim threshold, then draw a
# test-AP bar chart, a communication-volume bar chart and a convergence plot.
# Sentinel thresholds: -1 selects the 'all_update' run, 2 selects the 'local' run.
ssim_values = [-1, 0.3, 0.5, 0.7, 2]
# 'LASTFM' was listed twice in the original, which only regenerated the same
# plots a second time.
data_values = ['WIKI', 'LASTFM', 'WikiTalk', 'REDDIT', 'DGraphFin']
partition = 'ours_shared'
# Logs are read from files named after this pattern:
#all/"$data"/"$partitions"-ours_shared-0.01-"$mem"-"$ssim"-"$sample".out
partitions = 4
model = 'TGN'
topk = 0.01
mem = 'historical'
for data in data_values:
    ap_list = []
    comm_list = []
    for ssim in ssim_values:
        if ssim == 2:
            log_path = '{}/{}/{}-{}-{}-local-recent.out'.format(data, model, partitions, partition, topk)
        elif ssim == -1:
            log_path = '{}/{}/{}-{}-{}-all_update-recent.out'.format(data, model, partitions, partition, topk)
        else:
            log_path = '{}/{}/{}-{}-{}-{}-{}-recent.out'.format(data, model, partitions, partition, topk, mem, ssim)
        prefix = 'best test AP:'
        comm_marker = 'shared comm tensor'
        # Start from NaN so a log without the expected lines cannot silently
        # reuse values parsed from the previous log (or raise NameError).
        ap = float('nan')
        comm = float('nan')
        with open(log_path, 'r') as log_file:
            for line in log_file:
                if line.startswith(prefix):
                    # Slice the prefix off: str.lstrip(prefix) strips a *set of
                    # characters*, not a prefix, and only worked by accident.
                    ap = float(line[len(prefix):].split()[0])
                pos = line.find(comm_marker)
                if pos != -1:
                    # Skip two characters after the marker and drop the last
                    # three characters of the line, exactly as the original did.
                    comm = int(line[pos + 2 + len(comm_marker):len(line) - 3])
        print(ap)
        ap_list.append(ap)
        comm_list.append(comm)
    # The original printed this summary twice in a row; once is enough.
    print('{} TestAP={}\n'.format(data, ap_list))
    bar_width = 0.4
    # Bar positions: one bar per threshold value.
    bars = list(range(len(ssim_values)))
    # Bar chart of test AP.
    plt.bar(bars, ap_list, width=bar_width)
    plt.xticks(bars, ssim_values)
    plt.xlabel('SSIM threshold Values')
    plt.ylabel('Test AP')
    #if(data=='WIKI'):
    #    plt.ylim([0.97,1])
    plt.title('{}({} partitions)'.format(data, partitions))
    plt.savefig('ssim_{}_{}_{}.png'.format(data, partitions, model))
    plt.clf()
    # Bar chart of communication volume.
    plt.bar(bars, comm_list, width=bar_width)
    plt.xticks(bars, ssim_values)
    plt.xlabel('SSIM threshold Values')
    plt.ylabel('Communication volume')
    plt.title('{}({} partitions)'.format(data, partitions))
    plt.savefig('ssim_comm_{}_{}_{}.png'.format(data, partitions, model))
    plt.clf()
    # The saved-tensor file names use 'ours' where the logs use 'ours_shared'.
    if partition == 'ours_shared':
        partition0 = 'ours'
    else:
        partition0 = partition
    for ssim in ssim_values:
        if ssim == 2:
            curve_path = '{}/{}/test_{}_{}_{}_0_recent_0.1_local_2.pt'.format(data, model, partition0, topk, partitions)
        elif ssim == -1:
            curve_path = '{}/{}/test_{}_{}_{}_0_recent_0.1_all_update_2.pt'.format(data, model, partition0, topk, partitions)
        else:
            curve_path = '{}/{}/test_{}_{}_{}_0_recent_0.1_{}_{}.pt'.format(data, model, partition0, topk, partitions, mem, float(ssim))
        # Column 0 of the saved list is plotted as the per-epoch curve.
        val_ap = torch.tensor(torch.load(curve_path))[:, 0]
        print(val_ap)
        epoch = torch.arange(val_ap.shape[0])
        # Line plot, one curve per threshold, with readable labels for the sentinels.
        if ssim == -1:
            plt.plot(epoch, val_ap, label='all-update')
        elif ssim == 2:
            plt.plot(epoch, val_ap, label='local')
        else:
            plt.plot(epoch, val_ap, label='ssim = {}'.format(ssim))
    if data == 'WIKI':
        plt.ylim([0.85, 0.90])
    plt.xlabel('Epoch')
    plt.ylabel('Val AP')
    plt.title('{}({} partitions)'.format(data, partitions))
    # plt.grid(True)
    plt.legend()
    plt.savefig('{}_{}_{}_ssim_Convergence_rate.png'.format(data, partitions, model))
    plt.clf()
...@@ -77,6 +77,8 @@ parser.add_argument('--eval_neg_samples', default=1, type=int, metavar='W', ...@@ -77,6 +77,8 @@ parser.add_argument('--eval_neg_samples', default=1, type=int, metavar='W',
help='name of model') help='name of model')
parser.add_argument('--memory_type', default='all_update', type=str, metavar='W', parser.add_argument('--memory_type', default='all_update', type=str, metavar='W',
help='name of model') help='name of model')
parser.add_argument('--seed', default=6773, type=int, metavar='W',
help='name of model')
#boundery_recent_uniform boundery_recent_decay #boundery_recent_uniform boundery_recent_decay
args = parser.parse_args() args = parser.parse_args()
if args.memory_type == 'all_local' or args.topk != '0': if args.memory_type == 'all_local' or args.topk != '0':
...@@ -124,7 +126,7 @@ def seed_everything(seed=42): ...@@ -124,7 +126,7 @@ def seed_everything(seed=42):
torch.cuda.manual_seed(seed) torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False
seed_everything(args.seed)
total_next_batch = 0 total_next_batch = 0
total_forward = 0 total_forward = 0
total_count_score = 0 total_count_score = 0
...@@ -186,7 +188,6 @@ def query(): ...@@ -186,7 +188,6 @@ def query():
"total_update_mail" :total_update_mail , "total_update_mail" :total_update_mail ,
"total_update_memory":total_update_memory, "total_update_memory":total_update_memory,
"total_remote_update":total_remote_update,} "total_remote_update":total_remote_update,}
seed_everything(34)
def main(): def main():
#torch.autograd.set_detect_anomaly(True) #torch.autograd.set_detect_anomaly(True)
print('LOCAL RANK {}, RANK{}'.format(os.environ["LOCAL_RANK"],os.environ["RANK"])) print('LOCAL RANK {}, RANK{}'.format(os.environ["LOCAL_RANK"],os.environ["RANK"]))
...@@ -266,11 +267,15 @@ def main(): ...@@ -266,11 +267,15 @@ def main():
if args.local_neg_sample: if args.local_neg_sample:
print('dst len {} origin len {}'.format(graph.edge_index[1,mask].unique().shape[0],full_dst.unique().shape[0])) print('dst len {} origin len {}'.format(graph.edge_index[1,mask].unique().shape[0],full_dst.unique().shape[0]))
train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = graph.edge_index[1,mask].unique()) train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = graph.edge_index[1,mask].unique())
else: else:
#train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = full_dst.unique()) #train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = full_dst.unique())
train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = full_dst.unique(),local_mask=(DistIndex(graph.nids_mapper[full_dst.unique()].to('cpu')).part == dist.get_rank()),prob=args.probability) train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = full_dst.unique(),local_mask=(DistIndex(graph.nids_mapper[full_dst.unique()].to('cpu')).part == dist.get_rank()),prob=args.probability)
remote_ratio = train_neg_sampler.local_dst.shape[0] / train_neg_sampler.dst_node_list.shape[0]
train_ratio_pos = (1 - args.probability) + args.probability * remote_ratio
train_ratio_neg = args.probability * (1-remote_ratio)
print(train_neg_sampler.dst_node_list) print(train_neg_sampler.dst_node_list)
neg_sampler = LocalNegativeSampling('triplet',amount= neg_samples,dst_node_list = full_dst.unique(),seed=6773) neg_sampler = LocalNegativeSampling('triplet',amount= neg_samples,dst_node_list = full_dst.unique(),seed=args.seed)
trainloader = DistributedDataLoader(graph,eval_train_data,sampler = sampler, trainloader = DistributedDataLoader(graph,eval_train_data,sampler = sampler,
sampler_fn = SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES, sampler_fn = SAMPLE_TYPE.SAMPLE_FROM_TEMPORAL_EDGES,
...@@ -337,10 +342,10 @@ def main(): ...@@ -337,10 +342,10 @@ def main():
print('dim_node {} dim_edge {}\n'.format(gnn_dim_node,gnn_dim_edge)) print('dim_node {} dim_edge {}\n'.format(gnn_dim_node,gnn_dim_edge))
avg_time = 0 avg_time = 0
if use_cuda: if use_cuda:
model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param,graph.ids.shape[0],mailbox).cuda() model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param,graph.ids.shape[0],mailbox,train_ratio=(train_ratio_pos,train_ratio_neg)).cuda()
device = torch.device('cuda') device = torch.device('cuda')
else: else:
model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param,graph.ids.shape[0],mailbox) model = GeneralModel(gnn_dim_node, gnn_dim_edge, sample_param, memory_param, gnn_param, train_param,graph.ids.shape[0],mailbox,train_ratio=(train_ratio_pos,train_ratio_neg))
device = torch.device('cpu') device = torch.device('cpu')
model = DDP(model,find_unused_parameters=True) model = DDP(model,find_unused_parameters=True)
def count_parameters(model): def count_parameters(model):
...@@ -530,9 +535,12 @@ def main(): ...@@ -530,9 +535,12 @@ def main():
model.train() model.train()
optimizer.zero_grad() optimizer.zero_grad()
ones = torch.ones(metadata['dst_neg_index'].shape[0],device = model.device,dtype=torch.float)
weight = torch.where(DistIndex(mfgs[0][0].srcdata['ID'][metadata['dst_neg_index']]).part == torch.distributed.get_rank(),ones/train_ratio_pos,ones/train_ratio_neg).reshape(-1,1)
pred_pos, pred_neg = model(mfgs,metadata,neg_samples=args.neg_samples,async_param = param) pred_pos, pred_neg = model(mfgs,metadata,neg_samples=args.neg_samples,async_param = param)
loss = creterion(pred_pos, torch.ones_like(pred_pos)) loss = creterion(pred_pos, torch.ones_like(pred_pos))
loss += creterion(pred_neg, torch.zeros_like(pred_neg)) neg_creterion = torch.nn.BCEWithLogitsLoss(weight)
loss += neg_creterion(pred_neg, torch.zeros_like(pred_neg))
total_loss += float(loss.item()) total_loss += float(loss.item())
#mailbox.handle_last_async() #mailbox.handle_last_async()
#trainloader.async_feature() #trainloader.async_feature()
...@@ -610,7 +618,7 @@ def main(): ...@@ -610,7 +618,7 @@ def main():
print(' comm local node number {} remote node number {} local edge {} remote edge{}\n'.format(sum_local_comm,sum_remote_comm,sum_local_edge_comm,sum_remote_edge_comm)) print(' comm local node number {} remote node number {} local edge {} remote edge{}\n'.format(sum_local_comm,sum_remote_comm,sum_local_edge_comm,sum_remote_edge_comm))
print('memory comm {} shared comm {}\n'.format(tot_comm_count,tot_shared_count)) print('memory comm {} shared comm {}\n'.format(tot_comm_count,tot_shared_count))
#if(e==0): #if(e==0):
# torch.save((local_access,remote_access,local_edge_access,remote_edge_access,local_comm,remote_comm,local_edge_comm,remote_edge_comm),'all/{}/{}/comm/comm_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim)) # torch.save((local_access,remote_access,local_edge_access,remote_edge_access,local_comm,remote_comm,local_edge_comm,remote_edge_comm),'all_args.seed/{}/{}/comm/comm_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim))
ap = 0 ap = 0
auc = 0 auc = 0
tt.ssim_remote=0 tt.ssim_remote=0
...@@ -662,9 +670,9 @@ def main(): ...@@ -662,9 +670,9 @@ def main():
pass pass
# print('weight {} {}\n'.format(tt.weight_count_local,tt.weight_count_remote)) # print('weight {} {}\n'.format(tt.weight_count_local,tt.weight_count_remote))
# print('ssim {} {}\n'.format(tt.ssim_local/tt.ssim_cnt,tt.ssim_remote/tt.ssim_cnt)) # print('ssim {} {}\n'.format(tt.ssim_local/tt.ssim_cnt,tt.ssim_remote/tt.ssim_cnt))
torch.save(val_list,'all/{}/{}/val_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim)) torch.save(val_list,'all_{}/{}/{}/val_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.seed,args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim))
torch.save(loss_list,'all/{}/{}/loss_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim)) torch.save(loss_list,'all_{}/{}/{}/loss_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.seed,args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim))
torch.save(test_ap_list,'all/{}/{}/test_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim)) torch.save(test_ap_list,'all_{}/{}/{}/test_{}_{}_{}_{}_{}_{}_{}_{}.pt'.format(args.seed,args.dataname,args.model,args.partition,args.topk,dist.get_world_size(),dist.get_rank(),args.sample_type,args.probability,args.memory_type,args.shared_memory_ssim))
print(avg_time) print(avg_time)
if not early_stop: if not early_stop:
......
import matplotlib.pyplot as plt
import numpy as np
# Grouped bar chart comparing four datasets across five sampling settings;
# the figure is written to 'bound.png' and then shown.
# Data: one value per setting in p_values, per dataset.
p_values = ['recent', 'p=0.1', 'p=0.05', 'p=0.01', 'p=0']
wiki_values = [0.979832, 0.980298, 0.975079, 0.97349, 0.96381]
lastfm_values = [0.820161, 0.852725, 0.848085, 0.817381, 0.796689]
wikitalk_values = [0.969647, 0.974473, 0.973996, 0.968961, 0.964867]
gdelt_values = [0.987338, 0.987454, 0.987038, 0.98812, 0.98726]
# Width of a single bar.
barWidth = 0.15
# Bar positions: each dataset is shifted one bar width right of the previous one.
r1 = np.arange(len(wiki_values))
r2 = [x + barWidth for x in r1]
r3 = [x + barWidth for x in r2]
r4 = [x + barWidth for x in r3]
# Create the figure.
plt.figure(figsize=(12,8))
plt.bar(r1, wiki_values, color='b', width=barWidth, edgecolor='grey', label='WIKI')
plt.bar(r2, lastfm_values, color='r', width=barWidth, edgecolor='grey', label='LASTFM')
plt.bar(r3, wikitalk_values, color='g', width=barWidth, edgecolor='grey', label='WikiTalk')
plt.bar(r4, gdelt_values, color='y', width=barWidth, edgecolor='grey', label='GDELT')
# Add labels.
plt.xlabel('p values', fontweight='bold', fontsize=15)
# NOTE(review): the axis is labelled 'SSIM' but the values look like AP scores — confirm.
plt.ylabel('SSIM', fontweight='bold', fontsize=15)
# Centre each tick under its 4-bar group: the bars sit at offsets 0..3 bar
# widths, so the middle is 1.5 bar widths (the original used 1, off-centre).
plt.xticks([r + 1.5 * barWidth for r in range(len(wiki_values))], p_values)
# Add the legend BEFORE saving; the original saved first, so 'bound.png'
# was written without a legend.
plt.legend()
plt.savefig('bound.png')
plt.show()
...@@ -295,11 +295,20 @@ class TransfomerAttentionLayer(torch.nn.Module): ...@@ -295,11 +295,20 @@ class TransfomerAttentionLayer(torch.nn.Module):
#V_remote = V.clone() #V_remote = V.clone()
#V_local[DistIndex(b.srcdata['ID']).part[b.edges()[0]]!=torch.distributed.get_rank()] = 0 #V_local[DistIndex(b.srcdata['ID']).part[b.edges()[0]]!=torch.distributed.get_rank()] = 0
#V_remote[DistIndex(b.srcdata['ID']).part[b.edges()[0]]==torch.distributed.get_rank()] = 0 #V_remote[DistIndex(b.srcdata['ID']).part[b.edges()[0]]==torch.distributed.get_rank()] = 0
b.edata['v'] = V
#b.edata['v0'] = V_local #b.edata['v0'] = V_local
#b.edata['v1'] = V_remote #b.edata['v1'] = V_remote
#b.update_all(dgl.function.copy_e('v0', 'm0'), dgl.function.sum('m0', 'h0')) #b.update_all(dgl.function.copy_e('v0', 'm0'), dgl.function.sum('m0', 'h0'))
#b.update_all(dgl.function.copy_e('v1', 'm1'), dgl.function.sum('m1', 'h1')) #b.update_all(dgl.function.copy_e('v1', 'm1'), dgl.function.sum('m1', 'h1'))
#if 'weight' in b.edata and self.training is True:
# with torch.no_grad():
# weight = b.edata['weight'].reshape(-1,1)#(b.edata['weight']/torch.sum(b.edata['weight']).item()).reshape(-1,1)
#weight =
#print(weight.max())
# b.edata['v'] = V*weight
#else:
# weight = b.edata['weight'].reshape(-1,1)
b.edata['v'] = V
#print(torch.sum(torch.sum(((V-V*weight)**2))))
b.update_all(dgl.function.copy_e('v', 'm'), dgl.function.sum('m', 'h')) b.update_all(dgl.function.copy_e('v', 'm'), dgl.function.sum('m', 'h'))
#tt.ssim_local+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h0'])) #tt.ssim_local+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h0']))
#tt.ssim_remote+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h1'])) #tt.ssim_remote+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h1']))
......
...@@ -52,20 +52,36 @@ class all_to_all_embedding(torch.autograd.Function): ...@@ -52,20 +52,36 @@ class all_to_all_embedding(torch.autograd.Function):
grad[dst_pos_index] = grad_pos_dst grad[dst_pos_index] = grad_pos_dst
grad[dst_neg_index] = grad_neg_dst grad[dst_neg_index] = grad_neg_dst
return grad,None,None return grad,None,None
class NegFixLayer(torch.autograd.Function):
    """Identity in the forward pass; rescales the gradient in the backward pass.

    ``forward`` returns ``input`` unchanged and remembers ``weight``.
    ``backward`` returns ``grad_output / weight`` as the gradient for ``input``
    and no gradient for ``weight``.

    Use it as ``NegFixLayer.apply(input, weight)``. ``forward`` and ``backward``
    are static methods, as ``torch.autograd.Function`` requires; the original
    instance-method versions are rejected by current PyTorch when ``apply`` is
    called. The pass-through ``__init__`` was dropped because it only delegated
    to the base class.
    """

    @staticmethod
    def forward(ctx, input, weight):
        """Return ``input`` as-is and save ``weight`` for the backward pass."""
        ctx.save_for_backward(weight)
        return input

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``(grad_output / weight, None)``, one entry per forward argument."""
        weight, = ctx.saved_tensors
        return grad_output / weight, None
class GeneralModel(torch.nn.Module): class GeneralModel(torch.nn.Module):
def __init__(self, dim_node, dim_edge, sample_param, memory_param, gnn_param, train_param, num_nodes = None,mailbox = None,combined=False): def __init__(self, dim_node, dim_edge, sample_param, memory_param, gnn_param, train_param, num_nodes = None,mailbox = None,combined=False,train_ratio = None):
super(GeneralModel, self).__init__() super(GeneralModel, self).__init__()
self.dim_node = dim_node self.dim_node = dim_node
self.dim_node_input = dim_node self.dim_node_input = dim_node
self.dim_edge = dim_edge self.dim_edge = dim_edge
self.sample_param = sample_param self.sample_param = sample_param
self.memory_param = memory_param self.memory_param = memory_param
self.train_pos_ratio,self.train_neg_ratio = train_ratio
if not 'dim_out' in gnn_param: if not 'dim_out' in gnn_param:
gnn_param['dim_out'] = memory_param['dim_out'] gnn_param['dim_out'] = memory_param['dim_out']
self.gnn_param = gnn_param self.gnn_param = gnn_param
self.train_param = train_param self.train_param = train_param
self.neg_fix_layer = NegFixLayer()
if memory_param['type'] == 'node': if memory_param['type'] == 'node':
if memory_param['memory_update'] == 'gru': if memory_param['memory_update'] == 'gru':
#if memory_param['async'] == False: #if memory_param['async'] == False:
...@@ -138,12 +154,24 @@ class GeneralModel(torch.nn.Module): ...@@ -138,12 +154,24 @@ class GeneralModel(torch.nn.Module):
h_pos_src = out[metadata['src_pos_index']] h_pos_src = out[metadata['src_pos_index']]
h_pos_dst = out[metadata['dst_pos_index']] h_pos_dst = out[metadata['dst_pos_index']]
h_neg_dst = out[metadata['dst_neg_index']] h_neg_dst = out[metadata['dst_neg_index']]
#end.record() #end.record()
#end.synchronize() #end.synchronize()
#elapsed_time_ms = start.elapsed_time(end) #elapsed_time_ms = start.elapsed_time(end)
#print('time {}\n'.format(elapsed_time_ms)) #print('time {}\n'.format(elapsed_time_ms))
#print('pos src {} \n pos dst {} \n neg dst{} \n'.format(h_pos_src, h_pos_dst,h_neg_dst)) #print('pos src {} \n pos dst {} \n neg dst{} \n'.format(h_pos_src, h_pos_dst,h_neg_dst))
#print('pre predict {}'.format(mfgs[0][0].srcdata['ID'])) #print('pre predict {}'.format(mfgs[0][0].srcdata['ID']))
#if self.training is True:
# with torch.no_grad():
# ones = torch.ones(h_neg_dst.shape[0],device = h_neg_dst.device,dtype=torch.float)
# weight = torch.where(DistIndex(mfgs[0][0].srcdata['ID'][metadata['dst_neg_index']]).part == torch.distributed.get_rank(),ones/self.train_pos_ratio,ones/self.train_neg_ratio).reshape(-1,1)
#weight = torch.clip(weigh)
#weight = weight/weight.max().item()
#print(weight)
#weight =
#h_neg_dst*weight
# pred = self.edge_predictor(h_pos_src, h_pos_dst, None , self.neg_fix_layer.apply(h_neg_dst,weight), neg_samples=neg_samples, mode = mode)
#else:
pred = self.edge_predictor(h_pos_src, h_pos_dst, None , h_neg_dst, neg_samples=neg_samples, mode = mode) pred = self.edge_predictor(h_pos_src, h_pos_dst, None , h_neg_dst, neg_samples=neg_samples, mode = mode)
t_embedding = tt.elapsed_event(t1) t_embedding = tt.elapsed_event(t1)
tt.time_embedding+=t_embedding tt.time_embedding+=t_embedding
......
...@@ -290,7 +290,9 @@ def to_block(graph,data, sample_out,device = torch.device('cuda'),unique = True) ...@@ -290,7 +290,9 @@ def to_block(graph,data, sample_out,device = torch.device('cuda'),unique = True)
if sample_out[r].delta_ts().shape[0] > 0: if sample_out[r].delta_ts().shape[0] > 0:
b.edata['dt'] = sample_out[r].delta_ts().to(device) b.edata['dt'] = sample_out[r].delta_ts().to(device)
b.srcdata['ts'] = block_node_list[1,b.srcnodes()].to(torch.float) b.srcdata['ts'] = block_node_list[1,b.srcnodes()].to(torch.float)
weight = sample_out[r].sample_weight()
if(weight.shape[0] > 0):
b.edata['weight'] = 1/torch.clamp(sample_out[r].sample_weight(),0.0001).to(b.device)
b.edata['__ID'] = e_idx b.edata['__ID'] = e_idx
col = row col = row
col_len += eid_len[r] col_len += eid_len[r]
......
...@@ -18,9 +18,9 @@ class MemoryMoniter: ...@@ -18,9 +18,9 @@ class MemoryMoniter:
#self.memory_ssim.append(self.ssim(pre_memory,now_memory,method = 'cos')) #self.memory_ssim.append(self.ssim(pre_memory,now_memory,method = 'cos'))
#self.nid_list.append(nid) #self.nid_list.append(nid)
def draw(self,degree,data,model,e): def draw(self,degree,data,model,e):
torch.save(self.nid_list,'all/{}/{}/memorynid_{}.pt'.format(data,model,e)) torch.save(self.nid_list,'all_args.seed/{}/{}/memorynid_{}.pt'.format(data,model,e))
torch.save(self.memorychange,'all/{}/{}/memoryF_{}.pt'.format(data,model,e)) torch.save(self.memorychange,'all_args.seed/{}/{}/memoryF_{}.pt'.format(data,model,e))
torch.save(self.memory_ssim,'all/{}/{}/memcos_{}.pt'.format(data,model,e)) torch.save(self.memory_ssim,'all_args.seed/{}/{}/memcos_{}.pt'.format(data,model,e))
# path = './memory/{}/'.format(data) # path = './memory/{}/'.format(data)
# if not os.path.exists(path): # if not os.path.exists(path):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment