Commit e09d279e by zlj

Fix bugs in shared nodes index

parent e9e62b18

83.5 KB | W: | H:

63.4 KB | W: | H:

alpha.png
alpha.png
alpha.png
alpha.png
  • 2-up
  • Swipe
  • Onion skin
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
# 数据 # 数据
alpha_values = [0.1, 0.3, 0.5, 0.7, 0.9, 1.3, 1.5, 1.7, 2] alpha_values = [0,0.1, 0.3, 0.5, 0.7, 0.9, 1.3, 1.5, 1.7, 2]
lastfm = [0.933538, 0.931761, 0.932499, 0.933383, 0.931225, 0.929983, 0.933971, 0.928771, 0.932748] lastfm = [0.8712 ,0.933538, 0.931761, 0.932499, 0.933383, 0.931225, 0.929983, 0.933971, 0.928771, 0.8707 ]
wikitalk = [0.979627, 0.97997, 0.979484, 0.980269, 0.980758, 0.97979, 0.980233, 0.980004, 0.980353] wikitalk = [0.9710,0.979627, 0.97997, 0.979484, 0.980269, 0.980758, 0.97979, 0.980233, 0.980004, 0.9734 ]
stackoverflow = [0.979641, 0.979372, 0.97967, 0.978169, 0.979624, 0.978846, 0.978428, 0.978397, 0.978925] stackoverflow = [0.9630,0.979641, 0.979372, 0.97967, 0.978169, 0.979624, 0.978846, 0.978428, 0.978397, 0.9749 ]
# 创建新的图像和子图 # 创建新的图像和子图
fig, axs = plt.subplots(1,3, figsize=(15,4)) fig, axs = plt.subplots(1,3, figsize=(15,4))
......
...@@ -92,14 +92,19 @@ class HistoricalCache: ...@@ -92,14 +92,19 @@ class HistoricalCache:
if self.time_threshold is not None: if self.time_threshold is not None:
mask = (self.ssim(new_data,self.local_historical_data[index]) > self.threshold | (ts - self.local_ts[index] > self.time_threshold | self.loss_count[index] > self.times_threshold)) mask = (self.ssim(new_data,self.local_historical_data[index]) > self.threshold | (ts - self.local_ts[index] > self.time_threshold | self.loss_count[index] > self.times_threshold))
self.loss_count[index][~mask] += 1 self.loss_count[index][~mask] += 1
self.loss_count[index][mask] = 0 self.loss_count[index][mask] = 0
else: else:
#print('{} {} {} {} \n'.format(index,self.ssim(new_data,self.local_historical_data[index]),new_data,self.local_historical_data[index])) #print('{} {} {} {} \n'.format(index,self.ssim(new_data,self.local_historical_data[index]),new_data,self.local_historical_data[index]))
#print(new_data,self.local_historical_data[index]) #print(new_data,self.local_historical_data[index])
#print(self.ssim(new_data,self.local_historical_data[index]) < self.threshold, (self.loss_count[index] > self.times_threshold)) #print(self.ssim(new_data,self.local_historical_data[index]) < self.threshold, (self.loss_count[index] > self.times_threshold))
mask = (self.ssim(new_data,self.local_historical_data[index]) > self.threshold) | (self.loss_count[index] > self.times_threshold) mask = ((self.ssim(new_data,self.local_historical_data[index]) > self.threshold) | (self.loss_count[index] > self.times_threshold))
self.loss_count[index][~mask] += 1 (self.loss_count[index[~mask]]) += 1
self.loss_count[index][mask] = 0 #print(self.loss_count[index][~mask])
#if (~mask).sum() > 0:
# print('loss count {}'.format((self.loss_count[index][~mask]).max()))
self.loss_count[index[mask]] = 0
return mask return mask
def read_synchronize(self): def read_synchronize(self):
......
...@@ -159,16 +159,16 @@ class AdaParameter: ...@@ -159,16 +159,16 @@ class AdaParameter:
self.beta = self.beta * average_gnn_aggregate/average_fetch * (1 + self.wait_threshold) self.beta = self.beta * average_gnn_aggregate/average_fetch * (1 + self.wait_threshold)
average_memory_sync_time = self.average_memory_sync/self.count_memory_sync average_memory_sync_time = self.average_memory_sync/self.count_memory_sync
average_memory_update_time = self.average_memory_update/self.count_memory_update average_memory_update_time = self.average_memory_update/self.count_memory_update
self.alpha = self.alpha+math.log(average_memory_update_time/average_memory_sync_time * (1 + self.wait_threshold)) self.alpha = self.alpha-math.log(average_memory_update_time/average_memory_sync_time * (1 + self.wait_threshold))
self.beta = max(min(self.beta, self.max_beta),self.min_beta) self.beta = max(min(self.beta, self.max_beta),self.min_beta)
self.alpha = max(min(self.alpha, self.max_alpha),self.min_alpha) self.alpha = max(min(self.alpha, self.max_alpha),self.min_alpha)
ctx = DistributedContext.get_default_context() ctx = DistributedContext.get_default_context()
beta_comm=torch.tensor([self.beta]) beta_comm=torch.tensor([self.beta])
torch.distributed.all_reduce(beta_comm,group=ctx.gloo_group) torch.distributed.all_reduce(beta_comm,group=ctx.gloo_group)
self.beta = beta_comm[0].item() self.beta = beta_comm[0].item()/ctx.world_size
alpha_comm=torch.tensor([self.alpha]) alpha_comm=torch.tensor([self.alpha])
torch.distributed.all_reduce(alpha_comm,group=ctx.gloo_group) torch.distributed.all_reduce(alpha_comm,group=ctx.gloo_group)
self.alpha = alpha_comm[0].item() self.alpha = alpha_comm[0].item()/ctx.world_size
#print('gnn aggregate {} fetch {} memory sync {} memory update {}'.format(average_gnn_aggregate,average_fetch,average_memory_sync_time,average_memory_update_time)) #print('gnn aggregate {} fetch {} memory sync {} memory update {}'.format(average_gnn_aggregate,average_fetch,average_memory_sync_time,average_memory_update_time))
#print('beta is {} alpha is {}\n'.format(self.beta,self.alpha)) #print('beta is {} alpha is {}\n'.format(self.beta,self.alpha))
#self.reset_time() #self.reset_time()
......
...@@ -275,8 +275,8 @@ class SharedMailBox(): ...@@ -275,8 +275,8 @@ class SharedMailBox():
shared_index,shared_data,shared_ts = out shared_index,shared_data,shared_ts = out
index = self.shared_nodes_index[shared_index] index = self.shared_nodes_index[shared_index]
mask= (shared_ts > self.node_memory_ts.accessor.data[index]) mask= (shared_ts > self.node_memory_ts.accessor.data[index])
self.node_memory.accessor.data[index][mask] = shared_data[mask] self.node_memory.accessor.data[index[mask]] = shared_data[mask]
self.node_memory_ts.accessor.data[index][mask] = shared_ts[mask] self.node_memory_ts.accessor.data[index[mask]] = shared_ts[mask]
def update_shared(self): def update_shared(self):
ctx = DistributedContext.get_default_context() ctx = DistributedContext.get_default_context()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment