Fix bugs in shared nodes index

e09d279e · zlj · e9e62b18 · e09d279e · e09d279e · e09d279e
Commit e09d279e authored Feb 21, 2025 by zlj
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 12 deletions

alpha.png
+0 -0

draw_alpha.py
+4 -4

starrygl/module/historical_cache.py
+8 -3

starrygl/module/utils.py
+3 -3

starrygl/sample/memory/_shared_mailbox.py
+2 -2

No files found.
--- a/alpha.png
+++ b/alpha.png
--- a/draw_alpha.py
+++ b/draw_alpha.py
 import matplotlib.pyplot as plt
 # 数据
-alpha_values = [0.1, 0.3, 0.5, 0.7, 0.9, 1.3, 1.5, 1.7, 2]
+alpha_values = [0,0.1, 0.3, 0.5, 0.7, 0.9, 1.3, 1.5, 1.7, 2]
-lastfm = [0.933538, 0.931761, 0.932499, 0.933383, 0.931225, 0.929983, 0.933971, 0.928771, 0.932748]
+lastfm = [0.8712 ,0.933538, 0.931761, 0.932499, 0.933383, 0.931225, 0.929983, 0.933971, 0.928771, 0.8707 ]
-wikitalk = [0.979627, 0.97997, 0.979484, 0.980269, 0.980758, 0.97979, 0.980233, 0.980004, 0.980353]
+wikitalk = [0.9710,0.979627, 0.97997, 0.979484, 0.980269, 0.980758, 0.97979, 0.980233, 0.980004, 0.9734 ]
-stackoverflow = [0.979641, 0.979372, 0.97967, 0.978169, 0.979624, 0.978846, 0.978428, 0.978397, 0.978925]
+stackoverflow = [0.9630,0.979641, 0.979372, 0.97967, 0.978169, 0.979624, 0.978846, 0.978428, 0.978397, 0.9749 ]
 # 创建新的图像和子图
 fig, axs = plt.subplots(1,3, figsize=(15,4))

--- a/starrygl/module/historical_cache.py
+++ b/starrygl/module/historical_cache.py
@@ -92,14 +92,19 @@ class HistoricalCache:
        if self.time_threshold is not None:
            mask = (self.ssim(new_data,self.local_historical_data[index]) > self.threshold | (ts - self.local_ts[index] > self.time_threshold | self.loss_count[index] > self.times_threshold))
            self.loss_count[index][~mask] += 1
            self.loss_count[index][mask] = 0
        else:
            #print('{} {} {} {}  \n'.format(index,self.ssim(new_data,self.local_historical_data[index]),new_data,self.local_historical_data[index]))
            #print(new_data,self.local_historical_data[index])
            #print(self.ssim(new_data,self.local_historical_data[index]) < self.threshold, (self.loss_count[index] > self.times_threshold))
-            mask = (self.ssim(new_data,self.local_historical_data[index]) > self.threshold) | (self.loss_count[index] > self.times_threshold)
+            mask = ((self.ssim(new_data,self.local_historical_data[index]) > self.threshold) | (self.loss_count[index] > self.times_threshold))
-            self.loss_count[index][~mask] += 1
+            (self.loss_count[index[~mask]]) += 1
-            self.loss_count[index][mask] = 0
+            #print(self.loss_count[index][~mask])
+            #if (~mask).sum() > 0:
+            #    print('loss count {}'.format((self.loss_count[index][~mask]).max()))
+            self.loss_count[index[mask]] = 0
        return mask
    def read_synchronize(self):

--- a/starrygl/module/utils.py
+++ b/starrygl/module/utils.py
@@ -159,16 +159,16 @@ class AdaParameter:
    self.beta = self.beta * average_gnn_aggregate/average_fetch * (1 + self.wait_threshold)
    average_memory_sync_time = self.average_memory_sync/self.count_memory_sync
    average_memory_update_time = self.average_memory_update/self.count_memory_update
-    self.alpha = self.alpha+math.log(average_memory_update_time/average_memory_sync_time * (1 + self.wait_threshold))
+    self.alpha = self.alpha-math.log(average_memory_update_time/average_memory_sync_time * (1 + self.wait_threshold))
    self.beta = max(min(self.beta, self.max_beta),self.min_beta)
    self.alpha = max(min(self.alpha, self.max_alpha),self.min_alpha)
    ctx = DistributedContext.get_default_context()
    beta_comm=torch.tensor([self.beta])
    torch.distributed.all_reduce(beta_comm,group=ctx.gloo_group)
-    self.beta = beta_comm[0].item()
+    self.beta = beta_comm[0].item()/ctx.world_size
    alpha_comm=torch.tensor([self.alpha])
    torch.distributed.all_reduce(alpha_comm,group=ctx.gloo_group)
-    self.alpha = alpha_comm[0].item()
+    self.alpha = alpha_comm[0].item()/ctx.world_size
    #print('gnn aggregate {} fetch {} memory sync {} memory update {}'.format(average_gnn_aggregate,average_fetch,average_memory_sync_time,average_memory_update_time))
    #print('beta is {} alpha is {}\n'.format(self.beta,self.alpha))
    #self.reset_time()

--- a/starrygl/sample/memory/_shared_mailbox.py
+++ b/starrygl/sample/memory/_shared_mailbox.py
@@ -275,8 +275,8 @@ class SharedMailBox():
            shared_index,shared_data,shared_ts = out
            index = self.shared_nodes_index[shared_index]
            mask= (shared_ts > self.node_memory_ts.accessor.data[index])
-            self.node_memory.accessor.data[index][mask] = shared_data[mask]
+            self.node_memory.accessor.data[index[mask]] = shared_data[mask]
-            self.node_memory_ts.accessor.data[index][mask] = shared_ts[mask]
+            self.node_memory_ts.accessor.data[index[mask]] = shared_ts[mask]
    def update_shared(self):
        ctx = DistributedContext.get_default_context()