Commit 1e0001f7 by zlj

fix topk setting

parent 48c10caf
......@@ -23,6 +23,7 @@ shared_memory_ssim=("0.3" "0.7")
# Earlier / alternative sweep settings, kept for reference:
#memory_type=("local" "all_update" "historical" "all_reduce")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk")

# Values handed to train_boundery.py via --neighbor, one run per entry.
neighbor_num=(10 20)

# Values handed to train_boundery.py via --topk, one run per entry.
topk_list=(
  0.02 0.04 0.06 0.08
  0.1 0.2 0.3
)

# Dataset names handed to train_boundery.py via --dataname.
data_param=(WIKI REDDIT LASTFM WikiTalk)
#"GDELT")
#data_param=("WIKI" "REDDIT" "LASTFM" "DGraphFin" "WikiTalk" "StackOverflow")
......@@ -36,7 +37,7 @@ data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk")
#seed=(( RANDOM % 1000000 + 1 ))
# Create the per-seed directory that the run logs are redirected into.
mkdir -p "all_${seed}"
for data in "${data_param[@]}"; do
for neighbor in "${neighbor_num[@]}"; do
for topk in "${topk_list[@]}"; do
model="TGN_large"
#if [ "$data" = "WIKI" ] || [ "$data" = "REDDIT" ] || [ "$data" = "LASTFM" ]; then
# model="TGN"
......@@ -54,20 +55,20 @@ for data in "${data_param[@]}"; do
# Launch train_boundery.py through torchrun, choosing the command line by
# memory type ($mem). Every job is started in the background and followed by
# `wait`, so the script blocks until the launched job(s) finish before moving on.
#
# NOTE(review): each branch holds two consecutive torchrun lines that differ
# only in --topk (literal 0.1 vs "$topk") and redirect to the same .out file;
# this looks like a before/after pair -- confirm only the "$topk" one should stay.
# NOTE(review): the log filename carries a literal 0.01 and no "$topk", so runs
# with different topk values write to the same file -- confirm this is intended.
if [ "$mem" = "historical" ]; then
# historical: one run per shared_memory_ssim value, only for the "ours" and
# "metis" partitions.
for ssim in "${shared_memory_ssim[@]}"; do
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
wait
fi
done
elif [ "$mem" = "all_reduce" ]; then
# all_reduce: only the "ours" partition is run; no --shared_memory_ssim is passed.
# NOTE(review): "$ssim" in the filename is not set in this branch; it holds
# whatever an earlier historical loop left behind (or is empty) -- confirm.
if [ "$partition" = "ours" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
wait
fi
else
# Any other memory type. The --topk 0 baseline run below is disabled.
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample".out &
wait
# Run only for the "ours" partition, and never for the all_local memory type.
if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
wait
fi
fi
......@@ -78,20 +79,20 @@ for data in "${data_param[@]}"; do
# Launch train_boundery.py through torchrun for the runs that pass
# --probability "$pro", choosing the command line by memory type ($mem).
# Every job is started in the background and followed by `wait`, so the script
# blocks until the launched job(s) finish before moving on.
#
# NOTE(review): each branch holds two consecutive torchrun lines that differ
# only in --topk (literal 0.1 vs "$topk") and redirect to the same .out file;
# this looks like a before/after pair -- confirm only the "$topk" one should stay.
# NOTE(review): the log filename carries a literal 0.01 and no "$topk", so runs
# with different topk values write to the same file -- confirm this is intended.
if [ "$mem" = "historical" ]; then
  # historical: one run per shared_memory_ssim value, only for the "ours" and
  # "metis" partitions.
  for ssim in "${shared_memory_ssim[@]}"; do
    if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
      torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
      torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
      wait
    fi
  done
elif [ "$mem" = "all_reduce" ]; then
  # all_reduce: only the "ours" partition is run; no --shared_memory_ssim is passed.
  # The closing `]` must be its own word: `"ours"]` made the test builtin fail
  # with "missing ]", so this branch never launched anything.
  # NOTE(review): "$ssim" in the filename is not set in this branch; it holds
  # whatever an earlier historical loop left behind (or is empty) -- confirm.
  if [ "$partition" = "ours" ]; then
    torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
    torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
    wait
  fi
else
  # Any other memory type. The --topk 0 baseline run below is disabled.
  #torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
  wait
  # Run only for the "ours" partition, and never for the all_local memory type.
  if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
    torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.1 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
    torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0.01-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
    wait
  fi
fi
......
......@@ -204,7 +204,7 @@ def main():
#else:
graph,full_sampler_graph,train_mask,val_mask,test_mask,full_train_mask,cache_route = load_from_speed(args.dataname,seed=123457,top=args.topk,sampler_graph_add_rev=True, feature_device=torch.device('cuda:{}'.format(ctx.local_rank)),partition=args.partition)#torch.device('cpu'))
if(args.dataname=='GDELT'):
train_param['epoch'] = 1
train_param['epoch'] = 10
#torch.autograd.set_detect_anomaly(True)
# Make sure CUDA is available
if torch.cuda.is_available():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment