Commit 5a9ddff8 by zhlj

fist

parent 68140a57
...@@ -2,30 +2,25 @@ ...@@ -2,30 +2,25 @@
#跑了4卡的TaoBao #跑了4卡的TaoBao
# 定义数组变量 # 定义数组变量
seed=$1 seed=$1
addr="192.168.1.107" addr="192.168.1.105"
partition_params=("ours") partition_params=("ours")
partition="ours"
#"metis" "ldg" "random") #"metis" "ldg" "random")
#("ours" "metis" "ldg" "random") #("ours" "metis" "ldg" "random")
partitions="8" partitions="8"
node_per="4" node_per="4"
nnodes="2" nnodes="2"
node_rank="0" node_rank="0"
sample_type_params=("recent" "recent" "recent" "recent" "boundery_recent_decay")
probability_params=("0.1") probability_params=("0.1")
sample_type_params=("boundery_recent_decay") neg_policy=("all" "local" "local" "local" "local")
#"boundery_recent_decay") topk_list=("0" "0" "0.1" "0.1" "0.1")
#sample_type_params=("recent" "boundery_recent_decay") #"boundery_recent_uniform") memory_type=("all_update" "all_update" "all_update" "all_update" "historical")
#memory_type=("all_update" "p2p" "all_reduce" "historical" "local") shared_memory_ssim=("0.3")
memory_type=("historical") ssim="0.3"
#"historical")
#memory_type=("local" "all_update" "historical" "all_reduce")
shared_memory_ssim=("0.1" "0.3" "0.5" "0.7" "0.9" "1.3" "1.5" "1.7" "2")
#"historical")
#memory_type=("local" "all_update" "historical" "all_reduce")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk")
neighbor_num=( "10" "20") neighbor_num=( "10" "20")
neighbor="10" neighbor="10"
topk_list=("0.1") data_param=("LASTFM" "WikiTalk" "StackOverflow" "GDELT")
data_param=("LASTFM" "StackOverflow" "GDELT")
#"GDELT") #"GDELT")
#data_param=("WIKI" "REDDIT" "LASTFM" "DGraphFin" "WikiTalk" "StackOverflow") #data_param=("WIKI" "REDDIT" "LASTFM" "DGraphFin" "WikiTalk" "StackOverflow")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk" "StackOverflow") #data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk" "StackOverflow")
...@@ -38,79 +33,53 @@ data_param=("LASTFM" "StackOverflow" "GDELT") ...@@ -38,79 +33,53 @@ data_param=("LASTFM" "StackOverflow" "GDELT")
#seed=(( RANDOM % 1000000 + 1 )) #seed=(( RANDOM % 1000000 + 1 ))
mkdir -p all_"$seed" mkdir -p all_"$seed"
for data in "${data_param[@]}"; do for data in "${data_param[@]}"; do
for topk in "${topk_list[@]}"; do model="TGN_large"
model="TGN_large" neighbor="20"
# probability_params=("0" "0.0001" "0.001" "0.01" "0.1" "1") if [ "$data" = "WIKI" ] || [ "$data" = "REDDIT" ] || [ "$data" = "LASTFM" ]; then
# if [ "$data" = "StackOverflow" ]; then model="TGN"
# probability_params=("0.0001" "0.001" ) neighbor="10"
# fi fi
neighbor="20" mkdir all_"$seed"/"$data"
if [ "$data" = "WIKI" ] || [ "$data" = "REDDIT" ] || [ "$data" = "LASTFM" ]; then mkdir all_"$seed"/"$data"/"$model"
model="TGN" mkdir all_"$seed"/"$data"/"$model"/comm
neighbor="10" for i in "${!sample_type_params[@]}"; do
fi topk="${topk_list[i]}"
#model="APAN" sample="${sample_type_params[i]}"
mkdir all_"$seed"/"$data" neg_type="${neg_policy[i]}"
mkdir all_"$seed"/"$data"/"$model" mem="${memory_type[i]}"
mkdir all_"$seed"/"$data"/"$model"/comm
#torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0 --seed "$seed" > all_"$seed"/"$data"/"$model"/1.out & if [ "$sample" = "recent" ]; then
wait if [ "$mem" = "historical" ]; then
for partition in "${partition_params[@]}"; do for ssim in "${shared_memory_ssim[@]}"; do
for sample in "${sample_type_params[@]}"; do if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
if [ "$sample" = "recent" ]; then torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor"-"$neg_type".out &
for mem in "${memory_type[@]}"; do wait
if [ "$mem" = "historical" ]; then fi
for ssim in "${shared_memory_ssim[@]}"; do done
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out & else
wait
fi if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
done torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor"-"$neg_type".out &
elif [ "$mem" = "all_reduce" ]; then wait
if [ "$partition" = "ours" ]; then fi
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out & fi
wait else
fi for pro in "${probability_params[@]}"; do
else if [ "$mem" = "historical" ]; then
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample".out & for ssim in "${shared_memory_ssim[@]}"; do
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor"-"$neg_type".out &
wait wait
if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
wait
fi
fi fi
done done
else else
for pro in "${probability_params[@]}"; do if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
# shared_memory_ssim=("0.3") torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor"-"$neg_type".out &
# if [ "$pro" = "0.1" ]; then wait
# shared_memory_ssim=("0" "0.1" "0.3" "0.5" "0.7" "0.9" "1.3" "1.5" "1.7" "2") fi
# fi
for mem in "${memory_type[@]}"; do
if [ "$mem" = "historical" ]; then
for ssim in "${shared_memory_ssim[@]}"; do
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
wait
fi
done
elif [ "$mem" = "all_reduce" ]; then
if [ "$partition" = "ours"]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
wait
fi
else
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
wait
if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
wait
fi
fi
done
done
fi fi
done done
done fi
done done
done done
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment