Commit 5a9ddff8 by zhlj

first

parent 68140a57
......@@ -2,30 +2,25 @@
# Ran TaoBao on 4 GPUs
# Define array variables
# Configuration for the experiment sweep below.
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped, so back-to-back assignments to the same name are presumably the
# old and new version of one line — confirm against the repository. If the
# text is executed verbatim, the later assignment simply overwrites the
# earlier one.

# Seed taken from the first command-line argument; it is passed to --seed and
# also names the all_<seed> output directory.
seed=$1
# Address handed to torchrun as --master-addr.
addr="192.168.1.107"
addr="192.168.1.105"
# Partition schemes iterated by the `for partition` loop.
partition_params=("ours")
partition="ours"
#"metis" "ldg" "random")
#("ours" "metis" "ldg" "random")
# In the visible code this value is only used as a prefix of the output file names.
partitions="8"
# Passed to torchrun as --nproc-per-node, --nnodes and --node_rank respectively.
node_per="4"
nnodes="2"
node_rank="0"
# Values for --sample_type. The `for i` loop reads this array in parallel with
# topk_list, neg_policy and memory_type (same index = one configuration).
sample_type_params=("recent" "recent" "recent" "recent" "boundery_recent_decay")
# Values for --probability; only used when the sample type is not "recent".
probability_params=("0.1")
sample_type_params=("boundery_recent_decay")
#"boundery_recent_decay")
#sample_type_params=("recent" "boundery_recent_decay") #"boundery_recent_uniform")
#memory_type=("all_update" "p2p" "all_reduce" "historical" "local")
# Values for --memory_type.
memory_type=("historical")
#"historical")
#memory_type=("local" "all_update" "historical" "all_reduce")
# Values for --shared_memory_ssim; only swept in the "historical" branches.
shared_memory_ssim=("0.1" "0.3" "0.5" "0.7" "0.9" "1.3" "1.5" "1.7" "2")
#"historical")
#memory_type=("local" "all_update" "historical" "all_reduce")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk")
# Values for --local_neg_sample, indexed in parallel with sample_type_params.
neg_policy=("all" "local" "local" "local" "local")
# Values for --topk.
topk_list=("0" "0" "0.1" "0.1" "0.1")
memory_type=("all_update" "all_update" "all_update" "all_update" "historical")
shared_memory_ssim=("0.3")
# Scalar ssim; the non-historical branches embed $ssim in their output file
# names without passing --shared_memory_ssim.
ssim="0.3"
# NOTE(review): neighbor_num is not referenced anywhere in the visible code.
neighbor_num=( "10" "20")
# Value for --neighbor; the data loop reassigns it per dataset.
neighbor="10"
topk_list=("0.1")
# Datasets passed as --dataname.
data_param=("LASTFM" "StackOverflow" "GDELT")
data_param=("LASTFM" "WikiTalk" "StackOverflow" "GDELT")
#"GDELT")
#data_param=("WIKI" "REDDIT" "LASTFM" "DGraphFin" "WikiTalk" "StackOverflow")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk" "StackOverflow")
......@@ -38,79 +33,53 @@ data_param=("LASTFM" "StackOverflow" "GDELT")
#seed=(( RANDOM % 1000000 + 1 ))
# Experiment sweep: for every dataset / partition / configuration, launch one
# torchrun job and redirect its stdout to a file under all_<seed>/<data>/<model>/.
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped. Stacked loop headers (`for sample` directly followed by `for i`)
# and pairs of near-identical torchrun lines (without / with
# --local_neg_sample) are presumably the old and new version of the same
# statement — confirm against the repository before running.

# Root output directory for this seed; -p makes this a no-op if it already exists.
mkdir -p all_"$seed"
for data in "${data_param[@]}"; do
for topk in "${topk_list[@]}"; do
# Default model and neighbor count; WIKI, REDDIT and LASTFM switch to TGN
# with 10 neighbors in the `if` below.
model="TGN_large"
# probability_params=("0" "0.0001" "0.001" "0.01" "0.1" "1")
# if [ "$data" = "StackOverflow" ]; then
# probability_params=("0.0001" "0.001" )
# fi
neighbor="20"
if [ "$data" = "WIKI" ] || [ "$data" = "REDDIT" ] || [ "$data" = "LASTFM" ]; then
model="TGN"
neighbor="10"
fi
#model="APAN"
# Per-dataset and per-model output directories. These use plain mkdir, which
# reports an error when the directory already exists (e.g. on a re-run).
mkdir all_"$seed"/"$data"
mkdir all_"$seed"/"$data"/"$model"
mkdir all_"$seed"/"$data"/"$model"/comm
#torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0 --seed "$seed" > all_"$seed"/"$data"/"$model"/1.out &
# The launch above is commented out, so this wait has no new job to wait for.
wait
for partition in "${partition_params[@]}"; do
for sample in "${sample_type_params[@]}"; do
# Index loop: element i of sample_type_params, topk_list, neg_policy and
# memory_type together describe one configuration.
for i in "${!sample_type_params[@]}"; do
topk="${topk_list[i]}"
sample="${sample_type_params[i]}"
neg_type="${neg_policy[i]}"
mem="${memory_type[i]}"
# "recent" sampling: the launches in this branch pass no --probability.
if [ "$sample" = "recent" ]; then
# NOTE(review): this loop re-binds mem, overriding the per-index value
# assigned just above — presumably a removed diff line; confirm.
for mem in "${memory_type[@]}"; do
if [ "$mem" = "historical" ]; then
# historical memory: one run per --shared_memory_ssim value, only for the
# "ours" and "metis" partitions.
for ssim in "${shared_memory_ssim[@]}"; do
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
# Each job is started in the background and waited on right away, so runs
# execute one after another.
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor"-"$neg_type".out &
wait
fi
done
elif [ "$mem" = "all_reduce" ]; then
# all_reduce memory is only run for the "ours" partition.
# NOTE(review): the output name embeds $ssim although --shared_memory_ssim is
# not passed here; $ssim holds whatever value was assigned last.
if [ "$partition" = "ours" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
wait
fi
else
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample".out &
wait
# NOTE(review): none of the memory_type lists visible in this file contain
# "all_local" (they use "local"), so the second test looks always true for
# those values — confirm the intended name.
if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$neighbor"-"$neg_type".out &
wait
fi
fi
done
else
# Any other sample type: same structure as above, with --probability added to
# every launch and to the output file name.
for pro in "${probability_params[@]}"; do
# shared_memory_ssim=("0.3")
# if [ "$pro" = "0.1" ]; then
# shared_memory_ssim=("0" "0.1" "0.3" "0.5" "0.7" "0.9" "1.3" "1.5" "1.7" "2")
# fi
for mem in "${memory_type[@]}"; do
if [ "$mem" = "historical" ]; then
for ssim in "${shared_memory_ssim[@]}"; do
if [ "$partition" = "ours" ] || [ "$partition" = "metis" ]; then
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor"-"$neg_type".out &
wait
fi
done
elif [ "$mem" = "all_reduce" ]; then
# all_reduce memory with a --probability value is only run for the "ours"
# partition.
# Fix: `[` needs its closing `]` as a separate word. The earlier `"ours"]`
# glued the bracket onto the string, so `[` reported a missing `]`, returned
# non-zero, and this launch was never executed.
if [ "$partition" = "ours" ]; then
# Started in the background and waited on immediately, so runs stay sequential.
# NOTE(review): the output name embeds $ssim although --shared_memory_ssim is
# not passed here; $ssim holds whatever value was assigned last.
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
wait
fi
# Fallback for every memory type other than "historical" and "all_reduce"
# in the --probability branch.
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped; the two near-identical torchrun lines below (without / with
# --local_neg_sample) are presumably the old and new version of one
# statement — confirm against the repository.
else
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
# The launch above is commented out, so this wait has no new job to wait for.
wait
# NOTE(review): none of the memory_type lists visible in this file contain
# "all_local" (they use "local"), so the second test looks always true for
# those values — confirm the intended name.
if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
# Each job is started in the background and waited on right away, so runs
# execute one after another. The output name embeds $ssim although
# --shared_memory_ssim is not passed in this branch.
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor".out &
torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk "$topk" --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" --neighbor "$neighbor" --local_neg_sample "$neg_type" > all_"$seed"/"$data"/"$model"/"$partitions"-"$partition"-"$topk"-"$mem"-"$ssim"-"$sample"-"$pro"-"$neighbor"-"$neg_type".out &
wait
fi
fi
# NOTE(review): the closers below end the enclosing loops and the
# recent / non-recent `if`. Because old and new loop headers are interleaved
# in this listing, the number of `done` lines does not match the number of
# visible `for` headers — reconcile against the real file.
done
done
fi
done
done
done
done
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment