Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
BTS-MTGNN
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhlj
BTS-MTGNN
Commits
82337762
Commit
82337762
authored
Oct 15, 2024
by
zlj
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add negative fix weight
parent
cc8abec4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
84 additions
and
42 deletions
+84
-42
examples/test_all.sh
+31
-27
examples/train_boundery.py
+14
-7
starrygl/module/layers.py
+9
-6
starrygl/module/modules.py
+30
-2
No files found.
examples/test_all.sh
View file @
82337762
#!/bin/bash
# 定义数组变量
seed
=
$1
addr
=
"192.168.1.107"
partition_params
=(
"ours"
)
#"metis" "ldg" "random")
...
...
@@ -9,7 +10,7 @@ partitions="4"
node_per
=
"4"
nnodes
=
"1"
node_rank
=
"0"
probability_params
=(
"0.1"
"0
"
"0.05"
"0.01
"
)
probability_params
=(
"0.1"
"0
.01"
"0.05
"
)
sample_type_params
=(
"boundery_recent_decay"
)
#sample_type_params=("recent" "boundery_recent_decay") #"boundery_recent_uniform")
#memory_type=("all_update" "p2p" "all_reduce" "historical" "local")
...
...
@@ -17,24 +18,27 @@ memory_type=( "all_update")
#memory_type=("local" "all_update" "historical" "all_reduce")
shared_memory_ssim
=(
"0.3"
"0.7"
)
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk")
data_param
=(
"
DGraphFin
"
)
data_param
=(
"
LASTFM
"
)
#data_param=("WIKI" "REDDIT" "LASTFM" "DGraphFin" "WikiTalk" "StackOverflow")
#data_param=("WIKI" "REDDIT" "LASTFM" "WikiTalk" "StackOverflow")
#data_param=("REDDIT" "WikiTalk")
# 创建输出目录
mkdir
-p
all
# 遍历数组并执行命令
#seed=(( RANDOM % 1000000 + 1 ))
mkdir
-p
all_
"
$seed
"
for
data
in
"
${
data_param
[@]
}
"
;
do
model
=
"TGN_large"
if
[
"
$data
"
=
"WIKI"
]
||
[
"
$data
"
=
"REDDIT"
]
||
[
"
$data
"
=
"LASTFM"
]
;
then
model
=
"TGN"
fi
#model="APAN"
mkdir all/
"
$data
"
mkdir all/
"
$data
"
/
"
$model
"
mkdir all/
"
$data
"
/
"
$model
"
/comm
#torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0
> all
/"$data"/"$model"/1.out &
mkdir all
_
"
$seed
"
/
"
$data
"
mkdir all
_
"
$seed
"
/
"
$data
"
/
"
$model
"
mkdir all
_
"
$seed
"
/
"
$data
"
/
"
$model
"
/comm
#torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0
--seed "$seed" > all_"$seed"
/"$data"/"$model"/1.out &
wait
for
partition
in
"
${
partition_params
[@]
}
"
;
do
for
sample
in
"
${
sample_type_params
[@]
}
"
;
do
...
...
@@ -43,20 +47,20 @@ for data in "${data_param[@]}"; do
if
[
"
$mem
"
=
"historical"
]
;
then
for
ssim
in
"
${
shared_memory_ssim
[@]
}
"
;
do
if
[
"
$partition
"
=
"ours"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
--shared_memory_ssim
"
$ssim
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$ssim
"
-
"
$sample
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
--shared_memory_ssim
"
$ssim
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$ssim
"
-
"
$sample
"
.out &
wait
fi
done
elif
[
"
$mem
"
=
"all_reduce"
]
;
then
if
[
"
$partition
"
=
"ours"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
.out &
wait
fi
else
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0
--sample_type
"
$sample
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-
"
$partition
"
-0-
"
$mem
"
-
"
$sample
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0
--sample_type
"
$sample
"
--memory_type
"
$mem
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-
"
$partition
"
-0-
"
$mem
"
-
"
$sample
"
.out &
wait
if
[
"
$partition
"
=
"ours"
]
&&
[
"
$mem
"
!=
"all_local"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--memory_type
"
$mem
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
.out &
wait
fi
fi
...
...
@@ -67,20 +71,20 @@ for data in "${data_param[@]}"; do
if
[
"
$mem
"
=
"historical"
]
;
then
for
ssim
in
"
${
shared_memory_ssim
[@]
}
"
;
do
if
[
"
$partition
"
=
"ours"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
--shared_memory_ssim
"
$ssim
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$ssim
"
-
"
$sample
"
-
"
$pro
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
--shared_memory_ssim
"
$ssim
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$ssim
"
-
"
$sample
"
-
"
$pro
"
.out &
wait
fi
done
elif
[
"
$mem
"
=
"all_reduce"
]
;
then
if
[
"
$partition
"
=
"ours"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
-
"
$pro
"
.out&
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
-
"
$pro
"
.out&
wait
fi
else
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-
"
$partition
"
-0-
"
$mem
"
-
"
$sample
"
-
"
$pro
"
.out &
#torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --seed "$seed" > all_"$seed"
/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
wait
if
[
"
$partition
"
=
"ours"
]
&&
[
"
$mem
"
!=
"all_local"
]
;
then
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
>
all
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
-
"
$pro
"
.out &
torchrun
--nnodes
"
$nnodes
"
--node_rank
"
$node_rank
"
--nproc-per-node
"
$node_per
"
--master-addr
"
$addr
"
--master-port
9445 train_boundery.py
--dataname
"
$data
"
--mode
"
$model
"
--partition
"
$partition
"
--topk
0.1
--sample_type
"
$sample
"
--probability
"
$pro
"
--memory_type
"
$mem
"
--seed
"
$seed
"
>
all_
"
$seed
"
/
"
$data
"
/
"
$model
"
/
"
$partitions
"
-ours_shared-0
.01-
"
$mem
"
-
"
$sample
"
-
"
$pro
"
.out &
wait
fi
fi
...
...
@@ -99,10 +103,10 @@ done
# model="JODIE"
# fi
# #model="APAN"
# mkdir all/"$data"
# mkdir all/"$data"/"$model"
# mkdir all/"$data"/"$model"/comm
# #torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0
> all
/"$data"/"$model"/1.out &
# mkdir all
_"$seed"
/"$data"
# mkdir all
_"$seed"
/"$data"/"$model"
# mkdir all
_"$seed"
/"$data"/"$model"/comm
# #torchrun --nnodes "$nnodes" --node_rank 0 --nproc-per-node 1 --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition ours --memory_type local --sample_type recent --topk 0
--seed "$seed" > all_"$seed"
/"$data"/"$model"/1.out &
# wait
# for partition in "${partition_params[@]}"; do
# for sample in "${sample_type_params[@]}"; do
...
...
@@ -111,20 +115,20 @@ done
# if [ "$mem" = "historical" ]; then
# for ssim in "${shared_memory_ssim[@]}"; do
# if [ "$partition" = "ours" ]; then
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" > all/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$ssim"-"$sample".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" --shared_memory_ssim "$ssim" > all
_"$seed"
/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$ssim"-"$sample".out &
# wait
# fi
# done
# elif [ "$mem" = "all_reduce" ]; then
# if [ "$partition" = "ours" ]; then
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample".out &
# wait
# fi
# else
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample".out &
# wait
# if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample".out &
# wait
# fi
# fi
...
...
@@ -136,20 +140,20 @@ done
# continue
# # for ssim in "${shared_memory_ssim[@]}"; do
# # if [ "$partition" = "ours" ]; then
# # torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" > all/"$data"/"$partitions"-ours_shared-0.01"$mem"-"$ssim"-"$sample"-"$pro".out &
# # torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" --shared_memory_ssim "$ssim" > all
_"$seed"
/"$data"/"$partitions"-ours_shared-0.01"$mem"-"$ssim"-"$sample"-"$pro".out &
# # wait
# # fi
# # done
# elif [ "$mem" = "all_reduce" ]; then
# if [ "$partition" = "ours"]; then
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample"-"$pro".out&
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample"-"$pro".out&
# wait
# fi
# else
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-"$partition"-0-"$mem"-"$sample"-"$pro".out &
# wait
# if [ "$partition" = "ours" ] && [ "$mem" != "all_local" ]; then
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample"-"$pro".out &
# torchrun --nnodes "$nnodes" --node_rank "$node_rank" --nproc-per-node "$node_per" --master-addr "$addr" --master-port 9445 train_boundery.py --dataname "$data" --mode "$model" --partition "$partition" --topk 0.01 --sample_type "$sample" --probability "$pro" --memory_type "$mem" > all
_"$seed"
/"$data"/"$model"/"$partitions"-ours_shared-0.01-"$mem"-"$sample"-"$pro".out &
# wait
# fi
# fi
...
...
examples/train_boundery.py
View file @
82337762
...
...
@@ -126,7 +126,7 @@ def seed_everything(seed=42):
torch
.
cuda
.
manual_seed
(
seed
)
torch
.
backends
.
cudnn
.
deterministic
=
True
torch
.
backends
.
cudnn
.
benchmark
=
False
seed_everything
(
args
.
seed
)
total_next_batch
=
0
total_forward
=
0
total_count_score
=
0
...
...
@@ -267,9 +267,13 @@ def main():
if
args
.
local_neg_sample
:
print
(
'dst len {} origin len {}'
.
format
(
graph
.
edge_index
[
1
,
mask
]
.
unique
()
.
shape
[
0
],
full_dst
.
unique
()
.
shape
[
0
]))
train_neg_sampler
=
LocalNegativeSampling
(
'triplet'
,
amount
=
args
.
neg_samples
,
dst_node_list
=
graph
.
edge_index
[
1
,
mask
]
.
unique
())
else
:
#train_neg_sampler = LocalNegativeSampling('triplet',amount = args.neg_samples,dst_node_list = full_dst.unique())
train_neg_sampler
=
LocalNegativeSampling
(
'triplet'
,
amount
=
args
.
neg_samples
,
dst_node_list
=
full_dst
.
unique
(),
local_mask
=
(
DistIndex
(
graph
.
nids_mapper
[
full_dst
.
unique
()]
.
to
(
'cpu'
))
.
part
==
dist
.
get_rank
()),
prob
=
args
.
probability
)
remote_ratio
=
train_neg_sampler
.
local_dst
.
shape
[
0
]
/
train_neg_sampler
.
dst_node_list
.
shape
[
0
]
train_ratio_pos
=
(
1
-
args
.
probability
)
+
args
.
probability
*
remote_ratio
train_ratio_neg
=
args
.
probability
*
(
1
-
remote_ratio
)
print
(
train_neg_sampler
.
dst_node_list
)
neg_sampler
=
LocalNegativeSampling
(
'triplet'
,
amount
=
neg_samples
,
dst_node_list
=
full_dst
.
unique
(),
seed
=
args
.
seed
)
...
...
@@ -338,10 +342,10 @@ def main():
print
(
'dim_node {} dim_edge {}
\n
'
.
format
(
gnn_dim_node
,
gnn_dim_edge
))
avg_time
=
0
if
use_cuda
:
model
=
GeneralModel
(
gnn_dim_node
,
gnn_dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
graph
.
ids
.
shape
[
0
],
mailbox
)
.
cuda
()
model
=
GeneralModel
(
gnn_dim_node
,
gnn_dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
graph
.
ids
.
shape
[
0
],
mailbox
,
train_ratio
=
(
train_ratio_pos
,
train_ratio_neg
)
)
.
cuda
()
device
=
torch
.
device
(
'cuda'
)
else
:
model
=
GeneralModel
(
gnn_dim_node
,
gnn_dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
graph
.
ids
.
shape
[
0
],
mailbox
)
model
=
GeneralModel
(
gnn_dim_node
,
gnn_dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
graph
.
ids
.
shape
[
0
],
mailbox
,
train_ratio
=
(
train_ratio_pos
,
train_ratio_neg
)
)
device
=
torch
.
device
(
'cpu'
)
model
=
DDP
(
model
,
find_unused_parameters
=
True
)
def
count_parameters
(
model
):
...
...
@@ -531,9 +535,12 @@ def main():
model
.
train
()
optimizer
.
zero_grad
()
ones
=
torch
.
ones
(
metadata
[
'dst_neg_index'
]
.
shape
[
0
],
device
=
model
.
device
,
dtype
=
torch
.
float
)
weight
=
torch
.
where
(
DistIndex
(
mfgs
[
0
][
0
]
.
srcdata
[
'ID'
][
metadata
[
'dst_neg_index'
]])
.
part
==
torch
.
distributed
.
get_rank
(),
ones
/
train_ratio_pos
,
ones
/
train_ratio_neg
)
.
reshape
(
-
1
,
1
)
pred_pos
,
pred_neg
=
model
(
mfgs
,
metadata
,
neg_samples
=
args
.
neg_samples
,
async_param
=
param
)
loss
=
creterion
(
pred_pos
,
torch
.
ones_like
(
pred_pos
))
loss
+=
creterion
(
pred_neg
,
torch
.
zeros_like
(
pred_neg
))
neg_creterion
=
torch
.
nn
.
BCEWithLogitsLoss
(
weight
)
loss
+=
neg_creterion
(
pred_neg
,
torch
.
zeros_like
(
pred_neg
))
total_loss
+=
float
(
loss
.
item
())
#mailbox.handle_last_async()
#trainloader.async_feature()
...
...
@@ -663,9 +670,9 @@ def main():
pass
# print('weight {} {}\n'.format(tt.weight_count_local,tt.weight_count_remote))
# print('ssim {} {}\n'.format(tt.ssim_local/tt.ssim_cnt,tt.ssim_remote/tt.ssim_cnt))
torch
.
save
(
val_list
,
'all_
args.seed/{}/{}/val_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
torch
.
save
(
loss_list
,
'all_
args.seed/{}/{}/loss_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
torch
.
save
(
test_ap_list
,
'all_
args.seed/{}/{}/test_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
torch
.
save
(
val_list
,
'all_
{}/{}/{}/val_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
seed
,
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
torch
.
save
(
loss_list
,
'all_
{}/{}/{}/loss_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
seed
,
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
torch
.
save
(
test_ap_list
,
'all_
{}/{}/{}/test_{}_{}_{}_{}_{}_{}_{}_{}.pt'
.
format
(
args
.
seed
,
args
.
dataname
,
args
.
model
,
args
.
partition
,
args
.
topk
,
dist
.
get_world_size
(),
dist
.
get_rank
(),
args
.
sample_type
,
args
.
probability
,
args
.
memory_type
,
args
.
shared_memory_ssim
))
print
(
avg_time
)
if
not
early_stop
:
...
...
starrygl/module/layers.py
View file @
82337762
...
...
@@ -299,13 +299,16 @@ class TransfomerAttentionLayer(torch.nn.Module):
#b.edata['v1'] = V_remote
#b.update_all(dgl.function.copy_e('v0', 'm0'), dgl.function.sum('m0', 'h0'))
#b.update_all(dgl.function.copy_e('v1', 'm1'), dgl.function.sum('m1', 'h1'))
if
'weight'
in
b
.
edata
:
with
torch
.
no_grad
():
weight
=
b
.
edata
[
'weight'
]
.
reshape
(
-
1
,
1
)
#(b.edata['weight']/torch.sum(b.edata['weight']).item()).reshape(-1,1)
#if 'weight' in b.edata and self.training is True:
# with torch.no_grad():
# weight = b.edata['weight'].reshape(-1,1)#(b.edata['weight']/torch.sum(b.edata['weight']).item()).reshape(-1,1)
#weight =
#print(weight.max())
b
.
edata
[
'v'
]
=
V
*
weight
else
:
b
.
edata
[
'v'
]
=
V
# b.edata['v'] = V*weight
#else:
# weight = b.edata['weight'].reshape(-1,1)
b
.
edata
[
'v'
]
=
V
#print(torch.sum(torch.sum(((V-V*weight)**2))))
b
.
update_all
(
dgl
.
function
.
copy_e
(
'v'
,
'm'
),
dgl
.
function
.
sum
(
'm'
,
'h'
))
#tt.ssim_local+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h0']))
#tt.ssim_remote+=torch.sum(torch.cosine_similarity(b.dstdata['h'],b.dstdata['h1']))
...
...
starrygl/module/modules.py
View file @
82337762
...
...
@@ -52,20 +52,36 @@ class all_to_all_embedding(torch.autograd.Function):
grad
[
dst_pos_index
]
=
grad_pos_dst
grad
[
dst_neg_index
]
=
grad_neg_dst
return
grad
,
None
,
None
class
NegFixLayer
(
torch
.
autograd
.
Function
):
def
__init__
(
self
):
super
(
NegFixLayer
,
self
)
.
__init__
()
def
forward
(
ctx
,
input
,
weight
):
ctx
.
save_for_backward
(
weight
)
return
input
def
backward
(
ctx
,
grad_output
):
# Define your backward pass
# ...
weight
,
=
ctx
.
saved_tensors
#print(weight)
return
grad_output
/
weight
,
None
class
GeneralModel
(
torch
.
nn
.
Module
):
def
__init__
(
self
,
dim_node
,
dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
num_nodes
=
None
,
mailbox
=
None
,
combined
=
False
):
def
__init__
(
self
,
dim_node
,
dim_edge
,
sample_param
,
memory_param
,
gnn_param
,
train_param
,
num_nodes
=
None
,
mailbox
=
None
,
combined
=
False
,
train_ratio
=
None
):
super
(
GeneralModel
,
self
)
.
__init__
()
self
.
dim_node
=
dim_node
self
.
dim_node_input
=
dim_node
self
.
dim_edge
=
dim_edge
self
.
sample_param
=
sample_param
self
.
memory_param
=
memory_param
self
.
train_pos_ratio
,
self
.
train_neg_ratio
=
train_ratio
if
not
'dim_out'
in
gnn_param
:
gnn_param
[
'dim_out'
]
=
memory_param
[
'dim_out'
]
self
.
gnn_param
=
gnn_param
self
.
train_param
=
train_param
self
.
neg_fix_layer
=
NegFixLayer
()
if
memory_param
[
'type'
]
==
'node'
:
if
memory_param
[
'memory_update'
]
==
'gru'
:
#if memory_param['async'] == False:
...
...
@@ -138,12 +154,24 @@ class GeneralModel(torch.nn.Module):
h_pos_src
=
out
[
metadata
[
'src_pos_index'
]]
h_pos_dst
=
out
[
metadata
[
'dst_pos_index'
]]
h_neg_dst
=
out
[
metadata
[
'dst_neg_index'
]]
#end.record()
#end.synchronize()
#elapsed_time_ms = start.elapsed_time(end)
#print('time {}\n'.format(elapsed_time_ms))
#print('pos src {} \n pos dst {} \n neg dst{} \n'.format(h_pos_src, h_pos_dst,h_neg_dst))
#print('pre predict {}'.format(mfgs[0][0].srcdata['ID']))
#if self.training is True:
# with torch.no_grad():
# ones = torch.ones(h_neg_dst.shape[0],device = h_neg_dst.device,dtype=torch.float)
# weight = torch.where(DistIndex(mfgs[0][0].srcdata['ID'][metadata['dst_neg_index']]).part == torch.distributed.get_rank(),ones/self.train_pos_ratio,ones/self.train_neg_ratio).reshape(-1,1)
#weight = torch.clip(weigh)
#weight = weight/weight.max().item()
#print(weight)
#weight =
#h_neg_dst*weight
# pred = self.edge_predictor(h_pos_src, h_pos_dst, None , self.neg_fix_layer.apply(h_neg_dst,weight), neg_samples=neg_samples, mode = mode)
#else:
pred
=
self
.
edge_predictor
(
h_pos_src
,
h_pos_dst
,
None
,
h_neg_dst
,
neg_samples
=
neg_samples
,
mode
=
mode
)
t_embedding
=
tt
.
elapsed_event
(
t1
)
tt
.
time_embedding
+=
t_embedding
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment