Commit 2d0be982 by zlj

Merge branch 'master' of http://192.168.1.53:8082/wjie98/starrygl into doc-v2

parents 7214f226 18244f6c
......@@ -169,6 +169,8 @@ cython_debug/
/third_party
/.vscode
/.history
/.cache
/run_route.py
/dataset
......
......@@ -113,7 +113,7 @@ endif()
if (WITH_LDG)
# Imports neighbor-clustering based (e.g. LDG algorithm) graph partitioning implementation
add_definitions(-DWITH_LDG)
set(LDG_DIR "csrc/partition/neighbor_clustering")
set(LDG_DIR "third_party/ldg_partition")
add_library(ldg_partition SHARED "csrc/partition/ldg.cpp")
target_link_libraries(ldg_partition PRIVATE ${TORCH_LIBRARIES})
......
......@@ -5,7 +5,6 @@
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
#ifdef WITH_CUDA
#ifdef WITH_CUDA
m.def("uvm_storage_new", &uvm_storage_new, "return storage of unified virtual memory");
m.def("uvm_storage_to_cuda", &uvm_storage_to_cuda, "share uvm storage with another cuda device");
m.def("uvm_storage_to_cpu", &uvm_storage_to_cpu, "share uvm storage with cpu");
......
......@@ -3,7 +3,7 @@ torch==2.1.1+cu118
torchvision==0.16.1+cu118
torchaudio==2.1.1+cu118
--extra-index-url https://data.pyg.org/whl/torch-2.1.0+cu118.html
--find-links https://data.pyg.org/whl/torch-2.1.0+cu118.html
torch_geometric==2.4.0
pyg_lib==0.3.1+pt21cu118
torch_scatter==2.1.2+pt21cu118
......@@ -11,6 +11,12 @@ torch_sparse==0.6.18+pt21cu118
torch_cluster==1.6.3+pt21cu118
torch_spline_conv==1.2.2+pt21cu118
--find-links https://data.dgl.ai/wheels/cu118/repo.html
dgl==1.1.3+cu118
--find-links https://data.dgl.ai/wheels-test/repo.html
dglgo==0.0.2
ogb
tqdm
networkx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
from torch import Tensor
from typing import *
from torch_scatter import segment_csr, gather_csr
from torch_sparse import SparseTensor
__all__ = [
"EmmaAttention",
"EmmaSum",
]
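# Note (added for orientation): EmmaAttention maintains historical aggregation state
# across mini-batches in three non-persistent buffers (not written to checkpoints):
#   his_x - previously aggregated, attention-weighted node features
#   his_m - per-node log-normalizer (log-sum-exp of attention logits) of the historical softmax
#   inv_w - reciprocal of the full neighborhood size, refreshed during eval-mode passes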
class EmmaAttention(nn.Module):
def __init__(self) -> None:
super().__init__()
self.register_buffer(
"his_x",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"his_m",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"inv_w",
torch.empty(0),
persistent=False,
)
self.reset_parameters()
def reset_parameters(self):
self.get_buffer("his_x").zero_()
self.get_buffer("his_m").fill_(-torch.inf)
self.get_buffer("inv_w").zero_()
def forward(self, x: Tensor, max_a: Tensor, agg_n: Tensor):
if self.training:
his_x = self.get_buffer("his_x")
his_m = self.get_buffer("his_m")
inv_w = self.get_buffer("inv_w")
x = EmmaAttentionFunction.apply(
x, max_a, his_x, his_m, agg_n, inv_w)
else:
inv_w = 1.0 / agg_n.data
inv_w[agg_n == 0] = 0.0
self._copy_or_clone("his_x", x)
self._copy_or_clone("his_m", max_a)
self._copy_or_clone("inv_w", inv_w)
return x
def _copy_or_clone(self, name: str, x: Tensor):
_x = self.get_buffer(name)
if _x.size() != x.size():
self.register_buffer(
name, x.data.clone(), persistent=False)
else:
_x.copy_(x.data)
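# softmax_gat computes a GAT-style edge softmax over the CSR adjacency: logits
# a = leaky_relu(src_a[col] + dst_a[row]) are normalized per destination row with
# the log-sum-exp trick; the returned max_a holds the (optionally value-weighted)
# log-softmax denominator per row, which is later merged with his_m.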
@staticmethod
def softmax_gat(
src_a: Tensor,
dst_a: Tensor,
adj_t: SparseTensor,
negative_slope: float = 0.01,
) -> Tuple[SparseTensor, Tensor]:
assert src_a.dim() in {1, 2}
assert src_a.dim() == dst_a.dim()
ptr, ind, val = adj_t.csr()
a = src_a[ind] + gather_csr(dst_a, ptr)
a = F.leaky_relu(a, negative_slope=negative_slope)
with torch.no_grad():
max_a = torch.full_like(dst_a, -torch.inf)
max_a = segment_csr(a, ptr, reduce="max", out=max_a)
exp_a = torch.exp(a - gather_csr(max_a, ptr))
if val is not None:
assert val.dim() == 1
if exp_a.dim() == 1:
exp_a = exp_a * val
else:
exp_a = exp_a * val.unsqueeze(-1)
sum_exp_a = segment_csr(exp_a, ptr, reduce="sum")
exp_a = exp_a / gather_csr(sum_exp_a, ptr)
with torch.no_grad():
max_a.add_(sum_exp_a.log())
adj_t = SparseTensor(rowptr=ptr, col=ind, value=exp_a)
return adj_t, max_a
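# apply_gat propagates node features with the normalized attention weights:
# 1-D edge values pair with single-head (N, F) features, 2-D edge values pair
# with multi-head (N, H, F) features and are propagated one head at a time.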
@staticmethod
def apply_gat(
x: Tensor,
src_a: Tensor,
dst_a: Tensor,
adj_t: SparseTensor,
negative_slope: float = 0.01,
) -> Tuple[Tensor, Tensor]:
adj_t, max_a = EmmaAttention.softmax_gat(
src_a=src_a, dst_a=dst_a,
adj_t=adj_t, negative_slope=negative_slope,
)
ptr, ind, val = adj_t.csr()
if val.dim() == 1:
assert x.dim() == 2
x = adj_t @ x
elif val.dim() == 2:
assert x.dim() == 3
assert x.size(1) == val.size(1)
xs = []
for i in range(x.size(1)):
xs.append(
SparseTensor(
rowptr=ptr, col=ind, value=val[:,i],
) @ x[:,i,:]
)
x = torch.cat(xs, dim=1).view(-1, *x.shape[1:])
return x, max_a
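# EmmaAttentionFunction merges the freshly computed messages (x, max_a) into the
# historical state (his_x, his_m). p and q are convex weights for the historical
# and new contributions, derived from the two log-normalizers via a numerically
# stable log-sum-exp; beta down-weights the history by the fraction of neighbors
# re-aggregated in this mini-batch. Backward only routes gradients to the new
# messages (grad * q).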
class EmmaAttentionFunction(autograd.Function):
@staticmethod
def forward(
ctx: autograd.function.FunctionCtx,
x: Tensor,
max_a: Tensor,
his_x: Tensor,
his_m: Tensor,
agg_n: Tensor,
inv_w: Tensor,
):
assert x.dim() in {2, 3}
assert x.dim() == his_x.dim()
assert max_a.dim() == his_m.dim()
beta = (1.0 - inv_w * agg_n).clamp_(0.0, 1.0)
if x.dim() == 2:
assert max_a.dim() == 1
elif x.dim() == 3:
assert max_a.dim() == 2
beta = beta.unsqueeze_(-1)
max_m = torch.max(max_a, his_m)
p = (his_m - max_m).nan_to_num_(0.0).exp_().mul_(beta)
q = (max_a - max_m).nan_to_num_(0.0).exp_()
t = p + q
p.div_(t).unsqueeze_(-1)
q.div_(t).unsqueeze_(-1)
his_x.mul_(p).add_(x * q)
his_m.copy_(max_m).add_(t.log_())
ctx.save_for_backward(q)
return his_x
@staticmethod
def backward(
ctx: autograd.function.FunctionCtx,
grad: Tensor,
):
q, = ctx.saved_tensors
return grad * q, None, None, None, None, None
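# EmmaSum is the sum/mean counterpart of EmmaAttention: it keeps only the
# historical feature sum (his_x) and the inverse neighborhood size (inv_w),
# and rescales by inv_w when aggr == "mean".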
class EmmaSum(nn.Module):
def __init__(self) -> None:
super().__init__()
self.register_buffer(
"his_x",
torch.empty(0),
persistent=False,
)
self.register_buffer(
"inv_w",
torch.empty(0),
persistent=False,
)
self.reset_parameters()
def reset_parameters(self):
self.get_buffer("his_x").zero_()
self.get_buffer("inv_w").zero_()
def forward(self, x: Tensor, agg_n: Tensor, aggr: str = "sum"):
assert aggr in {"sum", "mean"}
if self.training:
his_x = self.get_buffer("his_x")
inv_w = self.get_buffer("inv_w")
x = EmmaSumFunction.apply(x, his_x, agg_n, inv_w)
else:
inv_w = 1.0 / agg_n.data
inv_w[agg_n == 0] = 0.0
self._copy_or_clone("his_x", x)
self._copy_or_clone("inv_w", inv_w)
if aggr == "mean":
x = x * inv_w[:,None]
return x
def _copy_or_clone(self, name: str, x: Tensor):
_x = self.get_buffer(name)
if _x.size() != x.size():
self.register_buffer(
name, x.data.clone(), persistent=False)
else:
_x.copy_(x.data)
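# EmmaSumFunction scales the historical sum by beta (the share of neighbors not
# covered by this mini-batch) and adds the new partial sum in place; gradients
# pass straight through to the new messages.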
class EmmaSumFunction(autograd.Function):
@staticmethod
def forward(
ctx: autograd.function.FunctionCtx,
x: Tensor,
his_x: Tensor,
agg_n: Tensor,
inv_w: Tensor,
):
assert x.dim() == 2
assert his_x.dim() == x.dim()
beta = (1.0 - inv_w * agg_n) \
.clamp_(0.0, 1.0).unsqueeze_(-1)
his_x.mul_(beta).add_(x)
# ctx.save_for_backward(inv_w)
return his_x
@staticmethod
def backward(
ctx: autograd.function.FunctionCtx,
grad: Tensor,
):
# inv_w, = ctx.saved_tensors
# return grad * inv_w[:,None], None, None, None
return grad, None, None, None
\ No newline at end of file
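For orientation, a minimal usage sketch of the module above (not part of the commit): the graph, feature, and attention tensors are made up, and the import of EmmaAttention depends on where this file lives in the package. An eval-mode pass is needed first, since it populates the his_x / his_m / inv_w buffers that the training-mode merge relies on.
import torch
from torch_sparse import SparseTensor
# from <this module> import EmmaAttention   # import path depends on the package layout
num_nodes, feat_dim = 6, 8
row = torch.tensor([0, 1, 2, 3, 4, 5])
col = torch.tensor([1, 2, 3, 4, 5, 0])
adj_t = SparseTensor(row=row, col=col, sparse_sizes=(num_nodes, num_nodes))
ptr, _, _ = adj_t.csr()
agg_n = ptr[1:] - ptr[:-1]                      # neighbors aggregated per destination node
x = torch.randn(num_nodes, feat_dim, requires_grad=True)
src_a = torch.randn(num_nodes)                  # per-source attention term
dst_a = torch.randn(num_nodes)                  # per-destination attention term
emma = EmmaAttention()
emma.eval()                                     # eval pass initializes his_x / his_m / inv_w
out, max_a = EmmaAttention.apply_gat(x, src_a, dst_a, adj_t)
emma(out, max_a, agg_n)
emma.train()                                    # training pass merges into the historical state
out, max_a = EmmaAttention.apply_gat(x, src_a, dst_a, adj_t)
out = emma(out, max_a, agg_n)
out.sum().backward()                            # gradients flow to x via grad * q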
......@@ -75,6 +75,12 @@ class LayerPipe(ABC):
models.append((key, val))
return tuple(models)
def parameters(self):
params: List[nn.Parameter] = []
for name, m in self.get_model():
params.extend(m.parameters())
return params
def register_route(self, *xs: Tensor):
for t in xs:
t.requires_route = True
......
......@@ -55,6 +55,12 @@ class SequencePipe(ABC):
models.append((key, val))
return tuple(models)
def parameters(self):
params: List[nn.Parameter] = []
for name, m in self.get_model():
params.extend(m.parameters())
return params
def to(self, device: Any):
for _, net in self.get_model():
net.to(device)
......
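The parameters() helpers added to LayerPipe and SequencePipe collect the nn.Parameter objects of all registered sub-models, so a pipeline can be handed directly to an optimizer. A hedged sketch, assuming a hypothetical user-defined subclass MyPipe:
import torch
# MyPipe is an assumed subclass of LayerPipe (or SequencePipe) that registers its sub-modules.
pipe = MyPipe()
optimizer = torch.optim.Adam(pipe.parameters(), lr=1e-3)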