Commit 2bfb3403 by Wenjie Huang

add install.sh

parent d1488225
@@ -162,3 +162,4 @@ cython_debug/
 /dataset
 /test_*
 /*.ipynb
+/third_party
@@ -5,6 +5,7 @@ project(starrygl_ops VERSION 0.1)
 option(WITH_PYTHON "Link to Python when building" ON)
 option(WITH_CUDA "Link to CUDA when building" ON)
 option(WITH_METIS "Link to METIS when building" ON)
+option(WITH_MTMETIS "Link to multi-threaded METIS when building" ON)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -27,18 +28,38 @@ if(WITH_CUDA)
 endif()
 
 if(WITH_METIS)
-    set(GKLIB_DIR "third_party/GKlib")
-    set(METIS_DIR "third_party/METIS")
-    add_subdirectory(${GKLIB_DIR})
+    add_definitions(-DWITH_METIS)
+    set(GKLIB_DIR "${CMAKE_SOURCE_DIR}/third_party/GKlib")
+    set(METIS_DIR "${CMAKE_SOURCE_DIR}/third_party/METIS")
+    set(GKLIB_INCLUDE_DIRS "${GKLIB_DIR}/include")
+    set(GKLIB_LIBRARIES "${GKLIB_DIR}/lib")
+    set(METIS_INCLUDE_DIRS "${METIS_DIR}/include")
+    set(METIS_LIBRARIES "${METIS_DIR}/lib")
+    include_directories(${METIS_INCLUDE_DIRS})
+    link_libraries(${METIS_LIBRARIES})
+endif()
+
+if(WITH_MTMETIS)
+    add_definitions(-DWITH_MTMETIS)
+    set(MTMETIS_DIR "${CMAKE_SOURCE_DIR}/third_party/mt-metis")
+    set(MTMETIS_INCLUDE_DIRS "${MTMETIS_DIR}/include")
+    set(MTMETIS_LIBRARIES "${MTMETIS_DIR}/lib")
+    include_directories(${MTMETIS_INCLUDE_DIRS})
+    link_libraries(${MTMETIS_LIBRARIES})
 endif()
 
+find_package(OpenMP REQUIRED)
+link_libraries(OpenMP::OpenMP_CXX)
+
 find_package(Torch REQUIRED)
 include_directories(${TORCH_INCLUDE_DIRS})
 add_compile_options(${TORCH_CXX_FLAGS})
-# find_package(OpenMP REQUIRED)
 
 include_directories("csrc/include")
 
 file(GLOB_RECURSE UVM_SRCS "csrc/uvm/*.cpp")
@@ -50,10 +71,9 @@ if(WITH_PYTHON)
     find_library(TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib")
     target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_LIBRARY})
 endif()
 
-# target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX)
 target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})
 target_compile_definitions(${PROJECT_NAME} PRIVATE -DTORCH_EXTENSION_NAME=lib${PROJECT_NAME})
 
 # set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" OUTPUT_NAME "_C")
-install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_SOURCE_DIR}/starrygl/lib")
+# install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_SOURCE_DIR}/starrygl/lib")
+#pragma once
+
+#include "extension.h"
+
+#define CHECK_CUDA(x) \
+    AT_ASSERTM(x.device().is_cuda(), #x "must be CUDA Tensor")
 
-#include <torch/torch.h>
+#include <torch/all.h>
+#include <metis.h>
+#include <mtmetis.h>
 
-at::Tensor metis_partition() {
-}
+// at::Tensor metis_partition(
+//     at::Tensor rowptr,
+//     at::Tensor col,
+//     at::optional<at::Tensor> optional_value,
+// ) {
+// }
 
-at::Tensor metis_mt_partition() {
-}
+// at::Tensor metis_mt_partition() {
+// }
\ No newline at end of file
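The partition entry points above are still commented-out stubs, but the sketched signature suggests a CSR graph input. As a hedged illustration only (the toy graph below is invented and this is not the real partitioning call), the (rowptr, col) pair such a signature would expect can be built from an edge list like this:

# Hedged illustration of the CSR (rowptr, col) layout that the commented-out
# metis_partition signature appears to expect; toy 4-node graph only.
import torch

edge_index = torch.tensor([[0, 0, 1, 2, 2, 3],
                           [1, 2, 0, 0, 3, 2]])   # row 0: src, row 1: dst
num_nodes = 4

perm = edge_index[0].argsort()                    # group edges by source node
src, dst = edge_index[0, perm], edge_index[1, perm]
rowptr = torch.zeros(num_nodes + 1, dtype=torch.long)
rowptr[1:] = torch.bincount(src, minlength=num_nodes).cumsum(0)
col = dst                                         # neighbors of i: col[rowptr[i]:rowptr[i+1]]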
#!/bin/bash
mkdir -p build && cd build
cmake .. \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_PREFIX_PATH="/home/hwj/.miniconda3/envs/sgl/lib/python3.10/site-packages" \
-DPython3_ROOT_DIR="/home/hwj/.miniconda3/envs/sgl" \
-DCUDA_TOOLKIT_ROOT_DIR="/home/hwj/.local/cuda-11.7" \
&& make -j32 \
&& cp libstarrygl_ops.so ../starrygl/lib/
\ No newline at end of file
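Because install.sh now copies the shared object into starrygl/lib and the CMake install step is commented out, the extension is presumably picked up from that directory at import time. A minimal loading sketch, assuming libstarrygl_ops.so is a pybind11/Torch extension module whose name follows TORCH_EXTENSION_NAME and that starrygl/lib sits next to the calling script:

# Hedged loading sketch -- the path layout and module name are assumptions
# based on TORCH_EXTENSION_NAME=libstarrygl_ops and the cp target above.
import sys
from pathlib import Path

import torch  # import torch first so libtorch's shared libraries are loaded

lib_dir = Path(__file__).resolve().parent / "starrygl" / "lib"
sys.path.insert(0, str(lib_dir))

import libstarrygl_ops  # noqa: E402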
@@ -45,8 +45,8 @@ def all_to_all_v(
     assert len(output_tensor_list) == world_size
     assert len(input_tensor_list) == world_size
 
-    if group is None:
-        group = dist.distributed_c10d._get_default_group()
+    # if group is None:
+    #     group = dist.distributed_c10d._get_default_group()
     backend = dist.get_backend(group)
 
     if backend == "nccl":
@@ -101,20 +101,28 @@ def all_to_all_v(
             return work
         work.wait()
-    # else:
-    #     assert backend == "gloo", f"backend must be nccl, mpi or gloo"
-    #     rank = dist.get_rank(group)
-    #     world_size = dist.get_world_size(group)
-    #     p2p_op_list: List[dist.P2POp] = []
-    #     for i in range(1, world_size):
-    #         send_i = (rank + i) % world_size
-    #         recv_i = (rank - i + world_size) % world_size
-    #         p2p_op_list.extend([
-    #             dist.P2POp(dist.isend, input_tensor_list[send_i], send_i, group=group),
-    #             dist.P2POp(dist.irecv, output_tensor_list[recv_i], recv_i, group=group),
-    #         ])
-    #     dist.batch_isend_irecv(p2p_op_list)
-    #     output_tensor_list[rank][:] = input_tensor_list[rank]
\ No newline at end of file
+
+def all_to_all_s(
+    output_tensor: Tensor,
+    input_tensor: Tensor,
+    output_rowptr: List[int],
+    input_rowptr: List[int],
+    group: Optional[Any] = None,
+    async_op: bool = False,
+):
+    # rank = dist.get_rank(group)
+    world_size = dist.get_world_size(group)
+
+    assert len(output_rowptr) == len(input_rowptr)
+    assert len(output_rowptr) == world_size + 1
+
+    output_sizes = [t-s for s, t in zip(output_rowptr, output_rowptr[1:])]
+    input_sizes = [t-s for s, t in zip(input_rowptr, input_rowptr[1:])]
+
+    return dist.all_to_all_single(
+        output=output_tensor,
+        input=input_tensor,
+        output_split_sizes=output_sizes,
+        input_split_sizes=input_sizes,
+        group=group,
+        async_op=async_op,
+    )
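A hedged usage sketch for the new all_to_all_s (the per-rank row counts and feature width below are invented, and the helper is assumed to live next to all_to_all_s defined above): rank r sends input_tensor rows input_rowptr[i]:input_rowptr[i+1] to rank i and receives output_rowptr[i]:output_rowptr[i+1] rows from it, so output_rowptr is typically derived by first exchanging the send sizes.

# Hedged usage sketch (not part of the commit): assumes torch.distributed is
# already initialized and a backend that supports all_to_all_single.
import torch
import torch.distributed as dist

def exchange_rows(feat_dim: int = 16) -> torch.Tensor:
    world_size = dist.get_world_size()

    # Invented send plan: 2 rows to every peer; real sizes come from partitioning.
    input_sizes = [2] * world_size
    input_rowptr = [0]
    for s in input_sizes:
        input_rowptr.append(input_rowptr[-1] + s)

    # Exchange the send sizes so each rank knows how many rows it will receive.
    # (With the NCCL backend these tensors would live on the rank's GPU.)
    recv_sizes = torch.empty(world_size, dtype=torch.long)
    dist.all_to_all_single(recv_sizes, torch.tensor(input_sizes, dtype=torch.long))
    output_rowptr = [0]
    for s in recv_sizes.tolist():
        output_rowptr.append(output_rowptr[-1] + s)

    input_tensor = torch.randn(input_rowptr[-1], feat_dim)
    output_tensor = torch.empty(output_rowptr[-1], feat_dim)
    all_to_all_s(output_tensor, input_tensor, output_rowptr, input_rowptr)
    return output_tensor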
@@ -7,9 +7,11 @@ import os
 from torch import Tensor
 from typing import *
 
+from contextlib import contextmanager
+
 import logging
 
-from .cclib import all_to_all_v
+from .cclib import all_to_all_v, all_to_all_s
 from .rpc import rpc_remote_call, rpc_remote_void_call
@@ -157,61 +159,84 @@ class DistributedContext:
     def remote_void_call(self, method, rref: rpc.RRef, *args, **kwargs):
         return rpc_remote_void_call(method, rref, *args, **kwargs)
 
-    def all_to_all_v(self,
-        output_tensor_list: List[Tensor],
-        input_tensor_list: List[Tensor],
-        group: Optional[Any] = None,
-        async_op: bool = False,
-    ):
-        return all_to_all_v(
-            output_tensor_list,
-            input_tensor_list,
-            group=group,
-            async_op=async_op,
-        )
-
-    def all_to_all_g(self,
-        input_tensor_list: List[Tensor],
-        group: Optional[Any] = None,
-        async_op: bool = False,
-    ):
-        send_sizes = [t.size(0) for t in input_tensor_list]
-        recv_sizes = self.get_all_to_all_recv_sizes(send_sizes, group)
-        output_tensor_list: List[Tensor] = []
-        for s, t in zip(recv_sizes, input_tensor_list):
-            output_tensor_list.append(
-                torch.empty(s, *t.shape[1:], dtype=t.dtype, device=t.device),
-            )
-        work = all_to_all_v(
-            output_tensor_list,
-            input_tensor_list,
-            group=group,
-            async_op=async_op,
-        )
-        if async_op:
-            assert work is not None
-            return output_tensor_list, work
-        else:
-            return output_tensor_list
-
-    def get_all_to_all_recv_sizes(self,
-        send_sizes: List[int],
-        group: Optional[Any] = None,
-    ) -> List[int]:
-        world_size = dist.get_world_size(group)
-        assert len(send_sizes) == world_size
-
-        if dist.get_backend(group) == "gloo":
-            send_t = torch.tensor(send_sizes, dtype=torch.long)
-        else:
-            send_t = torch.tensor(send_sizes, dtype=torch.long, device=self.device)
-        recv_t = torch.empty_like(send_t)
-
-        dist.all_to_all_single(recv_t, send_t, group=group)
-        return recv_t.tolist()
+    # def all_to_all_v(self,
+    #     output_tensor_list: List[Tensor],
+    #     input_tensor_list: List[Tensor],
+    #     group: Any = None,
+    #     async_op: bool = False,
+    # ):
+    #     return all_to_all_v(
+    #         output_tensor_list,
+    #         input_tensor_list,
+    #         group=group,
+    #         async_op=async_op,
+    #     )
+
+    # def all_to_all_g(self,
+    #     input_tensor_list: List[Tensor],
+    #     group: Any = None,
+    #     async_op: bool = False,
+    # ):
+    #     send_sizes = [t.size(0) for t in input_tensor_list]
+    #     recv_sizes = self.get_all_to_all_recv_sizes(send_sizes, group)
+    #     output_tensor_list: List[Tensor] = []
+    #     for s, t in zip(recv_sizes, input_tensor_list):
+    #         output_tensor_list.append(
+    #             torch.empty(s, *t.shape[1:], dtype=t.dtype, device=t.device),
+    #         )
+    #     work = all_to_all_v(
+    #         output_tensor_list,
+    #         input_tensor_list,
+    #         group=group,
+    #         async_op=async_op,
+    #     )
+    #     if async_op:
+    #         assert work is not None
+    #         return output_tensor_list, work
+    #     else:
+    #         return output_tensor_list
+
+    # def all_to_all_s(self,
+    #     output_tensor: Tensor,
+    #     input_tensor: Tensor,
+    #     output_rowptr: List[int],
+    #     input_rowptr: List[int],
+    #     group: Any = None,
+    #     async_op: bool = False,
+    # ):
+    #     return all_to_all_s(
+    #         output_tensor, input_tensor,
+    #         output_rowptr, input_rowptr,
+    #         group=group, async_op=async_op,
+    #     )
+
+    # def get_all_to_all_recv_sizes(self,
+    #     send_sizes: List[int],
+    #     group: Optional[Any] = None,
+    # ) -> List[int]:
+    #     world_size = dist.get_world_size(group)
+    #     assert len(send_sizes) == world_size
+
+    #     if dist.get_backend(group) == "gloo":
+    #         send_t = torch.tensor(send_sizes, dtype=torch.long)
+    #     else:
+    #         send_t = torch.tensor(send_sizes, dtype=torch.long, device=self.device)
+    #     recv_t = torch.empty_like(send_t)
+
+    #     dist.all_to_all_single(recv_t, send_t, group=group)
+    #     return recv_t.tolist()
+
+    @contextmanager
+    def use_stream(self, stream: torch.cuda.Stream, with_event: bool = True):
+        event = torch.cuda.Event() if with_event else None
+        stream.wait_stream(torch.cuda.current_stream(self.device))
+        with torch.cuda.stream(stream):
+            yield event
+            if with_event:
+                event.record()
+
     def all_gather_remote_objects(self, obj: Any) -> List[rpc.RRef]:
         if not isinstance(obj, rpc.RRef):
...
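The new use_stream context manager switches work onto a side CUDA stream and hands back an event that is recorded when the block exits. A hedged usage sketch (ctx stands for a DistributedContext instance; the feature tensor is invented): copy data on the side stream, then have the default stream wait on the event before using the result.

# Hedged usage sketch for DistributedContext.use_stream (not part of the
# commit): queue a host-to-device copy on a side stream, then make the
# default stream wait on the recorded event before consuming the result.
import torch

def prefetch(ctx, feat_cpu: torch.Tensor) -> torch.Tensor:
    side_stream = torch.cuda.Stream(device=ctx.device)
    with ctx.use_stream(side_stream) as event:
        feat_gpu = feat_cpu.pin_memory().to(ctx.device, non_blocking=True)
    # use_stream recorded `event` on side_stream after the copy was queued.
    torch.cuda.current_stream(ctx.device).wait_event(event)
    return feat_gpu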