Commit bb8fcbda by Wenjie Huang

remove submodules metis and gklib

parent 41b0c86a
.gitmodules
[submodule "third_party/METIS"]
    path = third_party/METIS
    url = https://github.com/KarypisLab/METIS
[submodule "third_party/GKlib"]
    path = third_party/GKlib
    url = https://github.com/KarypisLab/GKlib
CMakeLists.txt
@@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.15)
 project(starrygl_ops VERSION 0.1)
 option(WITH_PYTHON "Link to Python when building" ON)
+option(WITH_CUDA "Link to CUDA when building" ON)
+option(WITH_METIS "Link to METIS when building" ON)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -17,24 +19,39 @@ if(WITH_PYTHON)
     include_directories(${Python3_INCLUDE_DIRS})
 endif()
+if(WITH_CUDA)
+    add_definitions(-DWITH_CUDA)
+    find_package(CUDA REQUIRED)
+    include_directories(${CUDA_INCLUDE_DIRS})
+    set(CUDA_LIBRARIES "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart.so")
+endif()
+if(WITH_METIS)
+    set(GKLIB_DIR "third_party/GKlib")
+    set(METIS_DIR "third_party/METIS")
+    add_subdirectory(${GKLIB_DIR})
+endif()
 find_package(Torch REQUIRED)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
-find_package(OpenMP REQUIRED)
-# add_subdirectory(third_party/cccl-2.2.0)
-# add_subdirectory(third_party/cutlass-3.2.1)
-# add_library(${PROJECT_NAME} SHARED csrc/custom_ops.cpp csrc/custom_kernel.cu)
-add_library(${PROJECT_NAME} SHARED csrc/custom_ops.cpp)
+include_directories(${TORCH_INCLUDE_DIRS})
+add_compile_options(${TORCH_CXX_FLAGS})
+# find_package(OpenMP REQUIRED)
+include_directories("csrc/include")
+file(GLOB_RECURSE UVM_SRCS "csrc/uvm/*.cpp")
+file(GLOB_RECURSE METIS_SRCS "csrc/uvm/*.cpp")
+add_library(${PROJECT_NAME} SHARED csrc/export.cpp ${UVM_SRCS} ${METIS_SRCS})
 if(WITH_PYTHON)
     find_library(TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib")
     target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_LIBRARY})
 endif()
 target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})
-target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX)
-# target_link_libraries(${PROJECT_NAME} PRIVATE CCCL::CCCL)
+# target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX)
 target_compile_definitions(${PROJECT_NAME} PRIVATE -DTORCH_EXTENSION_NAME=lib${PROJECT_NAME})
// #include <cub/block/block_reduce.cuh>
// Old extension entry point (csrc/custom_ops.cpp), superseded by csrc/export.cpp below.
#include <torch/extension.h>
#include <omp.h>

torch::Tensor add(torch::Tensor a, torch::Tensor b) {
    return a + b;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("add", &add, "a function implemented using pybind11");
}
#include "extension.h"
#include "uvm.h"
torch::Tensor add(torch::Tensor a, torch::Tensor b) {
return a + b;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("add", &add, "a function implemented using pybind11");
m.def("uvm_storage_new", &uvm_storage_new, "return storage of unified virtual memory");
m.def("uvm_storage_to_cuda", &uvm_storage_to_cuda, "share uvm storage with another cuda device");
m.def("uvm_storage_to_cpu", &uvm_storage_to_cpu, "share uvm storage with cpu");
m.def("uvm_storage_advise", &uvm_storage_advise, "apply cudaMemAdvise() to uvm storage");
m.def("uvm_storage_prefetch", &uvm_storage_prefetch, "apply cudaMemPrefetchAsync() to uvm storage");
py::enum_<cudaMemoryAdvise>(m, "cudaMemoryAdvise")
.value("cudaMemAdviseSetAccessedBy", cudaMemoryAdvise::cudaMemAdviseSetAccessedBy)
.value("cudaMemAdviseUnsetAccessedBy", cudaMemoryAdvise::cudaMemAdviseUnsetAccessedBy)
.value("cudaMemAdviseSetPreferredLocation", cudaMemoryAdvise::cudaMemAdviseSetPreferredLocation)
.value("cudaMemAdviseUnsetPreferredLocation", cudaMemoryAdvise::cudaMemAdviseUnsetPreferredLocation)
.value("cudaMemAdviseSetReadMostly", cudaMemoryAdvise::cudaMemAdviseSetReadMostly)
.value("cudaMemAdviseUnsetReadMostly", cudaMemoryAdvise::cudaMemAdviseUnsetReadMostly);
}
// Shared header (csrc/include/extension.h): pulls in the Torch extension and CUDA context APIs.
#pragma once
#include <torch/extension.h>
#include <ATen/cuda/CUDAContext.h>
#pragma once
#include "extension.h"

// Assert that a tensor lives on a CUDA device.
#define CHECK_CUDA(x) \
    AT_ASSERTM(x.device().is_cuda(), #x " must be a CUDA Tensor")
// UVM storage API (csrc/include/uvm.h), implemented under csrc/uvm/.
#pragma once
#include "extension.h"

// Allocate a new storage backed by cudaMallocManaged() on the given CUDA device.
at::Storage uvm_storage_new(const std::size_t size_bytes, const at::DeviceIndex cuda_device);
// Create aliasing views of the same managed allocation on another CUDA device or on the CPU.
at::Storage uvm_storage_to_cuda(const at::Storage& storage, const at::DeviceIndex cuda_device);
at::Storage uvm_storage_to_cpu(const at::Storage& storage);
// Apply cudaMemAdvise() / cudaMemPrefetchAsync() to the whole allocation.
void uvm_storage_advise(const at::Storage& storage, const cudaMemoryAdvise advise);
void uvm_storage_prefetch(const at::Storage& storage);
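A minimal usage sketch (not part of this commit) of the API above: allocate a UVM-backed float tensor on one device and prefetch it there. The make_uvm_tensor helper and the use of Tensor::set_() to wrap the storage are illustrative assumptions.

// Hypothetical example: a UVM-backed float tensor on device `dev`.
#include <torch/torch.h>
#include "uvm.h"

at::Tensor make_uvm_tensor(int64_t numel, at::DeviceIndex dev) {
    // One float per element, allocated with cudaMallocManaged() on `dev`.
    at::Storage storage = uvm_storage_new(numel * sizeof(float), dev);
    auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, dev);
    // Point a fresh tensor at the managed storage (offset 0, contiguous stride).
    at::Tensor t = at::empty({0}, options);
    t.set_(storage, /*storage_offset=*/0, /*size=*/{numel}, /*stride=*/{1});
    // Prefer GPU residency for the pages, then prefetch them onto the device.
    uvm_storage_advise(storage, cudaMemAdviseSetPreferredLocation);
    uvm_storage_prefetch(storage);
    return t;
}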
#include <torch/torch.h>

// Placeholder stubs for METIS-based graph partitioning (not implemented yet);
// fail loudly instead of falling off the end of a non-void function.
at::Tensor metis_partition() {
    TORCH_CHECK(false, "metis_partition: not implemented");
}
at::Tensor metis_mt_partition() {
    TORCH_CHECK(false, "metis_mt_partition: not implemented");
}
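A hedged sketch (not part of this commit) of what metis_partition() might eventually do: run METIS_PartGraphKway() over a CSR graph held in int64 tensors. The tensor arguments, the metis_partition_sketch name, and a 64-bit IDXTYPEWIDTH build of METIS are all assumptions.

#include <torch/torch.h>
#include <metis.h>

at::Tensor metis_partition_sketch(at::Tensor rowptr, at::Tensor col, int64_t num_parts) {
    idx_t nvtxs  = rowptr.numel() - 1;  // CSR: rowptr has nvtxs + 1 entries
    idx_t ncon   = 1;                   // one balance constraint
    idx_t nparts = num_parts;
    idx_t objval = 0;
    at::Tensor part = at::empty({nvtxs}, rowptr.options());
    // Assumes METIS built with IDXTYPEWIDTH=64 so idx_t matches int64 tensors.
    int status = METIS_PartGraphKway(
        &nvtxs, &ncon,
        rowptr.data_ptr<idx_t>(), col.data_ptr<idx_t>(),
        nullptr, nullptr, nullptr,      // vwgt, vsize, adjwgt: unweighted graph
        &nparts, nullptr, nullptr,      // tpwgts, ubvec: default balance
        nullptr,                        // options: METIS defaults
        &objval, part.data_ptr<idx_t>());
    TORCH_CHECK(status == METIS_OK, "METIS_PartGraphKway failed with status ", status);
    return part;                        // partition id per vertex
}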
// Implementation of the UVM storage helpers declared in uvm.h
// (compiled via the "csrc/uvm/*.cpp" glob in the new CMakeLists.txt).
#include <torch/all.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>

// Owns a cudaMallocManaged() allocation and frees it, on the owning device, when destroyed.
struct CUDAManagedContext {
    void* _ptr;
    at::DeviceIndex _dev;

    CUDAManagedContext(void* ptr, const at::DeviceIndex cuda_device): _ptr(ptr), _dev(cuda_device) {}

    ~CUDAManagedContext() {
        at::cuda::CUDAGuard device_guard(this->_dev);
        AT_CUDA_CHECK(cudaFree(this->_ptr));
    }

    static void release(void* ptr) {
        delete static_cast<CUDAManagedContext*>(ptr);
    }
};

// Holds a reference to the owning Storage, so aliasing Storages produced by
// uvm_storage_to_cuda()/uvm_storage_to_cpu() keep the allocation alive.
struct CUDAManagedContextViewer {
    at::Storage _storage;

    CUDAManagedContextViewer(at::Storage storage): _storage(std::move(storage)) {}

    static void release(void* ptr) {
        delete static_cast<CUDAManagedContextViewer*>(ptr);
    }
};
at::Storage uvm_storage_new(const std::size_t size_bytes, const at::DeviceIndex cuda_device) {
    at::cuda::CUDAGuard device_guard(cuda_device);

    void* ptr;
    AT_CUDA_CHECK(cudaMallocManaged(&ptr, size_bytes));

    // Inner storage: owns the managed allocation through CUDAManagedContext.
    auto storage = at::Storage(
        at::Storage::use_byte_size_t(),
        size_bytes,
        at::DataPtr(
            ptr, new CUDAManagedContext(ptr, cuda_device),
            &CUDAManagedContext::release,
            {at::DeviceType::CUDA, cuda_device}
        ),
        /* allocator= */ nullptr,
        /* resizable= */ false
    );

    // Outer storage: aliases the same pointer through a viewer that shares
    // ownership of the inner storage, so every view extends its lifetime.
    return at::Storage(
        at::Storage::use_byte_size_t(),
        size_bytes,
        at::DataPtr(
            ptr, new CUDAManagedContextViewer(storage),
            &CUDAManagedContextViewer::release,
            {at::DeviceType::CUDA, cuda_device}
        ),
        /* allocator= */ nullptr,
        /* resizable= */ false
    );
}

// A storage is UVM-backed iff its DataPtr deleter is the viewer's release function.
bool uvm_storage_check(const at::Storage& storage) {
    return storage.data_ptr().get_deleter() == &CUDAManagedContextViewer::release;
}
at::Storage uvm_storage_to_cuda(const at::Storage& storage, const at::DeviceIndex cuda_device) {
    AT_ASSERT(uvm_storage_check(storage), "must be uvm storage");
    auto& data_ptr = storage.data_ptr();
    auto ctx_view = static_cast<CUDAManagedContextViewer*>(data_ptr.get_context());

    // Same managed pointer, new viewer, reported as living on `cuda_device`.
    at::cuda::CUDAGuard device_guard(cuda_device);
    return at::Storage(
        at::Storage::use_byte_size_t(),
        storage.nbytes(),
        at::DataPtr(
            data_ptr.get(),
            new CUDAManagedContextViewer(ctx_view->_storage),
            &CUDAManagedContextViewer::release,
            {at::DeviceType::CUDA, cuda_device}
        ),
        /* allocator= */ nullptr,
        /* resizable= */ false
    );
}

at::Storage uvm_storage_to_cpu(const at::Storage& storage) {
    AT_ASSERT(uvm_storage_check(storage), "must be uvm storage");
    auto& data_ptr = storage.data_ptr();
    auto ctx_view = static_cast<CUDAManagedContextViewer*>(data_ptr.get_context());

    // Same managed pointer, new viewer, reported as a CPU storage.
    return at::Storage(
        at::Storage::use_byte_size_t(),
        storage.nbytes(),
        at::DataPtr(
            data_ptr.get(),
            new CUDAManagedContextViewer(ctx_view->_storage),
            &CUDAManagedContextViewer::release,
            {at::DeviceType::CPU}
        ),
        /* allocator= */ nullptr,
        /* resizable= */ false
    );
}
void uvm_storage_advise(const at::Storage& storage, const cudaMemoryAdvise advise) {
    AT_ASSERT(uvm_storage_check(storage), "must be uvm storage");

    // Advise for the device the storage reports, or for the CPU otherwise.
    at::cuda::OptionalCUDAGuard device_guard;
    at::DeviceIndex hint_device = cudaCpuDeviceId;
    if (storage.device_type() == at::DeviceType::CUDA) {
        hint_device = storage.device().index();
        device_guard.set_index(hint_device);
    }
    AT_CUDA_CHECK(cudaMemAdvise(
        storage.data(),
        storage.nbytes(),
        // static_cast<enum cudaMemoryAdvise>(advise),
        advise,
        hint_device
    ));
}

void uvm_storage_prefetch(const at::Storage& storage) {
    AT_ASSERT(uvm_storage_check(storage), "must be uvm storage");

    // Prefetch to the storage's device on its current stream, or to the CPU.
    at::cuda::OptionalCUDAGuard device_guard;
    cudaStream_t stream = 0;
    at::DeviceIndex hint_device = cudaCpuDeviceId;
    if (storage.device_type() == at::DeviceType::CUDA) {
        hint_device = storage.device().index();
        device_guard.set_index(hint_device);
        stream = at::cuda::getCurrentCUDAStream().stream();
    }
    AT_CUDA_CHECK(cudaMemPrefetchAsync(
        storage.data(),
        storage.nbytes(),
        hint_device,
        stream
    ));
}
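A small illustration (not part of this commit) of how the viewer sharing above behaves: uvm_storage_to_cpu() and uvm_storage_to_cuda() return storages that alias the same cudaMallocManaged() buffer, and the buffer is freed only after the original storage and every view are gone. The function name below is hypothetical, and the example assumes a second GPU (cuda:1) is present.

#include "uvm.h"

void share_uvm_storage_example(const at::Storage& uvm) {
    at::Storage cpu_view  = uvm_storage_to_cpu(uvm);      // device = CPU, same pointer
    at::Storage gpu1_view = uvm_storage_to_cuda(uvm, 1);  // device = cuda:1, same pointer
    // All three storages expose the same data pointer; the allocation outlives
    // `uvm` as long as any view still holds the shared viewer context.
    TORCH_CHECK(cpu_view.data() == uvm.data());
    TORCH_CHECK(gpu1_view.data() == uvm.data());
}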
Subproject commit f5ae915a84d3bbf1508b529af90292dd7085b9ec