Commit 223bb6c0 by Tianxing Wang

initial commit

parents
.vscode
.idea
build
cmake-build-debug
.cache
95dc0207-5097-42db-90c6-b6da5316fba2
\ No newline at end of file
# Build script for the `uvm_utils` shared library (C++/CUDA sources under
# memory_utils/, linked against libtorch, OpenMP and optionally torch_python).
#
# Every machine-specific location below is only a default: pass
# -D<VAR>=<path> at configure time (or export Torch_DIR) to override it.
cmake_minimum_required(VERSION 3.15)

# --- Toolchain defaults; must be set before project() enables CUDA ---------
if(NOT DEFINED CUDAToolkit_ROOT)
  set(CUDAToolkit_ROOT /home/wtx/.local/cuda-11.7)
endif()
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  # CMAKE_CUDA_ARCHITECTURES was introduced in CMake 3.18; older releases
  # ignore it.
  set(CMAKE_CUDA_ARCHITECTURES 86)
endif()
if(NOT DEFINED CMAKE_CUDA_COMPILER)
  set(CMAKE_CUDA_COMPILER /home/wtx/.local/cuda-11.7/bin/nvcc)
endif()

project(uvm_utils CXX CUDA)

option(WITH_PYTHON "Link to Python when building" ON)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# --- Dependency search hints ------------------------------------------------
list(APPEND CMAKE_PREFIX_PATH /home/wtx/local/pkg/libtorch/share/cmake/Torch)
if(NOT DEFINED Python3_ROOT_DIR)
  set(Python3_ROOT_DIR /home/wtx/miniconda3/envs/dgl)
endif()
if(NOT DEFINED CUDAToolkit_INCLUDE_DIR)
  set(CUDAToolkit_INCLUDE_DIR /home/wtx/.local/cuda11.7+cudnn8.9)
endif()

if(WITH_PYTHON)
  find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
  message(STATUS "python3 is found:" ${Python3_INCLUDE_DIRS})
endif()

# Only fall back to the conda package location when the caller supplied
# neither -DTorch_DIR=... nor a Torch_DIR environment variable.
if(NOT DEFINED Torch_DIR AND NOT DEFINED ENV{Torch_DIR})
  set(ENV{Torch_DIR} /home/wtx/miniconda3/pkgs/pytorch-1.13.1-py3.10_cuda11.7_cudnn8.5.0_0/lib/python3.10/site-packages/torch/share/cmake/Torch)
endif()
find_package(Torch REQUIRED)
message(STATUS "torch: ${TORCH_INCLUDE_DIRS}")
# TORCH_CXX_FLAGS is a flag string exported by the Torch package; it is
# appended to the global C++ flags exactly as before.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
message(STATUS ${PROJECT_SOURCE_DIR})

find_package(OpenMP REQUIRED)

# --- Library target ---------------------------------------------------------
# CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
# source files are noticed without a manual re-configure.
file(GLOB SOURCES CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/memory_utils/*.cpp"
  "${CMAKE_CURRENT_SOURCE_DIR}/memory_utils/*.cu"
)
message(STATUS "sources files: ${SOURCES}")
if(NOT SOURCES)
  message(FATAL_ERROR "No *.cpp / *.cu sources found under ${CMAKE_CURRENT_SOURCE_DIR}/memory_utils")
endif()
add_library(${PROJECT_NAME} SHARED ${SOURCES})

if(WITH_PYTHON)
  target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_PYTHON)
  target_include_directories(${PROJECT_NAME} PRIVATE ${Python3_INCLUDE_DIRS})
endif()
target_include_directories(${PROJECT_NAME} PRIVATE
  ${TORCH_INCLUDE_DIRS}
  "${PROJECT_SOURCE_DIR}/include"
)

if(WITH_PYTHON)
  find_library(TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib")
  if(NOT TORCH_PYTHON_LIBRARY)
    # Fail here with a readable message instead of a generic NOTFOUND error
    # at generate time.
    message(FATAL_ERROR
      "torch_python was not found (searched ${TORCH_INSTALL_PREFIX}/lib). "
      "Point Torch_DIR at a Python-enabled PyTorch install or configure with -DWITH_PYTHON=OFF.")
  endif()
  target_link_libraries(${PROJECT_NAME} PRIVATE "${TORCH_PYTHON_LIBRARY}")
endif()

message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")
target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})
target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX)
target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_EXTENSION_NAME=lib${PROJECT_NAME})

# NOTE(review): the destination is an absolute path inside the source tree, so
# CMAKE_INSTALL_PREFIX has no effect on it. Kept as-is to preserve the
# existing install location.
install(TARGETS ${PROJECT_NAME} DESTINATION "${PROJECT_SOURCE_DIR}/install")
e8f3f4b2-bd63-40b0-9b38-0616c1d2b0aa
\ No newline at end of file
#ifndef CUSTOM_OPS_DISPATCH_H
#define CUSTOM_OPS_DISPATCH_H

// Convenience macros for attaching a kernel to an operator name under one
// specific dispatch key. Each macro expands to a call on an object named `m`,
// so it must be used where such an object is in scope (the call sites use the
// second argument of TORCH_LIBRARY_FRAGMENT).

// Shared expansion used by the three public macros below:
//   key      - enumerator of c10::DispatchKey (CUDA, CPU, Meta)
//   name     - operator name as a string literal
//   function - function to wrap with TORCH_FN
#define UVM_DISPATCH_TO_KEY(key, name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::key, TORCH_FN(function)))

// Bind `function` as the CUDA-key kernel of operator `name`.
#define DISPATCH_TO_CUDA(name, function) \
  UVM_DISPATCH_TO_KEY(CUDA, name, function)

// Bind `function` as the CPU-key kernel of operator `name`.
#define DISPATCH_TO_CPU(name, function) \
  UVM_DISPATCH_TO_KEY(CPU, name, function)

// Bind `function` as the Meta-key kernel of operator `name`.
#define DISPATCH_TO_META(name, function) \
  UVM_DISPATCH_TO_KEY(Meta, name, function)

#endif // CUSTOM_OPS_DISPATCH_H
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <ATen/ATen.h>
// Public declarations of the UVM (CUDA unified managed memory) tensor helpers.
// This header only declares the functions; their definitions are not part of
// this header.
// NOTE(review): for the allocation functions below, `sizes` presumably gives
// the shape of the new tensor and `copy_data` presumably requests copying the
// contents of `self` - confirm against the definitions.
namespace uvm {
using Tensor = at::Tensor;
/// Allocate the ATen Tensor with unified managed memory (UVM)
/// and set both UVM storage preference to CPU and access from self.device
Tensor new_managed_tensor(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool copy_data);
/// Same signature as new_managed_tensor; this is the function bound to the
/// "new_managed_tensor" op for the Meta dispatch key in the op-registration
/// sources.
Tensor new_managed_tensor_meta(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool copy_data);
/// Allocate the ATen Tensor with host-mapped memory
Tensor new_host_mapped_tensor(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool copy_data);
/// Allocate the ATen Tensor with unified managed memory (UVM) or host-mapped
/// memory.
/// NOTE(review): `is_host_mapped` presumably selects host-mapped memory when
/// true and UVM otherwise - confirm against the definition.
Tensor new_unified_tensor(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool is_host_mapped,
bool copy_data);
/// Allocate the ATen Tensor with unified managed memory (UVM)
Tensor new_vanilla_managed_tensor(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool copy_data);
///@ingroup cumem-utils
/// Check if a tensor is allocated with UVM
bool uvm_storage(const Tensor& t);
///@ingroup cumem-utils
/// Check if a tensor is allocated with UVM *AND* is not on a CPU
bool is_uvm_tensor(const Tensor& t);
///@ingroup cumem-utils
/// Convert a UVM tensor to a CPU tensor
Tensor uvm_to_cpu(const Tensor& t);
///@ingroup cumem-utils
/// Create a UVM tensor on the same device as prototype sharing
/// the same uvm storage as t
Tensor uvm_to_device(const Tensor& t, const Tensor& prototype);
///@ingroup cumem-utils
/// Call cudaMemAdvise on UVM Storage. The hint enum is generated in Python
/// (fbgemm,uvm) using data returned from C++ op.
/// `cuda_memory_advise` is the integer value of that hint.
void uvm_cuda_mem_advise(const Tensor& t, int64_t cuda_memory_advise);
///@ingroup cumem-utils
/// Call cudaMemPrefetchAsync on UVM Storage
/// NOTE(review): `device_t` is optional; presumably its device is the
/// prefetch target when present - confirm against the definition.
void uvm_cuda_mem_prefetch_async(
const Tensor& t,
c10::optional<Tensor> device_t);
///@ingroup cumem-utils
/// Call madvise(..MADV_DONTFORK) on the UVM storage. This is a workaround for
/// an issue where the UVM kernel driver unmaps UVM storage pages from the page
/// table on fork - causing slowdown on the next access from a CPU.
void uvm_mem_advice_dont_fork(const Tensor& t);
///@ingroup cumem-utils
/// Copy a contiguous UVM Tensor (uvm_storage(t) is true) into a CPU Tensor.
/// The copy uses single-threaded memcpy.
Tensor uvm_to_cpu_clone(const Tensor& t);
} // namespace uvm
e446cb7d-fe2e-47d9-adf1-88586cfccc97
\ No newline at end of file
4037be3b-719f-4cf3-a705-923fbfe4b316
\ No newline at end of file
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/cuda/CUDAGuard.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>
#include "common.h"
// #include "cumem_utils.h"
#include "dispatch.h"
#include "uvm_utils.h"
// #include "enum_utils.h"
// namespace fbgemm_gpu {
// FBGEMM_GPU_ENUM_CREATE_TAG(uvm)
// } // namespace fbgemm_gpu
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <ATen/ATen.h>
using Tensor = at::Tensor;
namespace uvm {
/// CPU-side counterpart of new_unified_tensor, taking the same four
/// arguments. The op-registration sources bind it to the
/// "new_unified_tensor" op for the CPU dispatch key.
/// The definition shipped with this declaration returns a zero-element tensor
/// created with self.options() and does not use the other three arguments.
Tensor new_unified_tensor_cpu(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool is_host_mapped,
bool copy_data);
} // namespace uvm
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "common.h"
#include <c10/util/logging_is_not_google_glog.h>
using Tensor = at::Tensor;
using std::vector;
namespace uvm {

/// CPU implementation behind the `new_unified_tensor` op.
///
/// Nothing is allocated according to the request here: the result is always a
/// one-dimensional tensor with zero elements that inherits the options
/// (dtype, device, layout, ...) of `self`.
///
/// @param self            tensor whose options are reused for the result
/// @param sizes           unused
/// @param is_host_mapped  unused
/// @param copy_data       unused
/// @return an empty tensor of shape {0}
Tensor new_unified_tensor_cpu(
    const Tensor& self,
    const std::vector<std::int64_t>& sizes,
    bool is_host_mapped,
    bool copy_data) {
  // The remaining arguments are deliberately ignored; the casts only make
  // that explicit and keep unused-parameter warnings quiet.
  (void)sizes;
  (void)is_host_mapped;
  (void)copy_data;

  const auto options = self.options();
  return at::empty({0}, options);
}

} // namespace uvm
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <torch/library.h>
#include "common.cuh"
//#define DISPATCH_TO_CPU(name, function) \
// m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))
//
//#define DISPATCH_TO_META(name, function) \
// m.impl(name, torch::dispatch(c10::DispatchKey::Meta, TORCH_FN(function)))
using Tensor = at::Tensor;
namespace uvm {

// Operator schemas of the `uvm` library.
//
// Ops defined together with TORCH_FN(...) get that function as their
// implementation right here. Ops defined with a schema string only
// (uvm_to_cpu and the new_*_tensor family) receive their kernels through
// per-dispatch-key m.impl(...) registrations instead.
TORCH_LIBRARY_FRAGMENT(uvm, m) {
  m.def("is_uvm_tensor(Tensor t) -> bool", TORCH_FN(is_uvm_tensor));
  m.def("uvm_storage(Tensor t) -> bool", TORCH_FN(uvm_storage));
  m.def(
      "uvm_to_device(Tensor self, Tensor prototype) -> Tensor",
      TORCH_FN(uvm_to_device));
  m.def("uvm_to_cpu(Tensor t) -> Tensor");
  m.def("new_managed_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
  m.def("new_host_mapped_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
  m.def(
      "new_unified_tensor(Tensor self, int[] sizes, bool is_host_mapped, bool copy_data) -> Tensor");
  m.def("new_vanilla_managed_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
  m.def(
      "cuda_mem_advise(Tensor t, int advice) -> ()",
      TORCH_FN(uvm_cuda_mem_advise));
  m.def(
      "cuda_mem_prefetch_async(Tensor t, Tensor? device_t) -> ()",
      TORCH_FN(uvm_cuda_mem_prefetch_async));
  m.def(
      "uvm_mem_advice_dont_fork(Tensor t) -> ()",
      TORCH_FN(uvm_mem_advice_dont_fork));
  m.def("uvm_to_cpu_clone(Tensor t) -> Tensor", TORCH_FN(uvm_to_cpu_clone));
}

// CPU-key kernel registrations.
//
// A Meta kernel used to be registered here under the name
// "new_managed_tensor_meta". No schema with that name is defined above, so
// that registration was bound to a non-existent operator and could never be
// called. The Meta kernel for "new_managed_tensor" is registered next to the
// CUDA kernels, so nothing has to replace it here.
TORCH_LIBRARY_FRAGMENT(uvm, m) {
  DISPATCH_TO_CPU("new_unified_tensor", new_unified_tensor_cpu);
}

} // namespace uvm
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <torch/library.h>
#include "common.cuh"
namespace uvm {

// Kernels bound to the CUDA dispatch key.
TORCH_LIBRARY_FRAGMENT(uvm, m) {
  // Allocation ops
  DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
  DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
  DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
  DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
  // Conversion op
  DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
}

// Kernel bound to the Meta dispatch key.
TORCH_LIBRARY_FRAGMENT(uvm, m) {
  DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
}

} // namespace uvm
# Configure the CMake build tree in ./build (relative to the current working
# directory), adding the interpreter's site-packages directory to
# CMAKE_PREFIX_PATH.
import subprocess
import sysconfig

# `distutils` is deprecated and was removed in Python 3.12;
# sysconfig.get_path("purelib") is the standard-library way to obtain the
# site-packages directory.
# NOTE(review): presumably this lets find_package(Torch) locate a
# pip/conda-installed torch package - confirm against the CMake script.
cmake_args = [
    "-DCMAKE_PREFIX_PATH=" + sysconfig.get_path("purelib"),
]

# check_call raises CalledProcessError when cmake exits with a non-zero status.
subprocess.check_call(["cmake", "-B", "build"] + cmake_args)
\ No newline at end of file
# Stand-alone smoke test: builds the `uvm` executable from main.cpp and links
# it against an already-built libuvm_utils.so.
#
# Every machine-specific location below is only a default: pass
# -D<VAR>=<path> at configure time (or export Torch_DIR) to override it.
cmake_minimum_required(VERSION 3.10)

# --- Toolchain defaults; must be set before project() enables CUDA ---------
if(NOT DEFINED CUDAToolkit_ROOT)
  set(CUDAToolkit_ROOT /home/wtx/.local/cuda-11.7)
endif()
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 86)
endif()
if(NOT DEFINED CMAKE_CUDA_COMPILER)
  set(CMAKE_CUDA_COMPILER /home/wtx/.local/cuda-11.7/bin/nvcc)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

list(APPEND CMAKE_PREFIX_PATH /home/wtx/local/pkg/libtorch/share/cmake/Torch)
if(NOT DEFINED Python3_ROOT_DIR)
  set(Python3_ROOT_DIR /home/wtx/miniconda3/envs/dgl)
endif()
if(NOT DEFINED CUDAToolkit_INCLUDE_DIR)
  set(CUDAToolkit_INCLUDE_DIR /home/wtx/.local/cuda11.7+cudnn8.9)
endif()

project(uvm_test CXX CUDA)

# Only fall back to the conda package location when the caller supplied
# neither -DTorch_DIR=... nor a Torch_DIR environment variable.
if(NOT DEFINED Torch_DIR AND NOT DEFINED ENV{Torch_DIR})
  set(ENV{Torch_DIR} /home/wtx/miniconda3/pkgs/pytorch-1.13.1-py3.10_cuda11.7_cudnn8.5.0_0/lib/python3.10/site-packages/torch/share/cmake/Torch)
endif()
find_package(Torch REQUIRED)
message(STATUS "torch: ${TORCH_INCLUDE_DIRS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

# Pre-built library under test; override with -DUVM_UTILS_LIBRARY=<file>.
set(UVM_UTILS_LIBRARY
  /home/wtx/workspace/cpp_project/uvm/test/libuvm_utils.so
  CACHE FILEPATH "Pre-built libuvm_utils.so the test executable links against")
get_filename_component(uvm_utils_library_dir "${UVM_UTILS_LIBRARY}" DIRECTORY)

add_executable(uvm main.cpp)
target_include_directories(uvm PRIVATE
  ${TORCH_INCLUDE_DIRS}
  "${CMAKE_CURRENT_SOURCE_DIR}/../include"
)
# main.cpp calls ATen directly, so the Torch libraries must be on the link
# line of the executable itself, not only on that of libuvm_utils.so.
target_link_libraries(uvm PRIVATE "${UVM_UTILS_LIBRARY}" ${TORCH_LIBRARIES})
# The previous script appended to ENV{LD_LIBRARY_PATH} during configuration.
# That only changes the environment of the cmake process itself (and it used
# a space instead of ':' as separator), so it never helped the built binary
# find the library. Recording the directory in the build-tree RPATH does.
set_target_properties(uvm PROPERTIES BUILD_RPATH "${uvm_utils_library_dir}")
#include "uvm_utils.h"
#include <iostream>
using std::cout;
using std::endl;
int main(){
auto t = at::ones({1,1}, c10::TensorOptions());
cout << uvm::uvm_storage(t) << endl;
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"a = torch.tensor([[1,2,3],[4,5,6]])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([2, 3])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"b = a.t()\n",
"b[0,0] = 100"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"id(b.storage()) == id(a.storage())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(100)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[0,0]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0, 0)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.storage_offset(), b.storage_offset()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((3, 1), (1, 3))"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.stride(), b.stride()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"c = b[(0,2),:]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[100, 4],\n",
" [ 3, 6]]),\n",
" tensor([[100, 4],\n",
" [ 2, 5],\n",
" [ 3, 6]]),\n",
" tensor([[100, 2, 3],\n",
" [ 4, 5, 6]]))"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"c, b, a"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 1)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"c.stride()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"a = torch.arange(1,11).reshape(2,5)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1, 2, 3, 4, 5],\n",
" [ 6, 7, 8, 9, 10]])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1, 6],\n",
" [ 2, 7],\n",
" [ 3, 8],\n",
" [ 4, 9],\n",
" [ 5, 10]])"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b = a.t()\n",
"b"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([2, 3])"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"c = b[(1,3,4),:].t()\n",
"c.shape"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([10, 2])"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = torch.arange(20).reshape(10,2)\n",
"a.shape"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"torch.Size([3, 2])\n",
" 8\n",
" 9\n",
" 10\n",
" 11\n",
" 18\n",
" 19\n",
"[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 6]\n",
"0\n",
"(2, 1)\n"
]
}
],
"source": [
"b = a[(4,5,9), :]\n",
"print(id(b.storage()) == id(a.storage()))\n",
"print(b.shape)\n",
"print(b.storage())\n",
"print(b.storage_offset())\n",
"print(b.stride())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dgl",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch \n",
"\n",
"torch.ops.load_library(\"cmake-build-debug/libuvm_utils.so\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"a = torch.arange(1,10).reshape(3,3).cuda()\n",
"a = torch.empty(0).cuda()\n",
"# x = torch.ops.fbgemm.new_managed_tensor(a, (3,3))\n",
"# print(x)\n",
"# torch.ops.fbgemm.is_uvm_tensor(x)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'torch.ops.uvm' from 'torch.ops'>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.ops.uvm"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[1., 1.],\n",
" [1., 1.]])\n",
"<bound method Tensor.storage of tensor([])>\n",
"tensor([])\n"
]
}
],
"source": [
"y = torch.ones((2,2))\n",
"z = torch.ops.uvm.new_unified_tensor(y,(2,2),True,True)\n",
"print(y)\n",
"print(z.storage)\n",
"print(z)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([])\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dgl",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed. Click to expand it.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment