initial commit

223bb6c0 · Tianxing Wang · 223bb6c0 · 223bb6c0 · 223bb6c0 · 223bb6c0
Commit 223bb6c0 authored Dec 08, 2023 by Tianxing Wang
22 changed files
--- a/.gitignore
+++ b/.gitignore
+.vscode
+.idea
+build
+cmake-build-debug
+.cache
--- a/.uuid
+++ b/.uuid
+95dc0207-5097-42db-90c6-b6da5316fba2
\ No newline at end of file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+cmake_minimum_required(VERSION 3.15)
+
+# Build script for the uvm_utils shared library: compiles every .cpp/.cu file
+# under memory_utils/ and links the result against Torch and OpenMP (plus
+# torch_python when WITH_PYTHON is ON).
+
+# Machine-specific defaults. Each one is applied only when the caller has not
+# already supplied a value (for example with -D<var>=<value>), so the project
+# can be configured on another machine without editing this file.
+if(NOT DEFINED CUDAToolkit_ROOT)
+    set(CUDAToolkit_ROOT /home/wtx/.local/cuda-11.7)
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 86)
+endif()
+# The CUDA compiler must be chosen before project() enables the language.
+if(NOT DEFINED CMAKE_CUDA_COMPILER)
+    set(CMAKE_CUDA_COMPILER /home/wtx/.local/cuda-11.7/bin/nvcc)
+endif()
+project(uvm_utils CXX CUDA)
+
+option(WITH_PYTHON "Link to Python when building" ON)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+# Default Torch search locations, used only when the caller has not pointed
+# find_package(Torch) somewhere else through Torch_DIR.
+if(NOT DEFINED Torch_DIR AND NOT DEFINED ENV{Torch_DIR})
+    list(APPEND CMAKE_PREFIX_PATH /home/wtx/local/pkg/libtorch/share/cmake/Torch)
+    set(ENV{Torch_DIR} /home/wtx/miniconda3/pkgs/pytorch-1.13.1-py3.10_cuda11.7_cudnn8.5.0_0/lib/python3.10/site-packages/torch/share/cmake/Torch)
+endif()
+if(NOT DEFINED Python3_ROOT_DIR)
+    set(Python3_ROOT_DIR /home/wtx/miniconda3/envs/dgl)
+endif()
+if(NOT DEFINED CUDAToolkit_INCLUDE_DIR)
+    set(CUDAToolkit_INCLUDE_DIR /home/wtx/.local/cuda11.7+cudnn8.9)
+endif()
+
+# Locate dependencies (same order as before: Python, Torch, OpenMP).
+if(WITH_PYTHON)
+    find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
+    message(STATUS "python3 is found:" ${Python3_INCLUDE_DIRS} )
+endif()
+
+find_package(Torch REQUIRED)
+message(STATUS "torch: ${TORCH_INCLUDE_DIRS}")
+# Torch's documented way of passing its required C++ flags to the build.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
+
+message(STATUS ${CMAKE_SOURCE_DIR})
+
+find_package(OpenMP REQUIRED)
+
+# CONFIGURE_DEPENDS makes the build re-check the glob, so files added to or
+# removed from memory_utils/ are picked up without a manual re-configure.
+file(GLOB SOURCES CONFIGURE_DEPENDS memory_utils/*.cpp memory_utils/*.cu)
+message(STATUS "sources files: ${SOURCES}")
+add_library(${PROJECT_NAME} SHARED ${SOURCES})
+
+# Usage requirements are attached to the target instead of the directory so
+# they cannot leak into targets added later.
+if(WITH_PYTHON)
+    target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_PYTHON)
+    target_include_directories(${PROJECT_NAME} PRIVATE ${Python3_INCLUDE_DIRS})
+endif()
+target_include_directories(${PROJECT_NAME} PRIVATE
+    ${TORCH_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_EXTENSION_NAME=lib${PROJECT_NAME})
+
+if(WITH_PYTHON)
+    find_library(TORCH_PYTHON_LIBRARY torch_python PATHS "${TORCH_INSTALL_PREFIX}/lib")
+    # Fail with an actionable message instead of linking a *-NOTFOUND value.
+    if(NOT TORCH_PYTHON_LIBRARY)
+        message(FATAL_ERROR
+            "torch_python was not found under ${TORCH_INSTALL_PREFIX}/lib; "
+            "point Torch_DIR at a Python-enabled PyTorch install or configure with -DWITH_PYTHON=OFF")
+    endif()
+    target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_LIBRARY})
+endif()
+
+message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")
+target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES})
+target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX)
+
+# NOTE(review): this installs into the source tree (install/), which is where
+# the committed libuvm_utils.so lives; kept for compatibility.
+install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_SOURCE_DIR}/install")
--- a/include/.uuid
+++ b/include/.uuid
+e8f3f4b2-bd63-40b0-9b38-0616c1d2b0aa
\ No newline at end of file
--- a/include/dispatch.h
+++ b/include/dispatch.h
+#ifndef CUSTOM_OPS_DISPATCH_H
+#define CUSTOM_OPS_DISPATCH_H
+
+// Helper macros for registering operator implementations per dispatch key.
+// Each one expands to a call on a variable named `m`, so they can only be
+// used where such a variable is in scope (the .cpp/.cu files in this project
+// use them inside TORCH_LIBRARY_FRAGMENT blocks).
+
+// Register `function` as the implementation of operator `name` for the
+// c10::DispatchKey::CUDA key.
+#define DISPATCH_TO_CUDA(name, function) \
+  m.impl(name, torch::dispatch(c10::DispatchKey::CUDA, TORCH_FN(function)))
+
+// Register `function` as the implementation of operator `name` for the
+// c10::DispatchKey::CPU key.
+#define DISPATCH_TO_CPU(name, function) \
+  m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))
+
+// Register `function` as the implementation of operator `name` for the
+// c10::DispatchKey::Meta key.
+#define DISPATCH_TO_META(name, function) \
+  m.impl(name, torch::dispatch(c10::DispatchKey::Meta, TORCH_FN(function)))
+
+#endif //CUSTOM_OPS_DISPATCH_H
--- a/include/uvm_utils.h
+++ b/include/uvm_utils.h
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <ATen/ATen.h>
+
+// Public declarations of the UVM (unified virtual memory) tensor helpers.
+namespace uvm {
+
+// Shorthand used by every declaration below.
+using Tensor = at::Tensor;
+
+
+/// Allocate the ATen Tensor with unified managed memory (UVM)
+/// and set both UVM storage preference to CPU and access from self.device
+Tensor new_managed_tensor(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool copy_data);
+
+/// Counterpart of new_managed_tensor with the same signature;
+/// memory_utils_ops.cu registers it for the Meta dispatch key of the
+/// `new_managed_tensor` operator.
+Tensor new_managed_tensor_meta(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool copy_data);
+
+// Allocate the ATen Tensor with host-mapped memory
+Tensor new_host_mapped_tensor(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool copy_data);
+
+// Allocate the ATen Tensor with unified managed memory (UVM) or host-mapped
+// memory.
+// NOTE(review): presumably `is_host_mapped` selects between the two kinds of
+// memory -- confirm against the implementation.
+Tensor new_unified_tensor(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool is_host_mapped,
+    bool copy_data);
+
+/// Allocate the ATen Tensor with unified managed memory (UVM)
+Tensor new_vanilla_managed_tensor(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool copy_data);
+
+///@ingroup cumem-utils
+/// Check if a tensor is allocated with UVM
+bool uvm_storage(const Tensor& t);
+
+///@ingroup cumem-utils
+/// Check if a tensor is allocated with UVM *AND* is not on a CPU
+bool is_uvm_tensor(const Tensor& t);
+
+///@ingroup cumem-utils
+/// Convert a UVM tensor to a CPU tensor
+Tensor uvm_to_cpu(const Tensor& t);
+
+///@ingroup cumem-utils
+/// Create a UVM tensor on the same device as `prototype`, sharing
+/// the same UVM storage as `t`
+Tensor uvm_to_device(const Tensor& t, const Tensor& prototype);
+
+///@ingroup cumem-utils
+/// Call cudaMemAdvise on UVM Storage. The hint enum is generated in Python
+/// (fbgemm,uvm) using data returned from C++ op.
+void uvm_cuda_mem_advise(const Tensor& t, int64_t cuda_memory_advise);
+
+///@ingroup cumem-utils
+/// Call cudaMemPrefetchAsync on UVM Storage
+void uvm_cuda_mem_prefetch_async(
+    const Tensor& t,
+    c10::optional<Tensor> device_t);
+
+///@ingroup cumem-utils
+/// Call madvise(..MADV_DONTFORK) on the UVM storage. This is a workaround for
+/// an issue where the UVM kernel driver unmaps UVM storage pages from the page
+/// table on fork - causing slowdown on the next access from a CPU.
+void uvm_mem_advice_dont_fork(const Tensor& t);
+
+///@ingroup cumem-utils
+/// Copy a contiguous UVM Tensor (uvm_storage(t) is true) into a CPU Tensor.
+/// The copy uses single-threaded memcpy.
+Tensor uvm_to_cpu_clone(const Tensor& t);
+
+} // namespace uvm
--- a/install/.uuid
+++ b/install/.uuid
+e446cb7d-fe2e-47d9-adf1-88586cfccc97
\ No newline at end of file
--- a/install/libuvm_utils.so
+++ b/install/libuvm_utils.so
--- a/memory_utils/.uuid
+++ b/memory_utils/.uuid
+4037be3b-719f-4cf3-a705-923fbfe4b316
\ No newline at end of file
--- a/memory_utils/common.cuh
+++ b/memory_utils/common.cuh
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/Exceptions.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <cstring>
+
+#include "common.h"
+// #include "cumem_utils.h"
+#include "dispatch.h"
+#include "uvm_utils.h"
+// #include "enum_utils.h"
+
+// namespace fbgemm_gpu {
+
+// FBGEMM_GPU_ENUM_CREATE_TAG(uvm)
+
+// } // namespace fbgemm_gpu
--- a/memory_utils/common.h
+++ b/memory_utils/common.h
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <ATen/ATen.h>  
+
+// Shorthand for at::Tensor. Declared at global scope, so it is visible in
+// every translation unit that includes this header.
+using Tensor = at::Tensor;
+
+namespace uvm {
+
+// CPU entry point with the same parameter list as new_unified_tensor;
+// defined in memory_utils.cpp and registered for the CPU dispatch key of the
+// `new_unified_tensor` operator in memory_utils_ops.cpp.
+Tensor new_unified_tensor_cpu(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool is_host_mapped,
+    bool copy_data);
+
+} // namespace uvm
--- a/memory_utils/memory_utils.cpp
+++ b/memory_utils/memory_utils.cpp
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "common.h"
+#include <c10/util/logging_is_not_google_glog.h>
+
+using Tensor = at::Tensor;
+using std::vector;
+
+namespace uvm {
+
+// CPU stand-in for new_unified_tensor. `sizes`, `is_host_mapped` and
+// `copy_data` are not consulted: the result is always an empty tensor of
+// shape {0} created with the options of `self`.
+Tensor new_unified_tensor_cpu(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool is_host_mapped,
+    bool copy_data) {
+  const auto self_options = self.options();
+  Tensor placeholder = at::empty({0}, self_options);
+  return placeholder;
+}
+
+} // namespace uvm
--- a/memory_utils/memory_utils.cu
+++ b/memory_utils/memory_utils.cu
--- a/memory_utils/memory_utils_ops.cpp
+++ b/memory_utils/memory_utils_ops.cpp
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/library.h>
+#include "common.cuh"
+
+
+//#define DISPATCH_TO_CPU(name, function) \
+//  m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))
+//
+//#define DISPATCH_TO_META(name, function) \
+//  m.impl(name, torch::dispatch(c10::DispatchKey::Meta, TORCH_FN(function)))
+
+using Tensor = at::Tensor;
+
+namespace uvm {
+
+    // Operator schemas for the `uvm` library. Entries that pass a TORCH_FN
+    // bind the schema directly to that function; entries given only a schema
+    // string (uvm_to_cpu and the new_*_tensor ops) receive their
+    // implementations through separate per-dispatch-key registrations.
+    TORCH_LIBRARY_FRAGMENT(uvm, m) {
+        m.def("is_uvm_tensor(Tensor t) -> bool", TORCH_FN(is_uvm_tensor));
+        m.def("uvm_storage(Tensor t) -> bool", TORCH_FN(uvm_storage));
+        m.def(
+                "uvm_to_device(Tensor self, Tensor prototype) -> Tensor",
+                TORCH_FN(uvm_to_device));
+        m.def("uvm_to_cpu(Tensor t) -> Tensor");
+        m.def("new_managed_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
+        m.def("new_host_mapped_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
+        m.def(
+                "new_unified_tensor(Tensor self, int[] sizes, bool is_host_mapped, bool copy_data) -> Tensor");
+        m.def("new_vanilla_managed_tensor(Tensor self, int[] sizes, bool copy_data) -> Tensor");
+        // The two ops below are exposed without the `uvm_` prefix that the
+        // bound C++ functions carry.
+        m.def(
+                "cuda_mem_advise(Tensor t, int advice) -> ()",
+                TORCH_FN(uvm_cuda_mem_advise));
+        m.def(
+                "cuda_mem_prefetch_async(Tensor t, Tensor? device_t) -> ()",
+                TORCH_FN(uvm_cuda_mem_prefetch_async));
+        m.def(
+                "uvm_mem_advice_dont_fork(Tensor t) -> ()",
+                TORCH_FN(uvm_mem_advice_dont_fork));
+
+        m.def("uvm_to_cpu_clone(Tensor t) -> Tensor", TORCH_FN(uvm_to_cpu_clone));
+        // m.def(FBGEMM_GPU_ENUM_OP(uvm, fbgemm_gpu_uvm_enum_query));
+    }
+
+    // CPU and Meta implementations registered from this translation unit.
+    TORCH_LIBRARY_FRAGMENT(uvm, m) {
+        DISPATCH_TO_CPU("new_unified_tensor", new_unified_tensor_cpu);
+        // NOTE(review): no schema named "new_managed_tensor_meta" is defined
+        // in the block above; the Meta kernel for "new_managed_tensor" is
+        // registered in memory_utils_ops.cu. Confirm whether this name is
+        // intentional.
+        DISPATCH_TO_META("new_managed_tensor_meta", new_managed_tensor_meta);
+    }
+
+} // namespace uvm
--- a/memory_utils/memory_utils_ops.cu
+++ b/memory_utils/memory_utils_ops.cu
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/library.h>
+#include "common.cuh"
+
+namespace uvm {
+
+// Registers the CUDA-key implementations (plus one Meta-key implementation,
+// for new_managed_tensor) of operators in the `uvm` library. Each macro call
+// expands to m.impl(<op name>, ...); see dispatch.h.
+TORCH_LIBRARY_FRAGMENT(uvm, m) {
+  DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
+  DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
+  DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
+  DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
+  DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
+  DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
+}
+
+} // namespace uvm
--- a/setup.py
+++ b/setup.py
+import subprocess
+from distutils.sysconfig import get_python_lib
+
+cmake_args = [
+    "-DCMAKE_PREFIX_PATH=" + get_python_lib(),
+]
+
+subprocess.check_call(["cmake", "-B", "build"] + cmake_args)
+# subprocess.check_call1
\ No newline at end of file
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
+cmake_minimum_required(VERSION 3.10)
+
+# Builds the `uvm` test executable from main.cpp and links it against a
+# prebuilt libuvm_utils.so and Torch.
+
+# Machine-specific defaults. Each one is applied only when the caller has not
+# already supplied a value (for example with -D<var>=<value>).
+if(NOT DEFINED CUDAToolkit_ROOT)
+    set(CUDAToolkit_ROOT /home/wtx/.local/cuda-11.7)
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 86)
+endif()
+# The CUDA compiler must be chosen before project() enables the language.
+if(NOT DEFINED CMAKE_CUDA_COMPILER)
+    set(CMAKE_CUDA_COMPILER /home/wtx/.local/cuda-11.7/bin/nvcc)
+endif()
+if(NOT DEFINED Python3_ROOT_DIR)
+    set(Python3_ROOT_DIR /home/wtx/miniconda3/envs/dgl)
+endif()
+if(NOT DEFINED CUDAToolkit_INCLUDE_DIR)
+    set(CUDAToolkit_INCLUDE_DIR /home/wtx/.local/cuda11.7+cudnn8.9)
+endif()
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+project(uvm_test CXX CUDA)
+
+# Default Torch search locations, used only when the caller has not pointed
+# find_package(Torch) somewhere else through Torch_DIR.
+if(NOT DEFINED Torch_DIR AND NOT DEFINED ENV{Torch_DIR})
+    list(APPEND CMAKE_PREFIX_PATH /home/wtx/local/pkg/libtorch/share/cmake/Torch)
+    set(ENV{Torch_DIR} /home/wtx/miniconda3/pkgs/pytorch-1.13.1-py3.10_cuda11.7_cudnn8.5.0_0/lib/python3.10/site-packages/torch/share/cmake/Torch)
+endif()
+find_package(Torch REQUIRED)
+message(STATUS "torch: ${TORCH_INCLUDE_DIRS}")
+# Torch's documented way of passing its required C++ flags to the build.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
+
+# Prebuilt library under test. Defaults to the copy that sits next to this
+# file; override with -DUVM_UTILS_LIBRARY=<path> to test another build.
+set(UVM_UTILS_LIBRARY "${CMAKE_CURRENT_SOURCE_DIR}/libuvm_utils.so"
+    CACHE FILEPATH "Path to the prebuilt libuvm_utils shared library")
+
+add_executable(uvm main.cpp)
+target_include_directories(uvm PRIVATE
+    ${TORCH_INCLUDE_DIRS}
+    "${CMAKE_CURRENT_SOURCE_DIR}/../include")
+# main.cpp calls ATen functions directly, so the executable has to link Torch
+# itself and not only libuvm_utils.so. No LD_LIBRARY_PATH handling is needed:
+# set(ENV{...}) only changes the environment of the configure step, and CMake
+# adds the directory of a library linked by full path to the build-tree RPATH.
+target_link_libraries(uvm PRIVATE "${UVM_UTILS_LIBRARY}" ${TORCH_LIBRARIES})
--- a/test/libuvm_utils.so
+++ b/test/libuvm_utils.so
--- a/test/main.cpp
+++ b/test/main.cpp
+#include "uvm_utils.h"
+#include <iostream>
+
+using std::cout;
+using std::endl;
+
+// Smoke test: builds a 1x1 tensor of ones with default options and prints the
+// result of uvm::uvm_storage() for it.
+int main() {
+    const at::Tensor probe = at::ones({1, 1}, c10::TensorOptions());
+    const bool stored_in_uvm = uvm::uvm_storage(probe);
+    cout << stored_in_uvm << endl;
+}
--- a/test2.ipynb
+++ b/test2.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = torch.tensor([[1,2,3],[4,5,6]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 3])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "b = a.t()\n",
+    "b[0,0] = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "id(b.storage()) == id(a.storage())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(100)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a[0,0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0, 0)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a.storage_offset(), b.storage_offset()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((3, 1), (1, 3))"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a.stride(), b.stride()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c = b[(0,2),:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[100,   4],\n",
+       "         [  3,   6]]),\n",
+       " tensor([[100,   4],\n",
+       "         [  2,   5],\n",
+       "         [  3,   6]]),\n",
+       " tensor([[100,   2,   3],\n",
+       "         [  4,   5,   6]]))"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c, b, a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2, 1)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c.stride()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = torch.arange(1,11).reshape(2,5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 1,  2,  3,  4,  5],\n",
+       "        [ 6,  7,  8,  9, 10]])"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 1,  6],\n",
+       "        [ 2,  7],\n",
+       "        [ 3,  8],\n",
+       "        [ 4,  9],\n",
+       "        [ 5, 10]])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b = a.t()\n",
+    "b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 3])"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "c = b[(1,3,4),:].t()\n",
+    "c.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([10, 2])"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = torch.arange(20).reshape(10,2)\n",
+    "a.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n",
+      "torch.Size([3, 2])\n",
+      " 8\n",
+      " 9\n",
+      " 10\n",
+      " 11\n",
+      " 18\n",
+      " 19\n",
+      "[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 6]\n",
+      "0\n",
+      "(2, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "b = a[(4,5,9), :]\n",
+    "print(id(b.storage()) == id(a.storage()))\n",
+    "print(b.shape)\n",
+    "print(b.storage())\n",
+    "print(b.storage_offset())\n",
+    "print(b.stride())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dgl",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/testuvm.ipynb
+++ b/testuvm.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch \n",
+    "\n",
+    "torch.ops.load_library(\"cmake-build-debug/libuvm_utils.so\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = torch.arange(1,10).reshape(3,3).cuda()\n",
+    "a = torch.empty(0).cuda()\n",
+    "# x = torch.ops.fbgemm.new_managed_tensor(a, (3,3))\n",
+    "# print(x)\n",
+    "# torch.ops.fbgemm.is_uvm_tensor(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<module 'torch.ops.uvm' from 'torch.ops'>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.ops.uvm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[1., 1.],\n",
+      "        [1., 1.]])\n",
+      "<bound method Tensor.storage of tensor([])>\n",
+      "tensor([])\n"
+     ]
+    }
+   ],
+   "source": [
+    "y = torch.ones((2,2))\n",
+    "z = torch.ops.uvm.new_unified_tensor(y,(2,2),True,True)\n",
+    "print(y)\n",
+    "print(z.storage)\n",
+    "print(z)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([])\n"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dgl",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/uvm.ipynb
+++ b/uvm.ipynb