IntelPython
diff --git a/‎CMakeLists.txt‎
Lines changed: 36 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎ddptensor/__init__.py‎
Lines changed: 15 additions & 3 deletions b/‎ddptensor/__init__.py‎
Lines changed: 15 additions & 3 deletions
diff --git a/‎ddptensor/array_api.py‎
Lines changed: 6 additions & 0 deletions b/‎ddptensor/array_api.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎ddptensor/ddptensor.py‎
Lines changed: 8 additions & 6 deletions b/‎ddptensor/ddptensor.py‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎setup.py‎
Lines changed: 30 additions & 24 deletions b/‎setup.py‎
Lines changed: 30 additions & 24 deletions
diff --git a/‎src/Creator.cpp‎
Lines changed: 52 additions & 0 deletions b/‎src/Creator.cpp‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎src/EWBinOp.cpp‎
Lines changed: 136 additions & 0 deletions b/‎src/EWBinOp.cpp‎
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,36 @@
+cmake_minimum_required(VERSION 3.18.2)
+project(ddptensor VERSION 1.0)
+
+# Build script for _ddptensor, the C++ library behind the ddptensor Python package.
+
+# C++ standard: C++17 is mandatory, compiler-specific extensions are disabled
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Use -fPIC even if statically compiled
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# ============
+# Target
+# ============
+# CONFIGURE_DEPENDS makes the build re-check the glob, so newly added sources
+# and headers are picked up without a manual re-configure.
+file(GLOB MyCppSources CONFIGURE_DEPENDS
+  "${PROJECT_SOURCE_DIR}/src/*.cpp"
+  "${PROJECT_SOURCE_DIR}/src/include/ddptensor/*.hpp"
+)
+
+# Create the _ddptensor library
+# NOTE(review): a SHARED library gets the platform prefix ("lib" on Unix) while
+# the Python side does `from . import _ddptensor`; confirm how the module gets
+# its importable file name (e.g. MODULE library / pybind11_add_module).
+add_library(_ddptensor SHARED ${MyCppSources})
+
+# Project headers plus the third-party libraries vendored under third_party/
+target_include_directories(_ddptensor PRIVATE
+  "${PROJECT_SOURCE_DIR}/src/include"
+  "${PROJECT_SOURCE_DIR}/third_party/xtl/include"
+  "${PROJECT_SOURCE_DIR}/third_party/xsimd/include"
+  "${PROJECT_SOURCE_DIR}/third_party/xtensor-blas/include"
+  "${PROJECT_SOURCE_DIR}/third_party/xtensor/include"
+  "${PROJECT_SOURCE_DIR}/third_party/bitsery/include"
+)
+
+# ===============
+# Deps
+# ===============
+
+# Find Python3 and NumPy; configuration stops here if any component is missing
+find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
+
+find_package(MPI REQUIRED)
+# The sources use pybind11 types, so a missing pybind11 must fail the
+# configure step instead of surfacing later as a compile error.
+find_package(pybind11 CONFIG REQUIRED)
+
+# Imported targets carry their include directories (treated as SYSTEM) and
+# link requirements, scoped to this target only.
+target_link_libraries(_ddptensor PRIVATE MPI::MPI_C pybind11::module)
@@ -1,12 +1,24 @@
+"""
+Distributed implementation of the array API as defined here:
+https://data-apis.org/array-api/latest
+"""
+
+# Many features of the API are very uniformly defined.
+# We make use of that by providing lists of operations which are similar
+# (see array_api.py). __init__.py and ddptensor.py simply generate the API
+# by iterating through these lists and forwarding the function calls to the
+# C++-extension. Python functions are defined and added by using "eval".
+# For many operations we assume the C++-extension defines enums which allow
+# us to identify each operation.
+# At this point there are no checks of input arguments whatsoever; arguments
+# are simply forwarded as-is.
+
 from . import _ddptensor as _cdt
 from .ddptensor import float64, int64, fini, dtensor
 from os import getenv
 from . import array_api as api
 from . import spmd
 
-#__impl_str = getenv("DDPNP_ARRAY", 'numpy')
-#exec(f"import {__impl_str} as __impl")
-
 for op in api.ew_binary_ops:
     OP = op.upper()
     exec(
 
@@ -1,3 +1,9 @@
+"""
+The list of data API operations for ddptensor, a
+distributed implementation of the array API as defined here:
+https://data-apis.org/array-api/latest
+"""
+
 creators = [
     "arange",  # (start, /, stop=None, step=1, *, dtype=None, device=None)
     "asarray",  # (obj, /, *, dtype=None, device=None, copy=None)
 
@@ -1,13 +1,15 @@
+"""
+The Tensor class for ddptensor, a
+distributed implementation of the array API as defined here:
+https://data-apis.org/array-api/latest
+"""
+#
+# See __init__.py for an implementation overview
+#
 from . import _ddptensor as _cdt
 from ._ddptensor import float64, int64, fini
 from . import array_api as api
 
-#def try_except(func, *args, **kwargs):
-#    try:
-#        return func(*args, **kwargs)
-#    except:
-#        return None
-    
 class dtensor:
     def __init__(self, t):
         self._t = t
 
@@ -1,33 +1,39 @@
-import os
-from os.path import join as jp
-from glob import glob
+import cmake_build_extension
 from setuptools import setup
-from pybind11.setup_helpers import Pybind11Extension
-
-mpiroot = os.environ.get('MPIROOT')
-mklroot = os.environ.get('MKLROOT')
-xtroot = os.getenv('XTROOT', 'third_party')
-
-xt_includes = [jp(xtroot, x, "include") for x in ("xtl", "xsimd", "xtensor-blas", "xtensor")]
+from pathlib import Path
 
 ext_modules = [
-    Pybind11Extension(
-        "ddptensor._ddptensor",
-        glob("src/*.cpp"),
-        include_dirs = xt_includes + [jp(mpiroot, "include"), jp("third_party", "bitsery", "include"), jp("src", "include"), ],
-        extra_compile_args = ["-DUSE_MKL", "-DXTENSOR_USE_XSIMD=1", "-DXTENSOR_USE_OPENMP=1",
-                              "-std=c++17", "-fopenmp",
-                              "-Wno-unused-but-set-variable", "-Wno-sign-compare", "-Wno-unused-local-typedefs", "-Wno-reorder",
-                              "-march=native", "-O0", "-g"],
-        libraries = ["mpi", "mkl_intel_lp64", "mkl_intel_thread", "mkl_core", "iomp5", "pthread", "rt", "dl", "m"],
-        library_dirs = [jp(mpiroot, "lib")],
-        language = 'c++'
-    ),
-]
+        cmake_build_extension.CMakeExtension(
+            name="_ddptensor",
+            # Presumably the package directory the CMake install tree goes into - TODO confirm
+            install_prefix="ddptensor",
+            # Note: pybind11 is a build-system requirement specified in pyproject.toml,
+            #       therefore pypa/pip or pypa/build will install it in the virtual
+            #       environment created during packaging.
+            #       This cmake_depends_on option adds the pybind11 installation path
+            #       to CMAKE_PREFIX_PATH so that CMake can locate the pybind11 package
+            #       even if it is not installed in the system.
+            cmake_depends_on=["pybind11"],
+            # Further CMakeExtension options that are not used here:
+            #   expose_binaries: exposes installed binaries to the environment;
+            #                    also requires a new entry point in setup.cfg.
+            #   write_top_level_init: writes the given content to the top-level
+            #                    __init__.py.
+            # Folder where the main CMakeLists.txt is stored: the directory that
+            # contains this setup.py, as an absolute path.
+            source_dir=str(Path(__file__).parent.absolute()),
+            cmake_configure_options=[
+            ]
+        ),
+    ]
 
 setup(name="ddptensor",
       version="0.1",
       description="Distributed Tensor and more",
       packages=["ddptensor", "ddptensor.numpy", "ddptensor.torch"],
-      ext_modules=ext_modules
+      ext_modules=ext_modules,
+      cmdclass=dict(
+          # Enable the CMakeExtension entries defined above
+          build_ext=cmake_build_extension.BuildExtension,
+      ),
 )
@@ -0,0 +1,52 @@
+#include "ddptensor/Operations.hpp"
+#include "ddptensor/x.hpp"
+
+namespace x {
+
+    /// Creator operations for tensors with element type T.
+    ///
+    /// Both overloads build a PVSlice from the requested shape, create xtensor
+    /// data with the shape returned by PVSlice::tile_shape() and pass both to
+    /// operatorx<T>::mk_tx.
+    template<typename T>
+    class Creator
+    {
+    public:
+        using ptr_type = DPTensorBaseX::ptr_type;
+
+        /// Create a tensor whose content is determined by the creator id alone.
+        ///
+        /// @param c   creator id; EMPTY, ONES and ZEROS are supported
+        /// @param shp requested shape, consumed by this call
+        /// @return the result of operatorx<T>::mk_tx
+        /// @throws std::runtime_error for any other creator id
+        static ptr_type op(CreatorId c, shape_type && shp)
+        {
+            // shp is a plain rvalue reference (not a forwarding reference),
+            // so std::move is the matching cast.
+            PVSlice pvslice(std::move(shp));
+            // Copy the tile shape: pvslice itself is still handed to mk_tx below.
+            shape_type shape(pvslice.tile_shape());
+            switch(c) {
+            case EMPTY:
+                return operatorx<T>::mk_tx(std::move(pvslice), xt::empty<T>(shape));
+            case ONES:
+                return operatorx<T>::mk_tx(std::move(pvslice), xt::ones<T>(shape));
+            case ZEROS:
+                return operatorx<T>::mk_tx(std::move(pvslice), xt::zeros<T>(shape));
+            default:
+                throw std::runtime_error("Unknown creator");
+            }
+        }
+
+        /// Create a tensor with every element set to a given value.
+        ///
+        /// @param c   creator id; only FULL is supported
+        /// @param shp requested shape, consumed by this call
+        /// @param v   fill value, converted with to_native<T>
+        /// @return the result of operatorx<T>::mk_tx
+        /// @throws std::runtime_error for any other creator id
+        template<typename V>
+        static ptr_type op(CreatorId c, shape_type && shp, V && v)
+        {
+            if(c != FULL)
+                throw std::runtime_error("Unknown creator");
+
+            PVSlice pvslice(std::move(shp));
+            // Copy the tile shape: pvslice itself is still handed to mk_tx below.
+            shape_type shape(pvslice.tile_shape());
+            // Allocate uninitialized storage, then overwrite every element.
+            auto a = xt::empty<T>(std::move(shape));
+            a.fill(to_native<T>(v));
+            return operatorx<T>::mk_tx(std::move(pvslice), std::move(a));
+        }
+    }; // class Creator
+} // namespace x
+
+// Dispatches on dtype to x::Creator<T>::op(op, shape); the shape is consumed.
+tensor_i::ptr_type Creator::create_from_shape(CreatorId op, shape_type && shape, DType dtype)
+{
+    // shape is a plain rvalue reference, so it is passed on with std::move
+    return TypeDispatch<x::Creator>(
+        dtype,
+        op,
+        std::move(shape)
+    );
+}
+
+// Dispatches on dtype to x::Creator<T>::op(FULL, shape, val); shape and val
+// are consumed.
+tensor_i::ptr_type Creator::full(shape_type && shape, py::object && val, DType dtype)
+{
+    // Keep the creator id in a named local, as the dispatcher receives it as an lvalue.
+    auto creator_id = FULL;
+    return TypeDispatch<x::Creator>(
+        dtype,
+        creator_id,
+        std::move(shape),
+        std::move(val)
+    );
+}
@@ -0,0 +1,136 @@
+#include "ddptensor/Operations.hpp"
+#include "ddptensor/x.hpp"
+
+namespace x {
+
+    /// Elementwise binary operations on tensors with element type T.
+    ///
+    /// op() is the entry point. It handles the operations that are available
+    /// for every element type and passes all remaining operation ids on to
+    /// integral_op(), which only implements them for integral T.
+    template<typename T>
+    class EWBinOp
+    {
+    public:
+        using ptr_type = DPTensorBaseX::ptr_type;
+
+        // The switch in op() intentionally omits the enumerators handled by
+        // integral_op(), so -Wswitch is silenced for this class. The push saves
+        // the diagnostic state that the pop at the end of the class restores.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+
+        /// Overload selected when T is a floating-point type.
+        ///
+        /// None of the operations routed here are available for floating-point
+        /// elements, so this always throws std::runtime_error.
+        template<typename A, typename B, typename U = T, std::enable_if_t<std::is_floating_point<U>::value, bool> = true>
+        static ptr_type integral_op(EWBinOpId iop, const DPTensorX<T> & tx, A && a, B && b)
+        {
+            throw std::runtime_error("Illegal or unknown elementwise binary operation");
+        }
+
+        /// Overload selected when T is an integral type: bitwise, shift and
+        /// remainder operations, including their reflected (__R*__) variants
+        /// which swap the operands.
+        ///
+        /// @param iop operation id
+        /// @param tx  tensor passed as first argument to operatorx<T>::mk_tx_
+        /// @param a   left operand expression
+        /// @param b   right operand expression
+        /// @throws std::runtime_error if iop is not handled here
+        template<typename A, typename B, typename U = T, std::enable_if_t<std::is_integral<U>::value, bool> = true>
+        static ptr_type integral_op(EWBinOpId iop, const DPTensorX<T> & tx, A && a, B && b)
+        {
+            switch(iop) {
+            case __AND__:
+            case BITWISE_AND:
+                return operatorx<T>::mk_tx_(tx, a & b);
+            case __RAND__:
+                return operatorx<T>::mk_tx_(tx, b & a);
+            case __LSHIFT__:
+            case BITWISE_LEFT_SHIFT:
+                return operatorx<T>::mk_tx_(tx, a << b);
+            case __MOD__:
+            case REMAINDER:
+                return operatorx<T>::mk_tx_(tx, a % b);
+            case __OR__:
+            case BITWISE_OR:
+                return operatorx<T>::mk_tx_(tx, a | b);
+            case __ROR__:
+                return operatorx<T>::mk_tx_(tx, b | a);
+            case __RSHIFT__:
+            case BITWISE_RIGHT_SHIFT:
+                return operatorx<T>::mk_tx_(tx, a >> b);
+            case __XOR__:
+            case BITWISE_XOR:
+                return operatorx<T>::mk_tx_(tx, a ^ b);
+            case __RXOR__:
+                return operatorx<T>::mk_tx_(tx, b ^ a);
+            case __RLSHIFT__:
+                return operatorx<T>::mk_tx_(tx, b << a);
+            case __RMOD__:
+                return operatorx<T>::mk_tx_(tx, b % a);
+            case __RRSHIFT__:
+                return operatorx<T>::mk_tx_(tx, b >> a);
+            default:
+                throw std::runtime_error("Unknown elementwise binary operation");
+            }
+        }
+
+        /// Apply the binary operation bop elementwise to two tensors.
+        ///
+        /// Both operands must be DPTensorX<T> instances. The operation works on
+        /// strided views of each tensor's xarray() restricted to its lslice().
+        ///
+        /// @param bop   operation id
+        /// @param a_ptr left operand
+        /// @param b_ptr right operand
+        /// @return the result of operatorx<T>::mk_tx_ called with the left tensor
+        ///         and the result expression
+        /// @throws std::runtime_error if an operand is not a DPTensorX<T>, if the
+        ///         operation is not implemented, or if it is unknown/illegal for T
+        static ptr_type op(EWBinOpId bop, const ptr_type & a_ptr, const ptr_type & b_ptr)
+        {
+            const auto _a = dynamic_cast<DPTensorX<T>*>(a_ptr.get());
+            const auto _b = dynamic_cast<DPTensorX<T>*>(b_ptr.get());
+            if(!_a || !_b)
+                throw std::runtime_error("Invalid array object: could not dynamically cast");
+            const auto & a = xt::strided_view(_a->xarray(), _a->lslice());
+            const auto & b = xt::strided_view(_b->xarray(), _b->lslice());
+
+            switch(bop) {
+            case __ADD__:
+            case ADD:
+                return operatorx<T>::mk_tx_(*_a, a + b);
+            case __RADD__:
+                return operatorx<T>::mk_tx_(*_a, b + a);
+            case ATAN2:
+                return  operatorx<T>::mk_tx_(*_a, xt::atan2(a, b));
+            case __EQ__:
+            case EQUAL:
+                return  operatorx<T>::mk_tx_(*_a, xt::equal(a, b));
+            case __FLOORDIV__:
+            case FLOOR_DIVIDE:
+                // NOTE(review): for integral T, a / b presumably truncates before
+                // floor is applied - confirm the result for negative operands.
+                return operatorx<T>::mk_tx_(*_a, xt::floor(a / b));
+            case __GE__:
+            case GREATER_EQUAL:
+                return operatorx<T>::mk_tx_(*_a, a >= b);
+            case __GT__:
+            case GREATER:
+                return operatorx<T>::mk_tx_(*_a, a > b);
+            case __LE__:
+            case LESS_EQUAL:
+                return operatorx<T>::mk_tx_(*_a, a <= b);
+            case __LT__:
+            case LESS:
+                return operatorx<T>::mk_tx_(*_a, a < b);
+            case __MUL__:
+            case MULTIPLY:
+                return operatorx<T>::mk_tx_(*_a, a * b);
+            case __RMUL__:
+                return operatorx<T>::mk_tx_(*_a, b * a);
+            case __NE__:
+            case NOT_EQUAL:
+                return operatorx<T>::mk_tx_(*_a, xt::not_equal(a, b));
+            case __SUB__:
+            case SUBTRACT:
+                return operatorx<T>::mk_tx_(*_a, a - b);
+            case __TRUEDIV__:
+            case DIVIDE:
+                return operatorx<T>::mk_tx_(*_a, a / b);
+            case __RFLOORDIV__:
+                return operatorx<T>::mk_tx_(*_a, xt::floor(b / a));
+            case __RSUB__:
+                return operatorx<T>::mk_tx_(*_a, b - a);
+            case __RTRUEDIV__:
+                return operatorx<T>::mk_tx_(*_a, b / a);
+            case __MATMUL__:
+            case __POW__:
+            case POW:
+            case __RPOW__:
+            case LOGADDEXP:
+            case LOGICAL_AND:
+            case LOGICAL_OR:
+            case LOGICAL_XOR:
+                // FIXME
+                throw std::runtime_error("Binary operation not implemented");
+            }
+            // Everything not handled above is only defined for integral T.
+            return integral_op(bop, *_a, a, b);
+        }
+
+#pragma GCC diagnostic pop
+
+    };
+} // namespace x
+    
+// Applies the elementwise binary operation `op` to tensors `a` and `b`.
+// The element type used for the dispatch to x::EWBinOp<T>::op is taken from
+// `a`. Throws std::runtime_error if an operand is null.
+tensor_i::ptr_type EWBinOp::op(EWBinOpId op, x::DPTensorBaseX::ptr_type a, x::DPTensorBaseX::ptr_type b)
+{
+    // a is dereferenced for the dispatch below, so reject null operands first.
+    if(!a.get() || !b.get())
+        throw std::runtime_error("Invalid array object: null operand");
+    return TypeDispatch<x::EWBinOp>(a->dtype(), op, a, b);
+}