adding reductions

fschlimb · fschlimb · commit 6467b2bd84d9 · 2022-03-11T05:04:46.000-06:00
diff --git a/ddptensor/__init__.py b/ddptensor/__init__.py
@@ -22,17 +22,7 @@
             f"{func} = lambda shape, *args, **kwargs: dtensor(_cdt.create(shape, '{func}', '{__impl_str}', *args, **kwargs))"
         )
 
-statisticals = [
-    "max",   # (x, /, *, axis=None, keepdims=False)
-    "mean",  # (x, /, *, axis=None, keepdims=False)
-    "min",   # (x, /, *, axis=None, keepdims=False)
-    "prod",  # (x, /, *, axis=None, keepdims=False)
-    "sum",   # (x, /, *, axis=None, keepdims=False)
-    "std",   # (x, /, *, axis=None, correction=0.0, keepdims=False)
-    "var",   # (x, /, *, axis=None, correction=0.0, keepdims=False)
-]
-
-for func in statisticals:
+for func in api.statisticals:  # defines one module-level reduction function per name in api.statisticals
     exec(
         f"{func} = lambda this, **kwargs: dtensor(_cdt.reduce_op(this._t, '{func}', **kwargs))"
     )
diff --git a/ddptensor/array_api.py b/ddptensor/array_api.py
@@ -134,3 +134,13 @@
     "remainder",  # (x1, x2, /)
     "subtract",  # (x1, x2, /)
 ]
+
+statisticals = [  # reduction names; scripts/code_gen.py emits one ReduceOpId enumerator per entry, in this order
+    "max",   # (x, /, *, axis=None, keepdims=False)
+    "mean",  # (x, /, *, axis=None, keepdims=False)
+    "min",   # (x, /, *, axis=None, keepdims=False)
+    "prod",  # (x, /, *, axis=None, keepdims=False)
+    "sum",   # (x, /, *, axis=None, keepdims=False)
+    "std",   # (x, /, *, axis=None, correction=0.0, keepdims=False)
+    "var",   # (x, /, *, axis=None, correction=0.0, keepdims=False)
+]
diff --git a/scripts/code_gen.py b/scripts/code_gen.py
@@ -36,7 +36,14 @@
 print("    EWBINOP_LAST")
 print("};\n")
 
-print("void def_enums(py::module_ & m)\n{")
+print("enum ReduceOpId : int {")
+for name in api.statisticals:
+    start = " = EWBINOP_LAST" if name == api.statisticals[0] else ""  # first id continues after the EWBinOpId range
+    print(f"    {name.upper()}{start},")
+print("    REDUCEOP_LAST")
+print("};\n")
+
+print("static void def_enums(py::module_ & m)\n{")
 
 print('    py::enum_<CreatorId>(m, "CreatorId")')
 for x in api.creators:
@@ -58,4 +65,9 @@
     print(f'        .value("{x.upper()}", {x.upper()})')
 print("        .export_values();\n")
 
+print('    py::enum_<ReduceOpId>(m, "ReduceOpId")')
+for ident in map(str.upper, api.statisticals):  # one .value() binding per reduction id
+    print(f'        .value("{ident}", {ident})')
+print("        .export_values();\n")
+
 print("}")
diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
         extra_compile_args = ["-DUSE_MKL", "-DXTENSOR_USE_XSIMD=1", "-DXTENSOR_USE_OPENMP=1",
                               "-std=c++17", "-fopenmp",
                               "-Wno-unused-but-set-variable", "-Wno-sign-compare", "-Wno-unused-local-typedefs", "-Wno-reorder",
-                              "-march=native", "-O0", "-g"],
+                              "-march=native",], # "-O0", "-g"],
         libraries = ["mpi", "mkl_intel_lp64", "mkl_intel_thread", "mkl_core", "iomp5", "pthread", "rt", "dl", "m"],
         library_dirs = [jp(mpiroot, "lib")],
         language = 'c++'
diff --git a/src/MPITransceiver.cpp b/src/MPITransceiver.cpp
@@ -42,16 +42,16 @@ static MPI_Datatype to_mpi(DType T)
 static MPI_Op to_mpi(RedOpType o)
 {
     switch(o) {
-    case OP_MAX:  return MPI_MAX;
-    case OP_MIN:  return MPI_MIN;
-    case OP_SUM:  return MPI_SUM;
-    case OP_PROD: return MPI_PROD;
-    case OP_LAND: return MPI_LAND;
-    case OP_BAND: return MPI_BAND;
-    case OP_LOR:  return MPI_LOR;
-    case OP_BOR:  return MPI_BOR;
-    case OP_LXOR: return MPI_LXOR;
-    case OP_BXOR: return MPI_BXOR;
+    case MAX:  return MPI_MAX;
+    case MIN:  return MPI_MIN;
+    case SUM:  return MPI_SUM;
+    case PROD: return MPI_PROD;
+    // MEAN, STD and VAR are not mapped and reach the default branch below.
+    // Disabled until ReduceOpId provides logical/bitwise enumerators
+    // (the OP_* names these cases used no longer exist):
+    //   LAND -> MPI_LAND, BAND -> MPI_BAND, LOR -> MPI_LOR,
+    //   BOR  -> MPI_BOR,  LXOR -> MPI_LXOR, BXOR -> MPI_BXOR
+    // TODO: restore these cases once the enumerators exist.
     default: throw std::logic_error("unsupported operation type");
     }
 }
diff --git a/src/ddptensor.cpp b/src/ddptensor.cpp
@@ -244,6 +244,14 @@ struct EWUnyOp
     }
 };
 
+struct ReduceOp
+{
+    // Entry point bound to Python as ReduceOp.op: hands the tensor's dtype and
+    // all arguments to TypeDispatch<x::ReduceOp>.
+    static auto op(ReduceOpId rop, x::DPTensorBaseX::ptr_type tensor, const dim_vec_type & axes)
+    { return TypeDispatch<x::ReduceOp>(tensor->dtype(), rop, tensor, axes); }
+};
+
 rank_type myrank()
 {
     return theTransceiver->rank();
@@ -279,6 +287,9 @@ PYBIND11_MODULE(_ddptensor, m) {
     py::class_<EWBinOp>(m, "EWBinOp")
         .def("op", &EWBinOp::op);
 
+    py::class_<ReduceOp>(m, "ReduceOp")
+        .def("op", &ReduceOp::op);
+
     py::class_<x::DPTensorBaseX, x::DPTensorBaseX::ptr_type>(m, "DPTensorX")
         .def("__repr__", &x::DPTensorBaseX::__repr__);
 
diff --git a/src/include/ddptensor/UtilsAndTypes.hpp b/src/include/ddptensor/UtilsAndTypes.hpp
@@ -8,6 +8,7 @@
 
 #include <pybind11/pybind11.h>
 namespace py = pybind11;
+#include "p2c_ids.hpp"
 
 using shape_type = std::vector<uint64_t>;
 using dim_vec_type = std::vector<int>;
@@ -81,32 +82,17 @@ inline const py::object & get_impl_dtype(const DType dt)
     return _dtypes[dt];
 }
 
-// identifies reduction operation
-enum RedOpType {
-    OP_MAX = 100,
-    OP_MIN,
-    OP_SUM,
-    OP_PROD,
-    OP_MEAN,
-    OP_STD,
-    OP_VAR,
-    OP_LAND,
-    OP_BAND,
-    OP_LOR,
-    OP_BOR,
-    OP_LXOR,
-    OP_BXOR
-};
+using RedOpType = ReduceOpId;
 
 inline RedOpType red_op(const char * op)
 {
-    if(!strcmp(op, "max")) return OP_MAX;
-    if(!strcmp(op, "min")) return OP_MIN;
-    if(!strcmp(op, "sum")) return OP_SUM;
-    if(!strcmp(op, "prod")) return OP_PROD;
-    if(!strcmp(op, "mean")) return OP_MEAN;
-    if(!strcmp(op, "std")) return OP_STD;
-    if(!strcmp(op, "var")) return OP_VAR;
+    // name -> id table, scanned front to back; unknown names fall through to the throw
+    const struct { const char * name; RedOpType id; } known[] = {
+        {"max", MAX}, {"min", MIN}, {"sum", SUM}, {"prod", PROD},
+        {"mean", MEAN}, {"std", STD}, {"var", VAR},
+    };
+    for(const auto & entry : known)
+        if(!strcmp(op, entry.name)) return entry.id;
     throw std::logic_error("unsupported reduction operation");
 }
 
diff --git a/src/include/ddptensor/p2c_ids.hpp b/src/include/ddptensor/p2c_ids.hpp
@@ -137,7 +137,18 @@ enum EWBinOpId : int {
     EWBINOP_LAST
 };
 
-void def_enums(py::module_ & m)
+enum ReduceOpId : int {
+    MAX = EWBINOP_LAST,
+    MEAN,
+    MIN,
+    PROD,
+    SUM,
+    STD,
+    VAR,
+    REDUCEOP_LAST
+};
+
+static void def_enums(py::module_ & m)
 {
     py::enum_<CreatorId>(m, "CreatorId")
         .value("ARANGE", ARANGE)
@@ -268,4 +279,14 @@ void def_enums(py::module_ & m)
         .value("SUBTRACT", SUBTRACT)
         .export_values();
 
+    py::enum_<ReduceOpId>(m, "ReduceOpId")
+        .value("MAX", MAX)
+        .value("MEAN", MEAN)
+        .value("MIN", MIN)
+        .value("PROD", PROD)
+        .value("SUM", SUM)
+        .value("STD", STD)
+        .value("VAR", VAR)
+        .export_values();
+
 }
diff --git a/src/include/ddptensor/x.hpp b/src/include/ddptensor/x.hpp
@@ -429,5 +429,75 @@ namespace x
 #pragma GCC diagnostic pop
 
     };
+
+    
+    // Reduction operations, selected by ReduceOpId, on tensors held as DPTensorX<T>.
+    template<typename T>
+    class ReduceOp
+    {
+    public:
+        using ptr_type = DPTensorBaseX::ptr_type;
+
+// Save the diagnostic state first: the pop at the end of this class needs a
+// matching push, otherwise it resets diagnostics to the command-line options.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+
+        // Wrap an already evaluated local reduction into a new tensor.
+        //   rop:   reduction id, passed on unchanged to the transceiver
+        //   slice: slice of the input tensor; decides via need_reduce(dims)
+        //          whether reduce_all is called on the local result
+        //   dims:  the dimensions that were reduced
+        //   x:     expression holding the local result; it is materialized
+        //          into an xt::xarray whose buffer is handed to reduce_all
+        // Returns a DPTensorX of x's value_type with shape reduce_shape(slice.shape(), dims).
+        // NOTE(review): rop reaches reduce_all as is; to_mpi() in MPITransceiver.cpp
+        // only maps MAX/MIN/SUM/PROD, so MEAN/STD/VAR presumably throw whenever
+        // need_reduce() is true - confirm.
+        template<typename X>
+        static ptr_type dist_reduce(ReduceOpId rop, const PVSlice & slice, const dim_vec_type & dims, X && x)
+        {
+            xt::xarray<typename X::value_type> a = x;
+            auto new_shape = reduce_shape(slice.shape(), dims);
+            if(slice.need_reduce(dims)) {
+                auto len = VPROD(new_shape);
+                theTransceiver->reduce_all(a.data(), DTYPE<typename X::value_type>::value, len, rop);
+            }
+            return std::make_shared<DPTensorX<typename X::value_type>>(new_shape, a);
+        }
+
+        // Apply reduction rop over dims to the tensor behind a_ptr.
+        // Throws std::runtime_error if a_ptr is not a DPTensorX<T>, for MAX and MIN
+        // (not implemented) and for ids that are not reductions.
+        static ptr_type op(ReduceOpId rop, const ptr_type & a_ptr, const dim_vec_type & dims)
+        {
+            auto const _a = dynamic_cast<DPTensorX<T>*>(a_ptr.get());
+            if(!_a )
+                throw std::runtime_error("Invalid array object: could not dynamically cast");
+            auto const & a = _a->xarray();
+
+            switch(rop) {
+            case MEAN:
+                return dist_reduce(rop, _a->slice(), dims, xt::mean(a, dims));
+            case PROD:
+                return dist_reduce(rop, _a->slice(), dims, xt::prod(a, dims));
+            case SUM:
+                return dist_reduce(rop, _a->slice(), dims, xt::sum(a, dims));
+            case STD:
+                return dist_reduce(rop, _a->slice(), dims, xt::stddev(a, dims));
+            case VAR:
+                return dist_reduce(rop, _a->slice(), dims, xt::variance(a, dims));
+            case MAX:
+            case MIN:
+                throw std::runtime_error("Reduction operation not implemented");
+            default:
+                throw std::runtime_error("Unknown reduction operation");
+            }
+        }
+
+#pragma GCC diagnostic pop
+
+    };
+
                 
 } // namespace x
diff --git a/test/test_x.py b/test/test_x.py
@@ -5,4 +5,5 @@
 print(a)
 print(dt.EWBinOp.op(dt.EQUAL, a, b))
 print(dt.EWUnyOp.op(dt.SQRT, a))
+print(dt.ReduceOp.op(dt.SUM, a, [1]))
 dt.fini()