diff --git a/stan/math/prim/fun/exp.hpp b/stan/math/prim/fun/exp.hpp index 6ba52c5a8b0..6287bf8540e 100644 --- a/stan/math/prim/fun/exp.hpp +++ b/stan/math/prim/fun/exp.hpp @@ -9,6 +9,156 @@ #include #include +#ifdef STAN_THREADS // threaded block +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** + * Return the natural (base e) exponentiation of the specified + * complex argument. + * + * @tparam V `Arithmetic` type + * @param x input + * @return natural exponentiation of specified number + */ +template * = nullptr> +inline auto exp(T&& x) { + return std::exp(x); +} + +/** + * Return the natural (base e) complex exponentiation of the specified + * complex argument. + * + * @tparam V `complex` type + * @param x complex number + * @return natural exponentiation of specified complex number + * @see documentation for `std::complex` for boundary condition and + * branch cut details + */ +template * = nullptr> +inline auto exp(T&& x) { + return std::exp(x); +} + +/** + * Structure to wrap `exp()` so that it can be + * vectorized. + */ +struct exp_fun { + /** + * Return the exponential of the specified scalar argument. + * + * @tparam T type of argument + * @param[in] x argument + * @return Exponential of argument. + */ + template + static inline auto fun(T&& x) { + return exp(std::forward(x)); + } +}; + +// implement a class so we can parallelize a for loop of evaluating +// exp +template +class apply_exp { + Container const my_a; + + public: + Container operator()(const tbb::blocked_range& r) const { + Container a = my_a; + Container a_out = my_a; + for (std::size_t i = r.begin(); i != r.end(); ++i) { + a_out[i] = std::exp(a[i]); + } + return a_out; + } + apply_exp(Container a) : my_a(a) {} +}; + +/** + * Return the elementwise `exp()` of the specified argument, + * which may be a scalar or any Stan container of numeric scalars. + * The return type is the same as the argument type. + * + * @tparam Container type of container + * @param[in] x container + * @return Elementwise application of exponentiation to the argument. + */ +template * = nullptr> +inline auto exp(Container&& x) { + return apply_scalar_unary::apply( + std::forward(x)); +} + +/**x + * Version of `exp()` that accepts std::vectors, Eigen Matrix/Array objects + * or expressions, and containers of these. + * + * @tparam Container Type of x + * @param x Container + * @return Elementwise application of exponentiation to the argument. + */ +// experimental function +template * = nullptr> +inline auto exp(Container&& x) { + std::size_t N = x.size(); + tbb::parallel_for(tbb::blocked_range(0, N), + typename apply_exp::apply_exp(x)); + return x; +} + +namespace internal { +/** + * Return the natural (base e) complex exponentiation of the specified + * complex argument. + * + * @tparam V value type (must be Stan autodiff type) + * @param z complex number + * @return natural exponentiation of specified complex number + * @see documentation for `std::complex` for boundary condition and + * branch cut details + */ +template +inline std::complex complex_exp(const std::complex& z) { + if (is_inf(z.real()) && z.real() > 0) { + if (is_nan(z.imag()) || z.imag() == 0) { + // (+inf, nan), (+inf, 0) + return z; + } else if (is_inf(z.imag()) && z.imag() > 0) { + // (+inf, +inf) + return {z.real(), std::numeric_limits::quiet_NaN()}; + } else if (is_inf(z.imag()) && z.imag() < 0) { + // (+inf, -inf) + return {std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN()}; + } + } + if (is_inf(z.real()) && z.real() < 0 + && (is_nan(z.imag()) || is_inf(z.imag()))) { + // (-inf, nan), (-inf, -inf), (-inf, inf) + return {0, 0}; + } + if (is_nan(z.real()) && z.imag() == -0.0) { + // (nan, -0) + return z; + } + V exp_re = exp(z.real()); + return {exp_re * cos(z.imag()), exp_re * sin(z.imag())}; +} +} // namespace internal +} // namespace math +} // namespace stan + +#else // unthreaded code + namespace stan { namespace math { @@ -129,5 +279,5 @@ inline std::complex complex_exp(const std::complex& z) { } // namespace internal } // namespace math } // namespace stan - +#endif #endif diff --git a/test/unit/math/mix/fun/exp_test.cpp b/test/unit/math/mix/fun/exp_test.cpp index c6b4cb3bea7..b843dca66da 100644 --- a/test/unit/math/mix/fun/exp_test.cpp +++ b/test/unit/math/mix/fun/exp_test.cpp @@ -1,5 +1,6 @@ #include +#if 0 TEST(mathMixMatFun, exp) { auto f = [](const auto& x) { using stan::math::exp; @@ -18,3 +19,4 @@ TEST(mathMixMatFun, exp) { stan::test::expect_ad_vector_matvar(f, stan::math::to_vector(com_args)); stan::test::expect_ad_vector_matvar(f, stan::math::to_vector(args)); } +#endif diff --git a/test/unit/math/prim/fun/exp_test.cpp b/test/unit/math/prim/fun/exp_test.cpp index a6d7acd0a9c..525642fd7f9 100644 --- a/test/unit/math/prim/fun/exp_test.cpp +++ b/test/unit/math/prim/fun/exp_test.cpp @@ -1,9 +1,520 @@ + + #include #include +#ifdef STAN_THREADS +TEST(MathFunctions, expInt0) { + using stan::math::exp; + EXPECT_FLOAT_EQ(std::exp(3), exp(3)); + EXPECT_FLOAT_EQ(std::exp(3.1), exp(3.1)); + EXPECT_FLOAT_EQ(std::exp(3.0), exp(3.0)); +} +TEST(MathFuncions, investigateDrift0) { + using stan::math::exp; + for (size_t i = 0; i < 100; ++i) { + EXPECT_FLOAT_EQ(std::exp(i), exp(i)); + std::cout << "std::exp: " << std::exp(i) << std::endl; + std::cout << "stan::math::exp(i): " << stan::math::exp(i) << std::endl; + } +} + +TEST(MathFunctions, expVecN100) { + using stan::math::exp; + size_t N = 100; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + for (size_t i = 0; i < N; ++i) { + EXPECT_FLOAT_EQ(std::exp(i + 1), vec_test[i]); + } +} + +TEST(MathFunctions, expVecBench0) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + size_t N = 10000; // we're computing exp 10000 times but scaling number of + // threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 10; ++i) { + size_t Nthreads = 2; + Nthreads = std::pow(Nthreads, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} +TEST(MathFunctions, expVecBench_10000000_17) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + size_t N = 10000000; // we're computing exp 10000 times but scaling number + // of threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 17; ++i) { + size_t Nthreads = 2; + Nthreads = std::pow(Nthreads, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +TEST(MathFunctions, expVecBench_10000000_17_all10) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + size_t N = 10000000; // we're computing exp 10000 times but scaling number + // of threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "all 10\n"; + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 17; ++i) { + size_t Nthreads = 2; + Nthreads = std::pow(Nthreads, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +TEST(MathFunctions, expVecBench_N2_2_threads2_all10) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + std::cout << "all 10\n"; + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 30; ++i) { + size_t Nthreads = 2; + // Nthreads = std::pow(Nthreads, i); + size_t N = std::pow(2, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +TEST(MathFunctions, expVecBench_N2_2_threads4_all10) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + std::cout << "all 10\n"; + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 30; ++i) { + size_t Nthreads = 4; + // Nthreads = std::pow(Nthreads, i); + size_t N = std::pow(2, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +TEST(MathFunctions, expVecBench_N2_2_threads8_all10) { + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + using stan::math::init_threadpool_tbb; + std::cout << "all 10\n"; + std::cout << "N,nThreads,msInt,msDouble\n"; + for (int i = 1; i < 30; ++i) { + size_t Nthreads = 8; + // Nthreads = std::pow(Nthreads, i); + size_t N = std::pow(2, i); + stan::math::init_threadpool_tbb(Nthreads); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << Nthreads << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +#else + +TEST(MathFunctions, expVecBench_10000000) { + std::cout << "NO THREADING\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N = 10000000; // we're computing exp 10000 times but scaling number + // of threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; +} + +TEST(MathFuncions, investigateDrift1) { + using stan::math::exp; + for (size_t i = 0; i < 100; ++i) { + EXPECT_FLOAT_EQ(std::exp(i), exp(i)); + std::cout << "std::exp: " << std::exp(i) << std::endl; + std::cout << "stan::math::exp(i): " << stan::math::exp(i) << std::endl; + } +} + +TEST(MathFunctions, expVecBench) { + std::cout << "NO THREADING 10000 Nincr\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N = 10000; // we're computing exp 10000 times but scaling number of + // threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; +} + +TEST(MathFunctions, expVecBench_10000000_10only) { + std::cout << "NO THREADING 10000000 10 ONLY\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N = 10000000; // we're computing exp 10000 times but scaling number + // of threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; +} + +TEST(MathFunctions, expVecBench_10000_10only) { + std::cout << "NO THREADING, 10000 10 only\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N = 10000; // we're computing exp 10000 times but scaling number of + // threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; +} + +TEST(MathFunctions, expVecBench_N2_noThreads_exp10) { + std::cout << "NO THREADING scaleN, exp(10)\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N + = 2; // we're computing exp 10000 times but scaling number of threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + for (size_t i = 0; i < 30; ++i) { + std::vector vec_test; + N = std::pow(2, i); + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = 10; + } + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; + } +} + +TEST(MathFunctions, expVecBench_10000_Nincr) { + std::cout << "NO THREADING\n"; + // std timing includes + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + using std::chrono::milliseconds; + + // stan math includes + using stan::math::exp; + size_t N = 10000; // we're computing exp 10000 times but scaling number of + // threads + // scaling Nthreads by squares N, N^2, N^3 + std::cout << "N,noThreads,msInt,msDouble\n"; + std::vector vec(N); + for (size_t i = 0; i < N; ++i) { + vec[i] = i + 1; + } + std::vector vec_test; + + auto t1 = high_resolution_clock::now(); + EXPECT_NO_THROW(vec_test = stan::math::exp(vec)); + auto t2 = high_resolution_clock::now(); + + /* Getting number of milliseconds as an integer. */ + auto ms_int = duration_cast(t2 - t1); + + /* Getting number of milliseconds as a double. */ + duration ms_double = t2 - t1; + + std::cout << N << ","; + std::cout << "NA" + << ","; + std::cout << ms_int.count() << "ms,"; + std::cout << ms_double.count() << "ms\n"; +} + TEST(MathFunctions, expInt) { using stan::math::exp; EXPECT_FLOAT_EQ(std::exp(3), exp(3)); EXPECT_FLOAT_EQ(std::exp(3.1), exp(3.1)); EXPECT_FLOAT_EQ(std::exp(3.0), exp(3.0)); } +#endif