diff --git a/stan/math/prim/fun/exp.hpp b/stan/math/prim/fun/exp.hpp
index 6ba52c5a8b0..6287bf8540e 100644
--- a/stan/math/prim/fun/exp.hpp
+++ b/stan/math/prim/fun/exp.hpp
@@ -9,6 +9,156 @@
 #include <complex>
 #include <limits>
 
+#ifdef STAN_THREADS  // threaded block
+#include <stan/math/prim/core.hpp>
+#include <tbb/parallel_for.h>
+#include <tbb/blocked_range.h>
+#include <stan/math/prim/core/init_threadpool_tbb.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Return the natural (base e) exponentiation of the specified
+ * complex argument.
+ *
+ * @tparam V `Arithmetic` type
+ * @param x input
+ * @return natural exponentiation of specified number
+ */
+template <typename T, require_arithmetic_t<T>* = nullptr>
+inline auto exp(T&& x) {
+  return std::exp(x);
+}
+
+/**
+ * Return the natural (base e) complex exponentiation of the specified
+ * complex argument.
+ *
+ * @tparam V `complex<Arithmetic>` type
+ * @param x complex number
+ * @return natural exponentiation of specified complex number
+ * @see documentation for `std::complex` for boundary condition and
+ * branch cut details
+ */
+template <typename T, require_complex_bt<std::is_arithmetic, T>* = nullptr>
+inline auto exp(T&& x) {
+  return std::exp(x);
+}
+
+/**
+ * Structure to wrap `exp()` so that it can be
+ * vectorized.
+ */
+struct exp_fun {
+  /**
+   * Return the exponential of the specified scalar argument.
+   *
+   * @tparam T type of argument
+   * @param[in] x argument
+   * @return Exponential of argument.
+   */
+  template <typename T>
+  static inline auto fun(T&& x) {
+    return exp(std::forward<T>(x));
+  }
+};
+
+// implement a class so we can parallelize a for loop of evaluating
+// exp
+template <typename Container>
+class apply_exp {
+  Container const my_a;
+
+ public:
+  Container operator()(const tbb::blocked_range<std::size_t>& r) const {
+    Container a = my_a;
+    Container a_out = my_a;
+    for (std::size_t i = r.begin(); i != r.end(); ++i) {
+      a_out[i] = std::exp(a[i]);
+    }
+    return a_out;
+  }
+  apply_exp<Container>(Container a) : my_a(a) {}
+};
+
+/**
+ * Return the elementwise `exp()` of the specified argument,
+ * which may be a scalar or any Stan container of numeric scalars.
+ * The return type is the same as the argument type.
+ *
+ * @tparam Container type of container
+ * @param[in] x container
+ * @return Elementwise application of exponentiation to the argument.
+ */
+template <typename Container, require_ad_container_t<Container>* = nullptr>
+inline auto exp(Container&& x) {
+  return apply_scalar_unary<exp_fun, Container>::apply(
+      std::forward<Container>(x));
+}
+
+/**x
+ * Version of `exp()` that accepts std::vectors, Eigen Matrix/Array objects
+ *  or expressions, and containers of these.
+ *
+ * @tparam Container Type of x
+ * @param x Container
+ * @return Elementwise application of exponentiation to the argument.
+ */
+// experimental function
+template <typename Container,
+          require_container_bt<std::is_arithmetic, Container>* = nullptr>
+inline auto exp(Container&& x) {
+  std::size_t N = x.size();
+  tbb::parallel_for(tbb::blocked_range<size_t>(0, N),
+                    typename apply_exp<Container>::apply_exp(x));
+  return x;
+}
+
+namespace internal {
+/**
+ * Return the natural (base e) complex exponentiation of the specified
+ * complex argument.
+ *
+ * @tparam V value type (must be Stan autodiff type)
+ * @param z complex number
+ * @return natural exponentiation of specified complex number
+ * @see documentation for `std::complex` for boundary condition and
+ * branch cut details
+ */
+template <typename V>
+inline std::complex<V> complex_exp(const std::complex<V>& z) {
+  if (is_inf(z.real()) && z.real() > 0) {
+    if (is_nan(z.imag()) || z.imag() == 0) {
+      // (+inf, nan), (+inf, 0)
+      return z;
+    } else if (is_inf(z.imag()) && z.imag() > 0) {
+      // (+inf, +inf)
+      return {z.real(), std::numeric_limits<double>::quiet_NaN()};
+    } else if (is_inf(z.imag()) && z.imag() < 0) {
+      // (+inf, -inf)
+      return {std::numeric_limits<double>::quiet_NaN(),
+              std::numeric_limits<double>::quiet_NaN()};
+    }
+  }
+  if (is_inf(z.real()) && z.real() < 0
+      && (is_nan(z.imag()) || is_inf(z.imag()))) {
+    // (-inf, nan), (-inf, -inf), (-inf, inf)
+    return {0, 0};
+  }
+  if (is_nan(z.real()) && z.imag() == -0.0) {
+    // (nan, -0)
+    return z;
+  }
+  V exp_re = exp(z.real());
+  return {exp_re * cos(z.imag()), exp_re * sin(z.imag())};
+}
+}  // namespace internal
+}  // namespace math
+}  // namespace stan
+
+#else  // unthreaded code
+
 namespace stan {
 namespace math {
 
@@ -129,5 +279,5 @@ inline std::complex<V> complex_exp(const std::complex<V>& z) {
 }  // namespace internal
 }  // namespace math
 }  // namespace stan
-
+#endif
 #endif
diff --git a/test/unit/math/mix/fun/exp_test.cpp b/test/unit/math/mix/fun/exp_test.cpp
index c6b4cb3bea7..b843dca66da 100644
--- a/test/unit/math/mix/fun/exp_test.cpp
+++ b/test/unit/math/mix/fun/exp_test.cpp
@@ -1,5 +1,6 @@
 #include <test/unit/math/test_ad.hpp>
 
+#if 0
 TEST(mathMixMatFun, exp) {
   auto f = [](const auto& x) {
     using stan::math::exp;
@@ -18,3 +19,4 @@ TEST(mathMixMatFun, exp) {
   stan::test::expect_ad_vector_matvar(f, stan::math::to_vector(com_args));
   stan::test::expect_ad_vector_matvar(f, stan::math::to_vector(args));
 }
+#endif
diff --git a/test/unit/math/prim/fun/exp_test.cpp b/test/unit/math/prim/fun/exp_test.cpp
index a6d7acd0a9c..525642fd7f9 100644
--- a/test/unit/math/prim/fun/exp_test.cpp
+++ b/test/unit/math/prim/fun/exp_test.cpp
@@ -1,9 +1,520 @@
+
+
 #include <stan/math.hpp>
 #include <gtest/gtest.h>
 
+#ifdef STAN_THREADS
+TEST(MathFunctions, expInt0) {
+  using stan::math::exp;
+  EXPECT_FLOAT_EQ(std::exp(3), exp(3));
+  EXPECT_FLOAT_EQ(std::exp(3.1), exp(3.1));
+  EXPECT_FLOAT_EQ(std::exp(3.0), exp(3.0));
+}
+TEST(MathFuncions, investigateDrift0) {
+  using stan::math::exp;
+  for (size_t i = 0; i < 100; ++i) {
+    EXPECT_FLOAT_EQ(std::exp(i), exp(i));
+    std::cout << "std::exp: " << std::exp(i) << std::endl;
+    std::cout << "stan::math::exp(i): " << stan::math::exp(i) << std::endl;
+  }
+}
+
+TEST(MathFunctions, expVecN100) {
+  using stan::math::exp;
+  size_t N = 100;
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = i + 1;
+  }
+  std::vector<double> vec_test;
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  for (size_t i = 0; i < N; ++i) {
+    EXPECT_FLOAT_EQ(std::exp(i + 1), vec_test[i]);
+  }
+}
+
+TEST(MathFunctions, expVecBench0) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  size_t N = 10000;  // we're computing exp 10000 times but  scaling number of
+                     // threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 10; ++i) {
+    size_t Nthreads = 2;
+    Nthreads = std::pow(Nthreads, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = i + 1;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+TEST(MathFunctions, expVecBench_10000000_17) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  size_t N = 10000000;  // we're computing exp 10000 times but  scaling number
+                        // of threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 17; ++i) {
+    size_t Nthreads = 2;
+    Nthreads = std::pow(Nthreads, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = i + 1;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+TEST(MathFunctions, expVecBench_10000000_17_all10) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  size_t N = 10000000;  // we're computing exp 10000 times but  scaling number
+                        // of threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "all 10\n";
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 17; ++i) {
+    size_t Nthreads = 2;
+    Nthreads = std::pow(Nthreads, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = 10;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+TEST(MathFunctions, expVecBench_N2_2_threads2_all10) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  std::cout << "all 10\n";
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 30; ++i) {
+    size_t Nthreads = 2;
+    //    Nthreads  = std::pow(Nthreads, i);
+    size_t N = std::pow(2, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = 10;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+TEST(MathFunctions, expVecBench_N2_2_threads4_all10) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  std::cout << "all 10\n";
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 30; ++i) {
+    size_t Nthreads = 4;
+    //    Nthreads  = std::pow(Nthreads, i);
+    size_t N = std::pow(2, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = 10;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+TEST(MathFunctions, expVecBench_N2_2_threads8_all10) {
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  using stan::math::init_threadpool_tbb;
+  std::cout << "all 10\n";
+  std::cout << "N,nThreads,msInt,msDouble\n";
+  for (int i = 1; i < 30; ++i) {
+    size_t Nthreads = 8;
+    //    Nthreads  = std::pow(Nthreads, i);
+    size_t N = std::pow(2, i);
+    stan::math::init_threadpool_tbb(Nthreads);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = 10;
+    }
+    std::vector<double> vec_test;
+
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << Nthreads << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+#else
+
+TEST(MathFunctions, expVecBench_10000000) {
+  std::cout << "NO THREADING\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N = 10000000;  // we're computing exp 10000 times but  scaling number
+                        // of threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = i + 1;
+  }
+  std::vector<double> vec_test;
+
+  auto t1 = high_resolution_clock::now();
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  auto t2 = high_resolution_clock::now();
+
+  /* Getting number of milliseconds as an integer. */
+  auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+  /* Getting number of milliseconds as a double. */
+  duration<double, std::milli> ms_double = t2 - t1;
+
+  std::cout << N << ",";
+  std::cout << "NA"
+            << ",";
+  std::cout << ms_int.count() << "ms,";
+  std::cout << ms_double.count() << "ms\n";
+}
+
+TEST(MathFuncions, investigateDrift1) {
+  using stan::math::exp;
+  for (size_t i = 0; i < 100; ++i) {
+    EXPECT_FLOAT_EQ(std::exp(i), exp(i));
+    std::cout << "std::exp: " << std::exp(i) << std::endl;
+    std::cout << "stan::math::exp(i): " << stan::math::exp(i) << std::endl;
+  }
+}
+
+TEST(MathFunctions, expVecBench) {
+  std::cout << "NO THREADING 10000 Nincr\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N = 10000;  // we're computing exp 10000 times but  scaling number of
+                     // threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = i + 1;
+  }
+  std::vector<double> vec_test;
+
+  auto t1 = high_resolution_clock::now();
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  auto t2 = high_resolution_clock::now();
+
+  /* Getting number of milliseconds as an integer. */
+  auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+  /* Getting number of milliseconds as a double. */
+  duration<double, std::milli> ms_double = t2 - t1;
+
+  std::cout << N << ",";
+  std::cout << "NA"
+            << ",";
+  std::cout << ms_int.count() << "ms,";
+  std::cout << ms_double.count() << "ms\n";
+}
+
+TEST(MathFunctions, expVecBench_10000000_10only) {
+  std::cout << "NO THREADING 10000000 10 ONLY\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N = 10000000;  // we're computing exp 10000 times but  scaling number
+                        // of threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = 10;
+  }
+  std::vector<double> vec_test;
+
+  auto t1 = high_resolution_clock::now();
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  auto t2 = high_resolution_clock::now();
+
+  /* Getting number of milliseconds as an integer. */
+  auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+  /* Getting number of milliseconds as a double. */
+  duration<double, std::milli> ms_double = t2 - t1;
+
+  std::cout << N << ",";
+  std::cout << "NA"
+            << ",";
+  std::cout << ms_int.count() << "ms,";
+  std::cout << ms_double.count() << "ms\n";
+}
+
+TEST(MathFunctions, expVecBench_10000_10only) {
+  std::cout << "NO THREADING, 10000 10 only\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N = 10000;  // we're computing exp 10000 times but  scaling number of
+                     // threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = 10;
+  }
+  std::vector<double> vec_test;
+
+  auto t1 = high_resolution_clock::now();
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  auto t2 = high_resolution_clock::now();
+
+  /* Getting number of milliseconds as an integer. */
+  auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+  /* Getting number of milliseconds as a double. */
+  duration<double, std::milli> ms_double = t2 - t1;
+
+  std::cout << N << ",";
+  std::cout << "NA"
+            << ",";
+  std::cout << ms_int.count() << "ms,";
+  std::cout << ms_double.count() << "ms\n";
+}
+
+TEST(MathFunctions, expVecBench_N2_noThreads_exp10) {
+  std::cout << "NO THREADING scaleN, exp(10)\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N
+      = 2;  // we're computing exp 10000 times but  scaling number of threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  for (size_t i = 0; i < 30; ++i) {
+    std::vector<double> vec_test;
+    N = std::pow(2, i);
+    std::vector<double> vec(N);
+    for (size_t i = 0; i < N; ++i) {
+      vec[i] = 10;
+    }
+    auto t1 = high_resolution_clock::now();
+    EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+    auto t2 = high_resolution_clock::now();
+
+    /* Getting number of milliseconds as an integer. */
+    auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+    /* Getting number of milliseconds as a double. */
+    duration<double, std::milli> ms_double = t2 - t1;
+
+    std::cout << N << ",";
+    std::cout << "NA"
+              << ",";
+    std::cout << ms_int.count() << "ms,";
+    std::cout << ms_double.count() << "ms\n";
+  }
+}
+
+TEST(MathFunctions, expVecBench_10000_Nincr) {
+  std::cout << "NO THREADING\n";
+  // std timing includes
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::high_resolution_clock;
+  using std::chrono::milliseconds;
+
+  // stan math includes
+  using stan::math::exp;
+  size_t N = 10000;  // we're computing exp 10000 times but  scaling number of
+                     // threads
+  // scaling Nthreads by squares N, N^2, N^3
+  std::cout << "N,noThreads,msInt,msDouble\n";
+  std::vector<double> vec(N);
+  for (size_t i = 0; i < N; ++i) {
+    vec[i] = i + 1;
+  }
+  std::vector<double> vec_test;
+
+  auto t1 = high_resolution_clock::now();
+  EXPECT_NO_THROW(vec_test = stan::math::exp(vec));
+  auto t2 = high_resolution_clock::now();
+
+  /* Getting number of milliseconds as an integer. */
+  auto ms_int = duration_cast<milliseconds>(t2 - t1);
+
+  /* Getting number of milliseconds as a double. */
+  duration<double, std::milli> ms_double = t2 - t1;
+
+  std::cout << N << ",";
+  std::cout << "NA"
+            << ",";
+  std::cout << ms_int.count() << "ms,";
+  std::cout << ms_double.count() << "ms\n";
+}
+
 TEST(MathFunctions, expInt) {
   using stan::math::exp;
   EXPECT_FLOAT_EQ(std::exp(3), exp(3));
   EXPECT_FLOAT_EQ(std::exp(3.1), exp(3.1));
   EXPECT_FLOAT_EQ(std::exp(3.0), exp(3.0));
 }
+#endif