diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 622cf8d1a79..301f7e4e40b 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -341,7 +341,11 @@ jobs: PARQUET_TEST_DATA: ${{ github.workspace }}/cpp/submodules/parquet-testing/data run: | # MinIO is required + # TODO: This doesn't seem like a good solution + # TODO: Waiting for PR 50215 to be merged exclude_tests="arrow-s3fs-test" + exclude_tests="${exclude_tests}|arrow-s3fs-module-test" + exclude_tests="${exclude_tests}|arrow-filesystem-test" # unstable exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test" exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test" diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 474c18a9ade..151c8b1d52d 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -73,7 +73,6 @@ ENV ARROW_ACERO=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ - ARROW_S3_MODULE=ON \ ARROW_SUBSTRAIT=ON \ ARROW_USE_CCACHE=ON \ ARROW_USE_MOLD=ON \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 3d9b2ba72d4..1e2f3e8f8f1 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -70,7 +70,6 @@ if [ "${ARROW_ENABLE_THREADING:-ON}" = "OFF" ]; then ARROW_JEMALLOC=OFF ARROW_MIMALLOC=OFF ARROW_S3=OFF - ARROW_S3_MODULE=OFF ARROW_WITH_OPENTELEMETRY=OFF fi @@ -230,7 +229,6 @@ else -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ - -DARROW_S3_MODULE=${ARROW_S3_MODULE:-OFF} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 241addbfebd..81005cf4829 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -51,7 +51,11 @@ if ! 
type storage-testbench >/dev/null 2>&1; then exclude_tests+=("arrow-gcsfs-test") fi if ! type minio >/dev/null 2>&1; then + # TODO: This doesn't seem like a good solution + # TODO: Waiting for PR 50215 to be merged exclude_tests+=("arrow-s3fs-test") + exclude_tests+=("arrow-s3fs-module-test") + exclude_tests+=("arrow-filesystem-test") fi case "$(uname)" in Linux) @@ -60,7 +64,10 @@ case "$(uname)" in Darwin) n_jobs=$(sysctl -n hw.ncpu) # TODO: https://github.com/apache/arrow/issues/40410 + # TODO: Waiting for PR 50215 to be merged exclude_tests+=("arrow-s3fs-test") + exclude_tests+=("arrow-s3fs-module-test") + exclude_tests+=("arrow-filesystem-test") ;; MINGW*) n_jobs=${NUMBER_OF_PROCESSORS:-1} diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index c372f9f1989..a8cabdf9bb0 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -390,17 +390,11 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_JSON) define_option(ARROW_S3 - "Build Arrow with S3 support (requires the AWS SDK for C++)" + "Build Arrow S3 Module (requires the AWS SDK for C++)" OFF DEPENDS ARROW_FILESYSTEM) - define_option(ARROW_S3_MODULE - "Build the Arrow S3 filesystem as a dynamic module" - OFF - DEPENDS - ARROW_S3) - define_option(ARROW_SUBSTRAIT "Build the Arrow Substrait Consumer Module" OFF diff --git a/cpp/src/arrow/ArrowS3Config.cmake.in b/cpp/src/arrow/ArrowS3Config.cmake.in new file mode 100644 index 00000000000..7d95634420f --- /dev/null +++ b/cpp/src/arrow/ArrowS3Config.cmake.in @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# This config sets the following variables in your project:: +# +# ArrowS3_FOUND - true if Arrow S3 is found on the system +# +# This config sets the following targets in your project:: +# +# ArrowS3::arrow_s3_shared - for linking as a shared library, if the shared library is built +# ArrowS3::arrow_s3_static - for linking as a static library, if the static library is built + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(Arrow CONFIG) + +include("${CMAKE_CURRENT_LIST_DIR}/ArrowS3Targets.cmake") + +arrow_keep_backward_compatibility(ArrowS3 arrow_s3) + +check_required_components(ArrowS3) + +arrow_show_details(ArrowS3 ARROW_S3) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 8750598f6c3..13855e5c538 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -960,11 +960,6 @@ if(ARROW_FILESYSTEM) if(ARROW_HDFS) list(APPEND ARROW_FILESYSTEM_SRCS filesystem/hdfs.cc) endif() - if(ARROW_S3) - list(APPEND ARROW_FILESYSTEM_SRCS filesystem/s3fs.cc) - set_source_files_properties(filesystem/s3fs.cc PROPERTIES SKIP_UNITY_BUILD_INCLUSION - ON) - endif() arrow_add_object_library(ARROW_FILESYSTEM ${ARROW_FILESYSTEM_SRCS}) if(ARROW_AZURE) @@ -984,21 +979,38 @@ if(ARROW_FILESYSTEM) endforeach() endif() if(ARROW_S3) - foreach(ARROW_FILESYSTEM_TARGET ${ARROW_FILESYSTEM_TARGETS}) - target_link_libraries(${ARROW_FILESYSTEM_TARGET} PRIVATE ${AWSSDK_LINK_LIBRARIES}) + if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND ARROW_S3_PC_CFLAGS "${ARROW_S3_PC_CFLAGS_PRIVATE}") + set(ARROW_S3_PC_CFLAGS_PRIVATE "") +
endif() + list(APPEND ARROW_S3_LIB_SRCS filesystem/s3fs_module.cc filesystem/s3fs.cc) + add_arrow_lib(arrow_s3 + CMAKE_PACKAGE_NAME + ArrowS3 + PKG_CONFIG_NAME + arrow-s3 + SOURCES + ${ARROW_S3_LIB_SRCS} + SHARED_LINK_LIBS + arrow_shared + SHARED_PRIVATE_LINK_LIBS + ${AWSSDK_LINK_LIBRARIES} + SHARED_INSTALL_INTERFACE_LIBS + ${ARROW_S3_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_LINK_LIBS + arrow_static + ${AWSSDK_LINK_LIBRARIES} + OUTPUTS + ARROW_S3_LIBRARIES) + foreach(LIB_TARGET ${ARROW_S3_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_S3_EXPORTING) endforeach() - - if(ARROW_S3_MODULE) - if(NOT ARROW_BUILD_SHARED) - message(FATAL_ERROR "ARROW_S3_MODULE without shared libarrow (-DARROW_BUILD_SHARED=ON) is not supported" - ) - endif() - - add_library(arrow_s3fs MODULE filesystem/s3fs_module.cc filesystem/s3fs.cc) - target_link_libraries(arrow_s3fs PRIVATE ${AWSSDK_LINK_LIBRARIES} arrow_shared) - set_source_files_properties(filesystem/s3fs.cc filesystem/s3fs_module.cc - PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) + if(ARROW_BUILD_STATIC AND WIN32) + target_compile_definitions(arrow_s3_static PUBLIC ARROW_S3_STATIC) endif() + + set_source_files_properties(filesystem/s3fs.cc filesystem/s3fs_module.cc + PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) endif() list(APPEND ARROW_TESTING_SHARED_LINK_LIBS ${ARROW_GTEST_GMOCK}) diff --git a/cpp/src/arrow/arrow-s3.pc.in b/cpp/src/arrow/arrow-s3.pc.in new file mode 100644 index 00000000000..cab1894d203 --- /dev/null +++ b/cpp/src/arrow/arrow-s3.pc.in @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@ARROW_PKG_CONFIG_INCLUDEDIR@ +libdir=@ARROW_PKG_CONFIG_LIBDIR@ + +Name: Apache Arrow S3 +Description: Apache Arrow's S3 filesystem implementation. +Version: @ARROW_VERSION@ +Requires: arrow +Libs: -L${libdir} -larrow_s3 +Cflags:@ARROW_S3_PC_CFLAGS@ +Cflags.private:@ARROW_S3_PC_CFLAGS_PRIVATE@ diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index ee46f4d256c..66113211858 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -73,9 +73,9 @@ if(ARROW_S3) # static variables storage of AWS SDK for C++ in libaws*.a may be # mixed with one in libarrow. 
if(ARROW_TEST_LINKAGE STREQUAL "shared") - list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_shared) + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_s3_shared) else() - list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_static) + list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS arrow_s3_static) endif() list(APPEND ARROW_S3_TEST_EXTRA_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) add_arrow_test(s3fs_test @@ -107,7 +107,7 @@ if(ARROW_S3) if(ARROW_BUILD_TESTS) add_executable(arrow-s3fs-narrative-test s3fs_narrative_test.cc) target_link_libraries(arrow-s3fs-narrative-test ${ARROW_TEST_LINK_LIBS} - ${GFLAGS_LIBRARIES}) + ${ARROW_S3_TEST_EXTRA_LINK_LIBS} ${GFLAGS_LIBRARIES}) add_dependencies(arrow-tests arrow-s3fs-narrative-test) endif() @@ -119,6 +119,7 @@ if(ARROW_S3) s3fs_benchmark.cc s3_test_util.cc STATIC_LINK_LIBS + ${ARROW_S3_TEST_EXTRA_LINK_LIBS} ${AWSSDK_LINK_LIBRARIES} ${ARROW_BENCHMARK_LINK_LIBS}) if(ARROW_TEST_LINKAGE STREQUAL "static") @@ -128,7 +129,7 @@ if(ARROW_S3) endif() endif() - if(ARROW_S3_MODULE AND ARROW_BUILD_TESTS) + if(ARROW_BUILD_TESTS AND ARROW_BUILD_SHARED) add_arrow_test(s3fs_module_test SOURCES s3fs_module_test.cc @@ -136,13 +137,9 @@ if(ARROW_S3) EXTRA_LABELS filesystem DEFINITIONS - ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3fs>" + ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3_shared>" EXTRA_LINK_LIBS Boost::filesystem) - target_compile_definitions(arrow-filesystem-test - PUBLIC ARROW_S3_LIBPATH="$<TARGET_FILE:arrow_s3fs>") - target_sources(arrow-filesystem-test PUBLIC s3fs_module_test.cc s3_test_util.cc) - target_link_libraries(arrow-filesystem-test PUBLIC Boost::filesystem) endif() endif() diff --git a/cpp/src/arrow/filesystem/filesystem.h b/cpp/src/arrow/filesystem/filesystem.h index a2862d9c1f6..b5563d87003 100644 --- a/cpp/src/arrow/filesystem/filesystem.h +++ b/cpp/src/arrow/filesystem/filesystem.h @@ -560,6 +560,7 @@ class ARROW_EXPORT SlowFileSystem : public FileSystem { /// will fail with an error. /// /// The user is responsible for synchronization of calls to this function.
+ARROW_EXPORT void EnsureFinalized(); /// \defgroup filesystem-factories Functions for creating FileSystem instances diff --git a/cpp/src/arrow/filesystem/s3_visibility.h b/cpp/src/arrow/filesystem/s3_visibility.h new file mode 100644 index 00000000000..c67d29d9b7e --- /dev/null +++ b/cpp/src/arrow/filesystem/s3_visibility.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif + +# ifdef ARROW_S3_STATIC +# define ARROW_S3_EXPORT +# elif defined(ARROW_S3_EXPORTING) +# define ARROW_S3_EXPORT __declspec(dllexport) +# else +# define ARROW_S3_EXPORT __declspec(dllimport) +# endif + +# define ARROW_S3_NO_EXPORT + +# if defined(_MSC_VER) +# pragma warning(pop) +# endif + +#else // Not Windows +# ifndef ARROW_S3_EXPORT +# define ARROW_S3_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_S3_NO_EXPORT +# define ARROW_S3_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 47d249898be..5086b3407c1 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -23,6 +23,7 @@ #include #include "arrow/filesystem/filesystem.h" +#include "arrow/filesystem/s3_visibility.h" #include "arrow/util/macros.h" #include "arrow/util/uri.h" @@ -38,7 +39,7 @@ class STSClient; namespace arrow::fs { /// Options for using a proxy for S3 -struct ARROW_EXPORT S3ProxyOptions { +struct ARROW_S3_EXPORT S3ProxyOptions { std::string scheme; std::string host; int port = -1; @@ -67,7 +68,7 @@ enum class S3CredentialsKind : int8_t { }; /// Pure virtual class for describing custom S3 retry strategies -class ARROW_EXPORT S3RetryStrategy { +class ARROW_S3_EXPORT S3RetryStrategy { public: virtual ~S3RetryStrategy() = default; @@ -96,7 +97,7 @@ class ARROW_EXPORT S3RetryStrategy { }; /// Options for the S3FileSystem implementation. 
-struct ARROW_EXPORT S3Options { +struct ARROW_S3_EXPORT S3Options { /// \brief Smart defaults for option values /// /// The possible values for this setting are explained in the AWS docs: @@ -315,7 +316,7 @@ struct ARROW_EXPORT S3Options { /// Some implementation notes: /// - buckets are special and the operations available on them may be limited /// or more expensive than desired. -class ARROW_EXPORT S3FileSystem : public FileSystem { +class ARROW_S3_EXPORT S3FileSystem : public FileSystem { public: ~S3FileSystem() override; @@ -409,7 +410,7 @@ class ARROW_EXPORT S3FileSystem : public FileSystem { enum class S3LogLevel : int8_t { Off, Fatal, Error, Warn, Info, Debug, Trace }; -struct ARROW_EXPORT S3GlobalOptions { +struct ARROW_S3_EXPORT S3GlobalOptions { /// The log level for S3-originating messages. S3LogLevel log_level; @@ -444,21 +445,21 @@ struct ARROW_EXPORT S3GlobalOptions { /// /// Once this function is called you MUST call FinalizeS3 before the end of the /// application in order to avoid a segmentation fault at shutdown. -ARROW_EXPORT +ARROW_S3_EXPORT Status InitializeS3(const S3GlobalOptions& options); /// \brief Ensure the S3 APIs are initialized, but only if not already done. /// /// If necessary, this will call InitializeS3() with some default options. -ARROW_EXPORT +ARROW_S3_EXPORT Status EnsureS3Initialized(); /// Whether S3 was initialized, and not finalized. -ARROW_EXPORT +ARROW_S3_EXPORT bool IsS3Initialized(); /// Whether S3 was finalized. -ARROW_EXPORT +ARROW_S3_EXPORT bool IsS3Finalized(); /// \brief Shutdown the S3 APIs. @@ -470,16 +471,16 @@ bool IsS3Finalized(); /// Calls to InitializeS3() and FinalizeS3() should be serialized by the /// application (this also applies to EnsureS3Initialized() and /// EnsureS3Finalized()). -ARROW_EXPORT +ARROW_S3_EXPORT Status FinalizeS3(); /// \brief Ensure the S3 APIs are shutdown, but only if not already done. /// /// If necessary, this will call FinalizeS3(). 
-ARROW_EXPORT +ARROW_S3_EXPORT Status EnsureS3Finalized(); -ARROW_EXPORT +ARROW_S3_EXPORT Result ResolveS3BucketRegion(const std::string& bucket); } // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/s3fs_module_test.cc b/cpp/src/arrow/filesystem/s3fs_module_test.cc index f056b7fe55f..a4d85739f5e 100644 --- a/cpp/src/arrow/filesystem/s3fs_module_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_module_test.cc @@ -57,12 +57,7 @@ MinioTestEnvironment* GetMinioEnv() { class RegistrationTestEnvironment : public ::testing::Environment { public: - void SetUp() override { - // Unregister the s3 filesystem factory so that we can be sure the module loading and - // the factories from the module are actually working - ASSERT_OK(internal::UnregisterFileSystemFactory("s3")); - ASSERT_OK(LoadFileSystemFactories(ARROW_S3_LIBPATH)); - } + void SetUp() override { ASSERT_OK(LoadFileSystemFactories(ARROW_S3_LIBPATH)); } void TearDown() override { EnsureFinalized(); } }; diff --git a/cpp/src/arrow/filesystem/util_internal.h b/cpp/src/arrow/filesystem/util_internal.h index 220640b657b..40f402f534c 100644 --- a/cpp/src/arrow/filesystem/util_internal.h +++ b/cpp/src/arrow/filesystem/util_internal.h @@ -83,6 +83,7 @@ enum class AuthorityHandlingBehavior { /// \param supported_schemes the set of URI schemes that should be accepted /// \param accept_local_paths if true, allow an absolute path /// \return the path portion of the URI +ARROW_EXPORT Result PathFromUriHelper(const std::string& uri_string, std::vector supported_schemes, bool accept_local_paths, @@ -95,6 +96,7 @@ ARROW_EXPORT Result GlobFiles(const std::shared_ptr& filesystem, const std::string& glob); +ARROW_EXPORT extern FileSystemGlobalOptions global_options; /// \brief Unregister filesystem factories diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d0ddb9009f8..ee57bed130a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -742,6 +742,14 @@ if(PYARROW_BUILD_S3) if(NOT ARROW_S3) 
message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") endif() + find_package(ArrowS3 REQUIRED) + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_S3_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_S3_IMPORT_LIB}) + endif() + endif() + set(S3_LINK_LIBS ArrowS3::arrow_s3_shared) list(APPEND CYTHON_EXTENSIONS _s3fs) endif() @@ -1038,6 +1046,10 @@ if(PYARROW_BUILD_PARQUET) endif() endif() +if(PYARROW_BUILD_S3) + target_link_libraries(_s3fs PRIVATE ${S3_LINK_LIBS}) +endif() + # # Type stubs with docstring injection #