diff --git a/.env b/.env index 6985926772d..b2b5b5eb3b6 100644 --- a/.env +++ b/.env @@ -99,8 +99,8 @@ VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-07 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-07 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-25 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-25 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 4ced75bce55..334cbccc3b4 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -92,6 +92,7 @@ RUN --mount=type=secret,id=github_repository_owner \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ + --x-feature=opentelemetry \ --x-feature=orc \ --x-feature=parquet \ --x-feature=s3 && \ diff --git a/ci/docker/python-wheel-musllinux.dockerfile b/ci/docker/python-wheel-musllinux.dockerfile index d00d44bd093..40b7bceda08 100644 --- a/ci/docker/python-wheel-musllinux.dockerfile +++ b/ci/docker/python-wheel-musllinux.dockerfile @@ -92,6 +92,7 @@ RUN --mount=type=secret,id=github_repository_owner \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ + --x-feature=opentelemetry \ --x-feature=orc \ --x-feature=parquet \ --x-feature=s3 && \ diff --git a/ci/docker/python-wheel-windows-vs2022-base.dockerfile b/ci/docker/python-wheel-windows-vs2022-base.dockerfile index e4e2eaef82f..f1bc314d013 100644 --- a/ci/docker/python-wheel-windows-vs2022-base.dockerfile +++ b/ci/docker/python-wheel-windows-vs2022-base.dockerfile @@ -141,6 +141,7 @@ RUN vcpkg install ` --x-feature=flight ` 
--x-feature=gcs ` --x-feature=json ` + --x-feature=opentelemetry ` --x-feature=orc ` --x-feature=parquet ` --x-feature=s3 diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 0990a842e94..94f3e7ba89c 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -73,6 +73,7 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_WITH_BROTLI:=ON} : ${ARROW_WITH_BZ2:=ON} : ${ARROW_WITH_LZ4:=ON} +: ${ARROW_WITH_OPENTELEMETRY:=ON} : ${ARROW_WITH_SNAPPY:=ON} : ${ARROW_WITH_ZLIB:=ON} : ${ARROW_WITH_ZSTD:=ON} @@ -125,6 +126,7 @@ cmake \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ + -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY} \ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index e10766ef37e..d69a6aac54f 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -53,6 +53,7 @@ set ARROW_TENSORFLOW=ON set ARROW_WITH_BROTLI=ON set ARROW_WITH_BZ2=ON set ARROW_WITH_LZ4=ON +set ARROW_WITH_OPENTELEMETRY=ON set ARROW_WITH_SNAPPY=ON set ARROW_WITH_ZLIB=ON set ARROW_WITH_ZSTD=ON @@ -94,6 +95,7 @@ cmake ^ -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^ -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^ -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^ + -DARROW_WITH_OPENTELEMETRY=%ARROW_WITH_OPENTELEMETRY% ^ -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^ -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^ -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^ diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index ceebbc5ad01..013c09765fb 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -25,7 +25,13 @@ function check_arrow_visibility { # Filter out Arrow 
symbols and see if anything remains. # '_init' and '_fini' symbols may or not be present, we don't care. # (note we must ignore the grep exit status when no match is found) - grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log + local allowed_symbols='(arrow|\b_init\b|\b_fini\b)' + # OpenTelemetry symbols are intentionally exported for features like + # automatic span linking. See cpp/src/arrow/symbols.map for more details. + if [[ "${ARROW_WITH_OPENTELEMETRY:-OFF}" == "ON" ]]; then + allowed_symbols="${allowed_symbols}|(opentelemetry)" + fi + grep ' T ' nm_arrow.log | grep -v -E "${allowed_symbols}" | cat - > visible_symbols.log if [[ -f visible_symbols.log && `cat visible_symbols.log | wc -l` -eq 0 ]]; then return 0 @@ -65,6 +71,7 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" : ${ARROW_WITH_BROTLI:=ON} : ${ARROW_WITH_BZ2:=ON} : ${ARROW_WITH_LZ4:=ON} +: ${ARROW_WITH_OPENTELEMETRY:=ON} : ${ARROW_WITH_SNAPPY:=ON} : ${ARROW_WITH_ZLIB:=ON} : ${ARROW_WITH_ZSTD:=ON} @@ -124,6 +131,7 @@ cmake \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ + -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY} \ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 5dfe61a0c60..b0334690969 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -140,6 +140,17 @@ ] } ] + }, + "opentelemetry": { + "description": "OpenTelemetry support", + "dependencies": [ + { + "name": "opentelemetry-cpp", + "features": [ + "otlp-http" + ] + } + ] } } } diff --git a/cpp/src/arrow/flight/server_tracing_middleware.cc b/cpp/src/arrow/flight/server_tracing_middleware.cc index 0cdf59308d3..a22e1a98ce4 100644 --- a/cpp/src/arrow/flight/server_tracing_middleware.cc +++ b/cpp/src/arrow/flight/server_tracing_middleware.cc @@ -136,7 +136,7 @@ 
class TracingServerMiddlewareFactory : public ServerMiddlewareFactory { options.kind = otel::trace::SpanKind::kServer; options.parent = otel::trace::GetSpan(new_otel_context)->GetContext(); - auto tracer = otel::trace::Provider::GetTracerProvider()->GetTracer("arrow"); + auto tracer = arrow::internal::tracing::GetTracer(); auto method_name = ToString(info.method); auto span = tracer->StartSpan( method_name, diff --git a/cpp/src/arrow/util/tracing_internal.h b/cpp/src/arrow/util/tracing_internal.h index 8e20e657095..604e7e7d68d 100644 --- a/cpp/src/arrow/util/tracing_internal.h +++ b/cpp/src/arrow/util/tracing_internal.h @@ -23,6 +23,8 @@ #include "arrow/util/config.h" #ifdef ARROW_WITH_OPENTELEMETRY +// Avoid for example defining max() macro +# include "arrow/util/windows_compatibility.h" # ifdef _MSC_VER # pragma warning(push) # pragma warning(disable : 4522) @@ -123,13 +125,13 @@ struct Scope { opentelemetry::trace::Scope scope_impl; }; -opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& UnwrapSpan( +ARROW_EXPORT opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& UnwrapSpan( ::arrow::util::tracing::SpanDetails* span); -const opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& UnwrapSpan( - const ::arrow::util::tracing::SpanDetails* span); +ARROW_EXPORT const opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& +UnwrapSpan(const ::arrow::util::tracing::SpanDetails* span); -opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& RewrapSpan( +ARROW_EXPORT opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>& RewrapSpan( ::arrow::util::tracing::SpanDetails* span, opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> ot_span); diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index fb57f131ad1..ab17d11da1f 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -89,6 +89,7 @@ jobs: --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ + --x-feature=opentelemetry \ --x-feature=orc \ --x-feature=parquet \ --x-feature=s3 diff --git a/python/examples/flight/middleware.py b/python/examples/flight/middleware.py 
index 2056bae1f91..c4b0de0582d 100644 --- a/python/examples/flight/middleware.py +++ b/python/examples/flight/middleware.py @@ -138,6 +138,11 @@ def main(): "simply call the given server for the response. Demonstrates " "propagation of the trace ID between servers."), ) + server.add_argument( + "--otel", + action="store_true", + help="Use OpenTelemetry instrumentation." + ) args = parser.parse_args() if not getattr(args, "command"): @@ -145,10 +150,13 @@ def main(): return 1 if args.command == "server": + middleware = {"trace": TracingServerMiddlewareFactory()} + if args.otel: + middleware["otel"] = flight.TracingServerMiddlewareFactory() server = FlightServer( args.delegate, location=args.listen, - middleware={"trace": TracingServerMiddlewareFactory()}) + middleware=middleware) server.serve() elif args.command == "client": client = flight.connect( diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 18a40d877c3..27655e5cdab 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -60,7 +60,7 @@ def parse_git(root, **kwargs): MonthDayNano, VersionInfo, build_info, cpp_build_info, cpp_version, cpp_version_info, runtime_info, cpu_count, set_cpu_count, enable_signal_handlers, - io_thread_count, set_io_thread_count) + io_thread_count, is_opentelemetry_enabled, set_io_thread_count) def show_versions(): @@ -135,6 +135,7 @@ def print_entry(label, value): for module in modules: status = "Enabled" if _module_is_available(module) else "-" print(f" {module: <20}: {status: <8}") + print(f" {'opentelemetry': <20}: {'Enabled' if is_opentelemetry_enabled() else '-': <8}") print("\nFilesystems:") filesystems = ["AzureFileSystem", "GcsFileSystem", diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 4724c52ccb5..72c278d3e74 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -286,11 +286,12 @@ cdef extern from 
"arrow/python/benchmark.h" namespace "arrow::py::benchmark": cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil: void GdbTestSession "arrow::gdb::TestSession"() -cdef extern from "arrow/python/helpers.h" namespace "arrow::py::internal": - c_bool IsThreadingEnabled() - cdef extern from "arrow/python/config.h" namespace "arrow::py": cdef cppclass CBuildInfo "arrow::py::BuildInfo": c_string build_type const CBuildInfo& GetBuildInfo "arrow::py::GetBuildInfo"() + +cdef extern from "arrow/python/config.h" namespace "arrow::py::internal": + c_bool IsOpenTelemetryEnabled() + c_bool IsThreadingEnabled() diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 5dca6fd8d2e..7e97177a6ec 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -85,6 +85,13 @@ def set_cpu_count(int count): check_status(SetCpuThreadPoolCapacity(count)) +def is_opentelemetry_enabled() -> bool: + """ + Returns True if OpenTelemetry is enabled in libarrow. + """ + return libarrow_python.IsOpenTelemetryEnabled() + + def is_threading_enabled() -> bool: """ Returns True if threading is enabled in libarrow. 
diff --git a/python/pyarrow/src/arrow/python/config.cc b/python/pyarrow/src/arrow/python/config.cc index d6647639872..a7a3cdf6918 100644 --- a/python/pyarrow/src/arrow/python/config.cc +++ b/python/pyarrow/src/arrow/python/config.cc @@ -18,6 +18,8 @@ #include "arrow/python/config.h" #include "arrow/python/config_internal.h" +#include "arrow/util/config.h" + namespace arrow { namespace py { @@ -29,6 +31,26 @@ const BuildInfo kBuildInfo = { } // namespace +namespace internal { + +bool IsOpenTelemetryEnabled() { +#ifdef ARROW_WITH_OPENTELEMETRY + return true; +#else + return false; +#endif +} + +bool IsThreadingEnabled() { +#ifdef ARROW_ENABLE_THREADING + return true; +#else + return false; +#endif +} + +} // namespace internal + const BuildInfo& GetBuildInfo() { return kBuildInfo; } } // namespace py diff --git a/python/pyarrow/src/arrow/python/config.h b/python/pyarrow/src/arrow/python/config.h index 9af06f27166..ac49b048eb1 100644 --- a/python/pyarrow/src/arrow/python/config.h +++ b/python/pyarrow/src/arrow/python/config.h @@ -34,5 +34,14 @@ struct BuildInfo { ARROW_PYTHON_EXPORT const BuildInfo& GetBuildInfo(); +namespace internal { + +ARROW_PYTHON_EXPORT +bool IsOpenTelemetryEnabled(); + +ARROW_PYTHON_EXPORT +bool IsThreadingEnabled(); + +} // namespace internal } // namespace py -} // namespace arrow \ No newline at end of file +} // namespace arrow diff --git a/python/pyarrow/src/arrow/python/helpers.cc b/python/pyarrow/src/arrow/python/helpers.cc index 0a24b259310..acdd84badf9 100644 --- a/python/pyarrow/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/arrow/python/helpers.cc @@ -31,7 +31,6 @@ #include "arrow/python/decimal.h" #include "arrow/type_fwd.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/config.h" #include "arrow/util/float16.h" #include "arrow/util/logging.h" @@ -491,14 +490,6 @@ void DebugPrint(PyObject* obj) { PySys_WriteStderr("%s\n", repr.c_str()); } -bool IsThreadingEnabled() { -#ifdef ARROW_ENABLE_THREADING - return true; 
-#else - return false; -#endif -} - } // namespace internal } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/arrow/python/helpers.h b/python/pyarrow/src/arrow/python/helpers.h index b0cf1010289..a4eacdc3420 100644 --- a/python/pyarrow/src/arrow/python/helpers.h +++ b/python/pyarrow/src/arrow/python/helpers.h @@ -152,9 +152,6 @@ Status IntegerScalarToFloat32Safe(PyObject* obj, float* result); // \brief Print Python object __repr__ void DebugPrint(PyObject* obj); -ARROW_PYTHON_EXPORT -bool IsThreadingEnabled(); - } // namespace internal } // namespace py } // namespace arrow diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index 9e7bb312398..1ea767ee378 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -22,6 +22,8 @@ import pathlib import signal import struct +import subprocess +import sys import tempfile import threading import time @@ -36,7 +38,7 @@ import pytest import pyarrow as pa -from pyarrow.lib import IpcReadOptions, ReadStats, tobytes +from pyarrow.lib import IpcReadOptions, ReadStats, is_opentelemetry_enabled, tobytes from pyarrow.util import find_free_port from pyarrow.tests import util @@ -2644,6 +2646,35 @@ def test_tracing(): pass +def test_tracing_server_middleware_emits_traces(): + # Validate that we are able to emit traces to stdout when + # ARROW_TRACING_BACKEND=ostream + if not is_opentelemetry_enabled(): + pytest.skip("Arrow not built with OpenTelemetry") + code = """if 1: + import pyarrow.flight as flight + from pyarrow.flight import FlightServerBase, FlightClient + + class SimpleServer(FlightServerBase): + def do_action(self, context, action): + return [] + + with SimpleServer( + middleware={"otel": flight.TracingServerMiddlewareFactory()} + ) as server: + with FlightClient(('localhost', server.port)) as client: + list(client.do_action((b"", b""))) + """ + env = os.environ.copy() + env['ARROW_TRACING_BACKEND'] = "ostream" + 
env['OTEL_SERVICE_NAME'] = "pyarrow-testing-service" + res = subprocess.run([sys.executable, "-c", code], env=env, + capture_output=True) + assert res.returncode == 0, res.stderr + msg = "Expected service name in trace output" + assert b"service.name: pyarrow-testing-service" in res.stdout, msg + + def test_do_put_does_not_crash_when_schema_is_none(): client = FlightClient('grpc+tls://localhost:9643', disable_server_verification=True)