From 5cb9e8253539bdd6841046da9dc75fa2df02bb90 Mon Sep 17 00:00:00 2001 From: Yukang-Lian Date: Sat, 9 May 2026 17:49:40 +0800 Subject: [PATCH] [chore](be) Add S3 read diagnostics ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: Add diagnostic logs and test-only config changes to capture S3 read context when GetObject fails or returns an unexpected length, helping debug AWS SDK response stream flush failures. This PR sets aws_log_level to Trace for test builds. ### Release note None ### Check List (For Author) - Test: Manual test - Ran `git diff --check -- conf/be.conf be/src/io/fs/s3_file_reader.cpp be/src/io/fs/s3_obj_storage_client.cpp`. - Attempted `./build.sh --be -j8`, but it failed before compiling the touched files because `/mnt/disk2/lianyukang/doris/thirdparty/installed/lib64/libaws-cpp-sdk-kinesis.a` is missing. - Behavior changed: Yes. Test-only PR changes default `aws_log_level` in `conf/be.conf` from Error to Trace to collect AWS SDK logs. 
- Does this need documentation: No --- be/src/io/fs/s3_file_reader.cpp | 10 ++++++++++ be/src/io/fs/s3_obj_storage_client.cpp | 18 ++++++++++++++++-- conf/be.conf | 4 ++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/be/src/io/fs/s3_file_reader.cpp b/be/src/io/fs/s3_file_reader.cpp index 4eaa10f3311e06..4ac6ea1cda3c07 100644 --- a/be/src/io/fs/s3_file_reader.cpp +++ b/be/src/io/fs/s3_file_reader.cpp @@ -168,6 +168,12 @@ Status S3FileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_rea // clang-format on _s3_stats.total_get_request_counter++; if (resp.status.code != ErrorCode::OK) { + LOG(WARNING) << "S3_READ_FAIL" + << " path=" << _path.native() << " key=" << _key << " offset=" << offset + << " bytes_req=" << bytes_req << " bytes_read=" << *bytes_read + << " file_size=" << _file_size << " http_code=" << resp.http_code + << " request_id=" << resp.request_id << " status_code=" << resp.status.code + << " status_msg=" << resp.status.msg; if (resp.http_code == static_cast<int>(Aws::Http::HttpResponseCode::TOO_MANY_REQUESTS)) { s3_file_reader_too_many_request_counter << 1; @@ -186,6 +192,10 @@ Status S3FileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_rea } } if (*bytes_read != bytes_req) { + LOG(WARNING) << "S3_READ_SIZE_MISMATCH" + << " path=" << _path.native() << " key=" << _key << " offset=" << offset + << " bytes_req=" << bytes_req << " bytes_read=" << *bytes_read + << " file_size=" << _file_size; std::string msg = fmt::format( "failed to get object, path={} offset={} bytes_req={} bytes_read={} " "file_size={} tries={}", diff --git a/be/src/io/fs/s3_obj_storage_client.cpp b/be/src/io/fs/s3_obj_storage_client.cpp index 1b8bf7b473c076..8968d44657eb02 100644 --- a/be/src/io/fs/s3_obj_storage_client.cpp +++ b/be/src/io/fs/s3_obj_storage_client.cpp @@ -318,12 +318,21 @@ ObjectStorageResponse S3ObjStorageClient::get_object(const ObjectStoragePathOpti size_t* size_return) { Aws::S3::Model::GetObjectRequest request; 
request.WithBucket(opts.bucket).WithKey(opts.key); - request.SetRange(fmt::format("bytes={}-{}", offset, offset + bytes_read - 1)); + auto range = fmt::format("bytes={}-{}", offset, offset + bytes_read - 1); + request.SetRange(range); request.SetResponseStreamFactory(AwsWriteableStreamFactory(buffer, bytes_read)); SCOPED_BVAR_LATENCY(s3_bvar::s3_get_latency); auto outcome = s3_get_rate_limit([&]() { return _client->GetObject(request); }); if (!outcome.IsSuccess()) { + LOG(WARNING) << "S3_GET_FAIL" + << " bucket=" << opts.bucket << " key=" << opts.key << " range=" << range + << " expected_nbytes=" << bytes_read + << " response_code=" << static_cast<int>(outcome.GetError().GetResponseCode()) + << " exception_name=" << outcome.GetError().GetExceptionName() + << " error_type=" << static_cast<int>(outcome.GetError().GetErrorType()) + << " message=" << outcome.GetError().GetMessage() + << " request_id=" << outcome.GetError().GetRequestId(); return {convert_to_obj_response(s3fs_error( outcome.GetError(), fmt::format("failed to read from {}", opts.key))), static_cast<int>(outcome.GetError().GetResponseCode()), @@ -333,6 +342,11 @@ ObjectStorageResponse S3ObjStorageClient::get_object(const ObjectStoragePathOpti // case for incomplete read SYNC_POINT_CALLBACK("s3_obj_storage_client::get_object", size_return); if (*size_return != bytes_read) { + LOG(WARNING) << "S3_GET_SIZE_MISMATCH" + << " bucket=" << opts.bucket << " key=" << opts.key << " range=" << range + << " expected_nbytes=" << bytes_read << " content_length=" << *size_return + << " content_range=" << outcome.GetResult().GetContentRange() + << " request_id=" << outcome.GetResult().GetRequestId(); return {convert_to_obj_response(Status::InternalError( "failed to read from {}(bytes read: {}, bytes req: {}), request_id: {}", opts.key, *size_return, bytes_read, outcome.GetResult().GetRequestId()))}; @@ -502,4 +516,4 @@ std::string S3ObjStorageClient::generate_presigned_url(const ObjectStoragePathOp expiration_secs); } -} // namespace 
doris::io \ No newline at end of file +} // namespace doris::io diff --git a/conf/be.conf b/conf/be.conf index e9024580a65930..46ffcbadcd865f 100644 --- a/conf/be.conf +++ b/conf/be.conf @@ -83,8 +83,8 @@ sys_log_level = INFO # Info = 4, # Debug = 5, # Trace = 6 -# Default to turn off aws sdk log, because aws sdk errors that need to be cared will be output through Doris logs -aws_log_level = 2 +# Test only: enable AWS SDK trace logs to debug S3 read failures. +aws_log_level = 6 # azure sdk log level # Verbose = 1,