Skip to content

Commit a0a89a8

Browse files
[Common] Disabled the tuned NVFP4 kernels (#2615)
* Disabled the tuned NVFP4 kernels
  Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
* Disabled fast math in cpp tests
  Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
---------
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
1 parent 52ee5ea commit a0a89a8

2 files changed

Lines changed: 5 additions & 10 deletions

File tree

tests/cpp/operator/test_cast_nvfp4_transpose.cu

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -677,11 +677,6 @@ std::vector<ActivationType> Activation_types = {
677677
ActivationType::Identity
678678
};
679679

680-
std::vector<bool> use_fast_nvfp4_scaling_vec = {
681-
false,
682-
true
683-
};
684-
685680
} // namespace
686681

687682
class FusedCastTransposeNVFP4TestSuite : public ::testing::TestWithParam
@@ -743,7 +738,7 @@ INSTANTIATE_TEST_SUITE_P(
743738
::testing::ValuesIn(Activation_types),
744739
::testing::ValuesIn(tensor_dims),
745740
::testing::Values(DType::kBFloat16),
746-
::testing::ValuesIn(use_fast_nvfp4_scaling_vec)),
741+
::testing::Values(false)),
747742
[](const testing::TestParamInfo<FusedCastTransposeNVFP4TestSuite::ParamType>& info) {
748743
std::string name = to_string(std::get<0>(info.param));
749744
const auto& shape = std::get<1>(info.param);

transformer_engine/common/cast/nvfp4/quantize_transpose_nvfp4.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,10 +1168,10 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output,
11681168
// TODO(Frank): Is there a better way to do this?
11691169
bool return_transpose = output->has_columnwise_data();
11701170

1171-
if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
1172-
quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
1173-
return;
1174-
}
1171+
// if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
1172+
// quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
1173+
// return;
1174+
// }
11751175

11761176
constexpr bool COMPUTE_ACTIVATIONS = false;
11771177
using ParamOP = Empty;

0 commit comments

Comments
 (0)