0

Add vector_math::FCLAMP

This CL adds a new vector_math FCLAMP function, which clamps floats to
the [-1.0, 1.0] range, and replaces all NaNs with 0s.

Replacing NaNs with 0s is a change in behavior in some code paths, but
it is better than replacing them with -1.0 or 1.0. This prevents NaNs
from becoming audible samples, and has the added benefit of being easy
and efficient to write in various SIMD instruction sets.

Using std::min() + std::max() proved to be faster than std::clamp() in
one microbenchmark (on an x64 gLinux machine), as this likely compiles
down into 2 CMOV instructions.

Optimized versions of FCLAMP will be introduced in future CLs.

Bug: 401598584
Change-Id: I249f367c9824a2b5a1941154a0e1807edaa14d2b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6337035
Commit-Queue: Thomas Guilbert <tguilbert@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1430551}
This commit is contained in:
Thomas Guilbert
2025-03-10 16:28:25 -07:00
committed by Chromium LUCI CQ
parent 340cde95c3
commit c5eb8f04f5
5 changed files with 205 additions and 14 deletions

@ -35,6 +35,10 @@
namespace media::vector_math {
// Lower and upper bounds of the sample range that FCLAMP_C() clamps into.
static constexpr float kClampMin = -1.0f;
static constexpr float kClampMax = 1.0f;
// Value that FCLAMP_C() substitutes for NaN samples.
static constexpr float kSilence = 0.0f;
void FMAC(base::span<const float> src, float scale, base::span<float> dest) {
if (src.empty()) {
return;
@ -93,6 +97,33 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) {
}
}
// Writes a clamped copy of `src` into `dest`. An empty `src` is a no-op.
// Otherwise `dest` must be at least as long as `src`, and both buffers must be
// aligned to `kRequiredAlignment`; violations are fatal (CHECK).
void FCLAMP(base::span<const float> src, base::span<float> dest) {
  if (src.empty()) {
    return;
  }

  CHECK_LE(src.size(), dest.size());
  CHECK(base::IsAligned(src.data(), kRequiredAlignment));
  CHECK(base::IsAligned(dest.data(), kRequiredAlignment));

  // The implementation is selected once, on first use, and cached in a
  // function-local static.
  using ClampFn = void (*)(const float[], int, float[]);
  static const ClampFn clamp_impl = []() -> ClampFn {
    // TODO(crbug.com/401598584): Add optimized versions of these functions.
    return FCLAMP_C;
  }();

  clamp_impl(src.data(), src.size(), dest.data());
}
// Portable implementation of FCLAMP: for each of the first `len` samples of
// `src`, stores into `dest` the sample limited to [kClampMin, kClampMax], with
// NaN samples replaced by kSilence.
void FCLAMP_C(const float src[], int len, float dest[]) {
  for (int index = 0; index < len; ++index) {
    float value = src[index];
    if (std::isnan(value)) {
      value = kSilence;
    }
    // std::min followed by std::max is used instead of std::clamp because it
    // is faster on official builds: std::clamp needs an extra instruction to
    // honor special cases required by the C++ standard, e.g.
    // `std::clamp(-0.0f, +0.0f, +0.0f)` must return `-0.0f`.
    value = std::min(value, kClampMax);
    dest[index] = std::max(value, kClampMin);
  }
}
std::pair<float, float> EWMAAndMaxPower(float initial_value,
base::span<const float> src,
float smoothing_factor) {

@ -27,6 +27,12 @@ MEDIA_SHMEM_EXPORT void FMUL(base::span<const float> src,
float scale,
base::span<float> dest);
// Clamps each element in `src` to the [-1.0, +1.0] range and stores the result
// in `dest`, replacing NaNs with 0s (silence).
// `src` and `dest` must be aligned by `kRequiredAlignment`, and `dest` must be
// at least as large as `src`.
MEDIA_SHMEM_EXPORT void FCLAMP(base::span<const float> src,
base::span<float> dest);
// Computes the exponentially-weighted moving average power of a signal by
// iterating the recurrence:
//

@ -7,13 +7,16 @@
#pragma allow_unsafe_buffers
#endif
#include "media/base/vector_math.h"
#include <algorithm>
#include <memory>
#include "base/containers/span_writer.h"
#include "base/cpu.h"
#include "base/memory/aligned_memory.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_result_reporter.h"
@ -27,6 +30,7 @@ perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
perf_test::PerfResultReporter reporter("vector_math", story_name);
reporter.RegisterImportantMetric("_fmac", "runs/s");
reporter.RegisterImportantMetric("_fmul", "runs/s");
reporter.RegisterImportantMetric("_clamp", "runs/s");
reporter.RegisterImportantMetric("_ewma_and_max_power", "runs/s");
return reporter;
}
@ -44,12 +48,12 @@ class VectorMathPerfTest : public testing::Test {
public:
VectorMathPerfTest() {
// Initialize input and output vectors.
input_vector_.reset(static_cast<float*>(base::AlignedAlloc(
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
output_vector_.reset(static_cast<float*>(base::AlignedAlloc(
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);
fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);
input_vector_ = base::AlignedUninit<float>(kVectorSize,
vector_math::kRequiredAlignment);
output_vector_ = base::AlignedUninit<float>(
kVectorSize, vector_math::kRequiredAlignment);
std::ranges::fill(input_vector_, 1.0f);
std::ranges::fill(output_vector_, 0.0f);
}
VectorMathPerfTest(const VectorMathPerfTest&) = delete;
@ -61,10 +65,25 @@ class VectorMathPerfTest : public testing::Test {
const std::string& trace_name) {
TimeTicks start = TimeTicks::Now();
for (int i = 0; i < kBenchmarkIterations; ++i) {
fn(input_vector_.get(),
kScale,
kVectorSize - (aligned ? 0 : 1),
output_vector_.get());
fn(input_vector_.data(), kScale, kVectorSize - (aligned ? 0 : 1),
output_vector_.data());
}
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
reporter.AddResult(metric_suffix,
kBenchmarkIterations / total_time_seconds);
}
void RunClampingBenchmark(void (*fn)(const float[], int, float[]),
bool aligned,
const std::string& metric_suffix,
const std::string& trace_name) {
FillInputWithUnclampedData();
TimeTicks start = TimeTicks::Now();
for (int i = 0; i < kBenchmarkIterations; ++i) {
fn(input_vector_.data(), kVectorSize - (aligned ? 0 : 1),
output_vector_.data());
}
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
@ -79,7 +98,7 @@ class VectorMathPerfTest : public testing::Test {
const std::string& trace_name) {
TimeTicks start = TimeTicks::Now();
for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
fn(0.5f, input_vector_.get(), len, 0.1f);
fn(0.5f, input_vector_.data(), len, 0.1f);
}
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
@ -88,8 +107,25 @@ class VectorMathPerfTest : public testing::Test {
}
protected:
std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_;
std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_;
base::AlignedHeapArray<float> input_vector_;
base::AlignedHeapArray<float> output_vector_;
private:
// Tiles `input_vector_` with a short repeating pattern that mixes samples
// inside [-1.0, 1.0] with samples outside of it.
void FillInputWithUnclampedData() {
  static const float kUnclampedInput[] = {-2.0, -1.0, -0.5, 0.0,
                                          0.5,  1.0,  2.0};
  const auto pattern = base::span(kUnclampedInput);
  auto sink = base::SpanWriter(base::span(input_vector_));

  // Lay down whole copies of the pattern while more than one copy still fits.
  while (pattern.size() < sink.remaining()) {
    sink.Write(pattern);
  }

  // Top up whatever space is left with the leading part of the pattern.
  if (const auto leftover = sink.remaining(); leftover) {
    sink.Write(pattern.first(leftover));
  }
}
};
// Benchmarks for each optimized vector_math::FMAC() method.
@ -168,6 +204,12 @@ TEST_F(VectorMathPerfTest, FMUL_optimized_aligned) {
#endif
}
// Benchmarks for each optimized vector_math::FCLAMP() method.
// Benchmark FCLAMP_C().
TEST_F(VectorMathPerfTest, FCLAMP_unoptimized) {
  // The metric suffix must be one that SetUpReporter() registered. The clamp
  // metric is registered there as "_clamp", so report under that exact name
  // rather than an unregistered "_fclamp".
  RunClampingBenchmark(vector_math::FCLAMP_C, /*aligned=*/true, "_clamp",
                       "unoptimized");
}
// Benchmarks for each optimized vector_math::EWMAAndMaxPower() method.
// Benchmark EWMAAndMaxPower_C().
TEST_F(VectorMathPerfTest, EWMAAndMaxPower_unoptimized) {

@ -22,6 +22,7 @@ MEDIA_SHMEM_EXPORT void FMUL_C(const float src[],
float scale,
int len,
float dest[]);
// Portable C++ implementation behind vector_math::FCLAMP(): clamps `len`
// floats from `src` to [-1.0, 1.0], replaces NaNs with 0, and writes the
// results to `dest`.
MEDIA_SHMEM_EXPORT void FCLAMP_C(const float src[], int len, float dest[]);
MEDIA_SHMEM_EXPORT std::pair<float, float> EWMAAndMaxPower_C(
float initial_value,
const float src[],

@ -11,12 +11,16 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include "base/containers/span_reader.h"
#include "base/containers/span_writer.h"
#include "base/cpu.h"
#include "base/memory/aligned_memory.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringize_macros.h"
#include "base/types/zip.h"
#include "build/build_config.h"
#include "media/base/vector_math_testing.h"
#include "testing/gtest/include/gtest/gtest.h"
@ -29,6 +33,28 @@ static constexpr float kInputFillValue = 1.0;
static constexpr float kOutputFillValue = 3.0;
static constexpr int kVectorSize = 8192;
// List of unclamped values that are out of bounds and within bounds.
// Also covers NaNs and infinities. Entry i of this table is expected to clamp
// to entry i of `kClampedOutputValues`.
static const float kUnclampedInputValues[] = {
std::numeric_limits<float>::quiet_NaN(),
std::numeric_limits<float>::signaling_NaN(),
-std::numeric_limits<float>::infinity(),
std::numeric_limits<float>::infinity(),
-2.0,
-1.0,
-0.5,
0.0,
0.5,
1.0,
2.0,
};
// Expected result of clamping `kUnclampedInputValues`.
// NaNs become 0, values below -1 become -1, values above +1 become +1, and
// in-range values are unchanged.
static const float kClampedOutputValues[] = {0, 0, -1.0, 1.0, -1.0, -1.0,
-0.5, 0.0, 0.5, 1.0, 1.0};
// The two tables are compared element by element, so they must stay the same
// length.
static_assert(std::size(kUnclampedInputValues) ==
std::size(kClampedOutputValues));
class VectorMathTest : public testing::Test {
public:
VectorMathTest() {
@ -48,14 +74,50 @@ class VectorMathTest : public testing::Test {
std::ranges::fill(output_array_, output);
}
// Prepares the fixture buffers for a clamping test: `input_array_` is tiled
// with repeated copies of `input`, and every element of `output_array_` is
// set to `output`.
void FillTestClampingVectors(base::span<const float> input, float output) {
  std::ranges::fill(output_array_, output);
  FillSpan(input_array_, input);
}
// Expects every element of `output_array_` to compare equal to `value`.
void VerifyOutput(float value) {
  const auto equals_value = [value](float datum) { return datum == value; };
  EXPECT_TRUE(std::ranges::all_of(output_array_, equals_value));
}
// Expects `output_array_` to consist of back-to-back copies of `values`,
// finishing with a (possibly partial) leading portion of `values`.
// `values` must not be empty.
void VerifyClampOutput(base::span<const float> values) {
  // Reading zero elements never advances the reader, so an empty pattern
  // would make the loop below spin forever. Fail the test instead.
  ASSERT_FALSE(values.empty());

  auto reader = base::SpanReader(base::span(output_array_));
  // Compare one whole pattern at a time while more than one pattern remains.
  while (reader.remaining() > values.size()) {
    auto output_values = *reader.Read(values.size());
    EXPECT_EQ(output_values, values);
  }
  // Compare the tail against the matching prefix of the pattern.
  if (reader.remaining()) {
    auto remaining_values = reader.remaining_span();
    EXPECT_EQ(remaining_values, values.first(remaining_values.size()));
  }
}
protected:
base::AlignedHeapArray<float> input_array_;
base::AlignedHeapArray<float> output_array_;
private:
// Fills `dest` with `values`, repeating `values` as many times as needed and
// ending with a partial copy if `dest` is not a multiple of `values` in size.
// `values` must not be empty.
void FillSpan(base::span<float> dest, base::span<const float> values) {
  // Writing zero elements never advances the writer, so an empty pattern
  // would make the loop below spin forever. Fail the test instead.
  ASSERT_FALSE(values.empty());

  auto writer = base::SpanWriter(dest);
  // Fill as much as possible with `values`.
  while (writer.remaining() > values.size()) {
    writer.Write(values);
  }
  // Fill the remaining space with the start of values.
  if (writer.remaining()) {
    writer.Write(values.first(writer.remaining()));
  }
}
};
// Ensure each optimized vector_math::FMAC() method returns the same value.
@ -158,6 +220,48 @@ TEST_F(VectorMathTest, FMUL) {
#endif
}
// Ensure each optimized vector_math::FCLAMP() method returns the same value.
TEST_F(VectorMathTest, FCLAMP) {
  // Refills the input with the repeating unclamped pattern and resets the
  // output to the fill value, so each variant starts from the same state.
  const auto reset_buffers = [this] {
    FillTestClampingVectors(kUnclampedInputValues, kOutputFillValue);
  };

  {
    SCOPED_TRACE("FCLAMP");
    reset_buffers();
    vector_math::FCLAMP(input_array_, output_array_);
    VerifyClampOutput(kClampedOutputValues);
  }

  {
    SCOPED_TRACE("FCLAMP_C");
    reset_buffers();
    vector_math::FCLAMP_C(input_array_.data(), kVectorSize,
                          output_array_.data());
    VerifyClampOutput(kClampedOutputValues);
  }
}
// Algorithms handle "leftover" data that is too small to fill an SIMD
// instruction differently. Make sure that this data is also properly sanitized.
TEST_F(VectorMathTest, FCLAMP_remainder_data) {
  // Feed in values 1 at a time to guarantee we don't use SIMD.
  static constexpr int kSmallVectorSize = 1;
  // Written to the output slot before each call so a stale result cannot pass.
  static constexpr float kGuardValue = 123.0f;

  using ClampFn = void (*)(const float[], int, float[]);
  // Runs `clamp` on each test input individually and checks the single output
  // sample against the matching expected value.
  const auto clamp_each_value = [&](ClampFn clamp) {
    for (auto [sample, expected] :
         base::zip(kUnclampedInputValues, kClampedOutputValues)) {
      output_array_[0] = kGuardValue;
      input_array_[0] = sample;
      clamp(input_array_.data(), kSmallVectorSize, output_array_.data());
      EXPECT_EQ(output_array_[0], expected);
    }
  };

  {
    SCOPED_TRACE("FCLAMP_C");
    clamp_each_value(vector_math::FCLAMP_C);
  }
}
TEST_F(VectorMathTest, EmptyInputs) {
{
SCOPED_TRACE("FMUL");
@ -172,6 +276,13 @@ TEST_F(VectorMathTest, EmptyInputs) {
vector_math::FMAC(base::span<float>(), kScale, output_array_);
VerifyOutput(kOutputFillValue);
}
{
  SCOPED_TRACE("FCLAMP");
  FillTestVectors(kInputFillValue, kOutputFillValue);
  // Exercise FCLAMP itself (this scope previously called FMAC): an empty
  // source must leave `output_array_` untouched.
  vector_math::FCLAMP(base::span<float>(), output_array_);
  VerifyOutput(kOutputFillValue);
}
}
class EWMATestScenario {