Add vector_math::FCLAMP
This CL adds a new vector_math FCLAMP function, which clamps floats to the [-1.0, 1.0] range, and replaces all NaNs with 0s. Replacing NaNs with 0s is a change in behavior in some code paths, but it is better than replacing them with -1.0 or 1.0. This prevents NaNs from becoming audible samples, and has the added benefit of being easy and efficient to write in various SIMD instruction sets. Using std::min() + std::max() proved to be faster than std::clamp() in one microbenchmark (on an x64 gLinux machine), as this likely compiles down into 2 CMOV instructions. Optimized versions of FCLAMP will be introduced in future CLs. Bug: 401598584 Change-Id: I249f367c9824a2b5a1941154a0e1807edaa14d2b Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6337035 Commit-Queue: Thomas Guilbert <tguilbert@chromium.org> Reviewed-by: Dale Curtis <dalecurtis@chromium.org> Cr-Commit-Position: refs/heads/main@{#1430551}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
340cde95c3
commit
c5eb8f04f5
@ -35,6 +35,10 @@
|
||||
|
||||
namespace media::vector_math {
|
||||
|
||||
// Lower bound applied to every sample by FCLAMP_C().
static constexpr float kClampMin = -1.0f;
|
||||
// Upper bound applied to every sample by FCLAMP_C().
static constexpr float kClampMax = 1.0f;
|
||||
// Value substituted for NaN samples by FCLAMP_C() before clamping.
static constexpr float kSilence = 0.0f;
|
||||
|
||||
void FMAC(base::span<const float> src, float scale, base::span<float> dest) {
|
||||
if (src.empty()) {
|
||||
return;
|
||||
@ -93,6 +97,33 @@ void FMUL_C(const float src[], float scale, int len, float dest[]) {
|
||||
}
|
||||
}
|
||||
|
||||
void FCLAMP(base::span<const float> src, base::span<float> dest) {
  // An empty input is a no-op: `dest` is left untouched and none of the
  // checks below are evaluated.
  if (src.empty()) {
    return;
  }

  // `dest` must be able to hold every sample of `src`, and both buffers must
  // satisfy `kRequiredAlignment`.
  CHECK_LE(src.size(), dest.size());
  CHECK(base::IsAligned(src.data(), kRequiredAlignment));
  CHECK(base::IsAligned(dest.data(), kRequiredAlignment));

  // The implementation is picked once, the first time this function runs.
  // Only the C version is available for now.
  using ClampFunction = void (*)(const float[], int, float[]);
  static const ClampFunction clamp_impl = []() -> ClampFunction {
    // TODO(crbug.com/401598584): Add optimized versions of these functions.
    return FCLAMP_C;
  }();

  clamp_impl(src.data(), src.size(), dest.data());
}
|
||||
|
||||
void FCLAMP_C(const float src[], int len, float dest[]) {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const float sample = src[i];
|
||||
const float temp = std::isnan(sample) ? kSilence : sample;
|
||||
// Using std::max + std::min is faster than std::clamp on official builds.
|
||||
// Indeed, there is an extra instruction to ensure conformity with the C++
|
||||
// standard for some special cases. E.g., `std::clamp(-0.0f, +0.0f, +0.0f)`
|
||||
// must return `-0.0f`.
|
||||
dest[i] = std::max(std::min(temp, kClampMax), kClampMin);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<float, float> EWMAAndMaxPower(float initial_value,
|
||||
base::span<const float> src,
|
||||
float smoothing_factor) {
|
||||
|
@ -27,6 +27,12 @@ MEDIA_SHMEM_EXPORT void FMUL(base::span<const float> src,
|
||||
float scale,
|
||||
base::span<float> dest);
|
||||
|
||||
// Clamps each element in `src` to the [-1.0, +1.0] range and stores it in `dest`,
|
||||
// replacing NaNs with 0s (silence).
|
||||
// `src` and `dest` must be aligned by `kRequiredAlignment`.
|
||||
MEDIA_SHMEM_EXPORT void FCLAMP(base::span<const float> src,
|
||||
base::span<float> dest);
|
||||
|
||||
// Computes the exponentially-weighted moving average power of a signal by
|
||||
// iterating the recurrence:
|
||||
//
|
||||
|
@ -7,13 +7,16 @@
|
||||
#pragma allow_unsafe_buffers
|
||||
#endif
|
||||
|
||||
#include "media/base/vector_math.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "base/containers/span_writer.h"
|
||||
#include "base/cpu.h"
|
||||
#include "base/memory/aligned_memory.h"
|
||||
#include "base/time/time.h"
|
||||
#include "build/build_config.h"
|
||||
#include "media/base/vector_math.h"
|
||||
#include "media/base/vector_math_testing.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "testing/perf/perf_result_reporter.h"
|
||||
@ -27,6 +30,7 @@ perf_test::PerfResultReporter SetUpReporter(const std::string& story_name) {
|
||||
perf_test::PerfResultReporter reporter("vector_math", story_name);
|
||||
reporter.RegisterImportantMetric("_fmac", "runs/s");
|
||||
reporter.RegisterImportantMetric("_fmul", "runs/s");
|
||||
// The suffix must match the one the FCLAMP benchmark reports under
// ("_fclamp"); results cannot be attributed to a metric that was registered
// under a different name.
reporter.RegisterImportantMetric("_fclamp", "runs/s");
|
||||
reporter.RegisterImportantMetric("_ewma_and_max_power", "runs/s");
|
||||
return reporter;
|
||||
}
|
||||
@ -44,12 +48,12 @@ class VectorMathPerfTest : public testing::Test {
|
||||
public:
|
||||
VectorMathPerfTest() {
|
||||
// Initialize input and output vectors.
|
||||
input_vector_.reset(static_cast<float*>(base::AlignedAlloc(
|
||||
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
|
||||
output_vector_.reset(static_cast<float*>(base::AlignedAlloc(
|
||||
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
|
||||
fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);
|
||||
fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);
|
||||
input_vector_ = base::AlignedUninit<float>(kVectorSize,
|
||||
vector_math::kRequiredAlignment);
|
||||
output_vector_ = base::AlignedUninit<float>(
|
||||
kVectorSize, vector_math::kRequiredAlignment);
|
||||
std::ranges::fill(input_vector_, 1.0f);
|
||||
std::ranges::fill(output_vector_, 0.0f);
|
||||
}
|
||||
|
||||
VectorMathPerfTest(const VectorMathPerfTest&) = delete;
|
||||
@ -61,10 +65,25 @@ class VectorMathPerfTest : public testing::Test {
|
||||
const std::string& trace_name) {
|
||||
TimeTicks start = TimeTicks::Now();
|
||||
for (int i = 0; i < kBenchmarkIterations; ++i) {
|
||||
fn(input_vector_.get(),
|
||||
kScale,
|
||||
kVectorSize - (aligned ? 0 : 1),
|
||||
output_vector_.get());
|
||||
fn(input_vector_.data(), kScale, kVectorSize - (aligned ? 0 : 1),
|
||||
output_vector_.data());
|
||||
}
|
||||
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
|
||||
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
|
||||
reporter.AddResult(metric_suffix,
|
||||
kBenchmarkIterations / total_time_seconds);
|
||||
}
|
||||
|
||||
void RunClampingBenchmark(void (*fn)(const float[], int, float[]),
|
||||
bool aligned,
|
||||
const std::string& metric_suffix,
|
||||
const std::string& trace_name) {
|
||||
FillInputWithUnclampedData();
|
||||
|
||||
TimeTicks start = TimeTicks::Now();
|
||||
for (int i = 0; i < kBenchmarkIterations; ++i) {
|
||||
fn(input_vector_.data(), kVectorSize - (aligned ? 0 : 1),
|
||||
output_vector_.data());
|
||||
}
|
||||
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
|
||||
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
|
||||
@ -79,7 +98,7 @@ class VectorMathPerfTest : public testing::Test {
|
||||
const std::string& trace_name) {
|
||||
TimeTicks start = TimeTicks::Now();
|
||||
for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
|
||||
fn(0.5f, input_vector_.get(), len, 0.1f);
|
||||
fn(0.5f, input_vector_.data(), len, 0.1f);
|
||||
}
|
||||
double total_time_seconds = (TimeTicks::Now() - start).InSecondsF();
|
||||
perf_test::PerfResultReporter reporter = SetUpReporter(trace_name);
|
||||
@ -88,8 +107,25 @@ class VectorMathPerfTest : public testing::Test {
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_;
|
||||
std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_;
|
||||
base::AlignedHeapArray<float> input_vector_;
|
||||
base::AlignedHeapArray<float> output_vector_;
|
||||
|
||||
private:
|
||||
// Fills `input_vector_` with repeating values, some of which are unclamped.
|
||||
void FillInputWithUnclampedData() {
|
||||
static const float kUnclampedInput[] = {-2.0, -1.0, -0.5, 0.0,
|
||||
0.5, 1.0, 2.0};
|
||||
auto input_span = base::span(kUnclampedInput);
|
||||
auto writer = base::SpanWriter(base::span(input_vector_));
|
||||
|
||||
while (writer.remaining() > input_span.size()) {
|
||||
writer.Write(input_span);
|
||||
}
|
||||
|
||||
if (writer.remaining()) {
|
||||
writer.Write(input_span.first(writer.remaining()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Benchmarks for each optimized vector_math::FMAC() method.
|
||||
@ -168,6 +204,12 @@ TEST_F(VectorMathPerfTest, FMUL_optimized_aligned) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Benchmarks for each optimized vector_math::FCLAMP() method.
|
||||
// Benchmark FCLAMP_C().
|
||||
TEST_F(VectorMathPerfTest, FCLAMP_unoptimized) {
  // The C implementation is benchmarked on the aligned (full-length) vector.
  constexpr bool kAligned = true;
  RunClampingBenchmark(vector_math::FCLAMP_C, kAligned, "_fclamp",
                       "unoptimized");
}
|
||||
|
||||
// Benchmarks for each optimized vector_math::EWMAAndMaxPower() method.
|
||||
// Benchmark EWMAAndMaxPower_C().
|
||||
TEST_F(VectorMathPerfTest, EWMAAndMaxPower_unoptimized) {
|
||||
|
@ -22,6 +22,7 @@ MEDIA_SHMEM_EXPORT void FMUL_C(const float src[],
|
||||
float scale,
|
||||
int len,
|
||||
float dest[]);
|
||||
// Writes the first `len` samples of `src` to `dest`, clamped to the
// [-1.0, 1.0] range, with NaNs replaced by 0.
MEDIA_SHMEM_EXPORT void FCLAMP_C(const float src[], int len, float dest[]);
|
||||
MEDIA_SHMEM_EXPORT std::pair<float, float> EWMAAndMaxPower_C(
|
||||
float initial_value,
|
||||
const float src[],
|
||||
|
@ -11,12 +11,16 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
#include "base/containers/span_reader.h"
|
||||
#include "base/containers/span_writer.h"
|
||||
#include "base/cpu.h"
|
||||
#include "base/memory/aligned_memory.h"
|
||||
#include "base/strings/string_number_conversions.h"
|
||||
#include "base/strings/stringize_macros.h"
|
||||
#include "base/types/zip.h"
|
||||
#include "build/build_config.h"
|
||||
#include "media/base/vector_math_testing.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
@ -29,6 +33,28 @@ static constexpr float kInputFillValue = 1.0;
|
||||
static constexpr float kOutputFillValue = 3.0;
|
||||
static constexpr int kVectorSize = 8192;
|
||||
|
||||
// List of unclamped values that are out of bounds and within bounds.
|
||||
static const float kUnclampedInputValues[] = {
|
||||
std::numeric_limits<float>::quiet_NaN(),
|
||||
std::numeric_limits<float>::signaling_NaN(),
|
||||
-std::numeric_limits<float>::infinity(),
|
||||
std::numeric_limits<float>::infinity(),
|
||||
-2.0,
|
||||
-1.0,
|
||||
-0.5,
|
||||
0.0,
|
||||
0.5,
|
||||
1.0,
|
||||
2.0,
|
||||
};
|
||||
|
||||
// Expected result of clamping `kUnclampedInputValues`.
|
||||
static const float kClampedOutputValues[] = {0, 0, -1.0, 1.0, -1.0, -1.0,
|
||||
-0.5, 0.0, 0.5, 1.0, 1.0};
|
||||
|
||||
static_assert(std::size(kUnclampedInputValues) ==
|
||||
std::size(kClampedOutputValues));
|
||||
|
||||
class VectorMathTest : public testing::Test {
|
||||
public:
|
||||
VectorMathTest() {
|
||||
@ -48,14 +74,50 @@ class VectorMathTest : public testing::Test {
|
||||
std::ranges::fill(output_array_, output);
|
||||
}
|
||||
|
||||
// Prepares a clamping test: `output_array_` is set to `output` everywhere and
// `input_array_` is tiled with the repeating `input` pattern.
void FillTestClampingVectors(base::span<const float> input, float output) {
  std::ranges::fill(output_array_, output);
  FillSpan(input_array_, input);
}
|
||||
|
||||
// Expects every element of `output_array_` to compare equal to `value`.
void VerifyOutput(float value) {
|
||||
EXPECT_TRUE(std::ranges::all_of(
|
||||
output_array_, [value](float datum) { return datum == value; }));
|
||||
}
|
||||
|
||||
// Expects `output_array_` to consist of `values` repeated back to back, with
// the final stretch matching the leading part of `values`.
void VerifyClampOutput(base::span<const float> values) {
  auto reader = base::SpanReader(base::span(output_array_));
  const auto period = values.size();

  // Compare one full period at a time while more than a period is left.
  for (; reader.remaining() > period;) {
    auto output_values = *reader.Read(period);
    EXPECT_EQ(output_values, values);
  }

  // Whatever is left is compared against the start of `values`.
  if (reader.remaining() != 0) {
    auto remaining_values = reader.remaining_span();
    EXPECT_EQ(remaining_values, values.first(remaining_values.size()));
  }
}
|
||||
|
||||
protected:
|
||||
base::AlignedHeapArray<float> input_array_;
|
||||
base::AlignedHeapArray<float> output_array_;
|
||||
|
||||
private:
|
||||
// Fills `dest` with `values`, repeating `values`.
|
||||
void FillSpan(base::span<float> dest, base::span<const float> values) {
|
||||
auto writer = base::SpanWriter(dest);
|
||||
|
||||
// Fill as much as possible with `values`.
|
||||
while (writer.remaining() > values.size()) {
|
||||
writer.Write(values);
|
||||
}
|
||||
|
||||
// Fill the remaining space with the start of values.
|
||||
if (writer.remaining()) {
|
||||
writer.Write(values.first((writer.remaining())));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Ensure each optimized vector_math::FMAC() method returns the same value.
|
||||
@ -158,6 +220,48 @@ TEST_F(VectorMathTest, FMUL) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Ensure each optimized vector_math::FCLAMP() method returns the same value.
|
||||
TEST_F(VectorMathTest, FCLAMP) {
|
||||
{
|
||||
SCOPED_TRACE("FCLAMP");
|
||||
FillTestClampingVectors(kUnclampedInputValues, kOutputFillValue);
|
||||
vector_math::FCLAMP(input_array_, output_array_);
|
||||
VerifyClampOutput(kClampedOutputValues);
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("FCLAMP_C");
|
||||
FillTestClampingVectors(kUnclampedInputValues, kOutputFillValue);
|
||||
vector_math::FCLAMP_C(input_array_.data(), kVectorSize,
|
||||
output_array_.data());
|
||||
VerifyClampOutput(kClampedOutputValues);
|
||||
}
|
||||
}
|
||||
|
||||
// Algorithms handle "leftover" data that is too small to fill an SIMD
|
||||
// instruction differently. Make sure that this data is also properly sanitized.
|
||||
TEST_F(VectorMathTest, FCLAMP_remainder_data) {
|
||||
// Feed in values 1 at a time to guarantee we don't use SIMD.
|
||||
static constexpr int kSmallVectorSize = 1;
|
||||
static constexpr float kGuardValue = 123.0f;
|
||||
|
||||
const auto run_per_value_clamp_test =
|
||||
[&](void (*fn)(const float[], int, float[])) {
|
||||
for (auto [input, output] :
|
||||
base::zip(kUnclampedInputValues, kClampedOutputValues)) {
|
||||
input_array_[0] = input;
|
||||
output_array_[0] = kGuardValue;
|
||||
fn(input_array_.data(), kSmallVectorSize, output_array_.data());
|
||||
EXPECT_EQ(output_array_[0], output);
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
SCOPED_TRACE("FCLAMP_C");
|
||||
run_per_value_clamp_test(vector_math::FCLAMP_C);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VectorMathTest, EmptyInputs) {
|
||||
{
|
||||
SCOPED_TRACE("FMUL");
|
||||
@ -172,6 +276,13 @@ TEST_F(VectorMathTest, EmptyInputs) {
|
||||
vector_math::FMAC(base::span<float>(), kScale, output_array_);
|
||||
VerifyOutput(kOutputFillValue);
|
||||
}
|
||||
|
||||
{
  SCOPED_TRACE("FCLAMP");
  FillTestVectors(kInputFillValue, kOutputFillValue);
  // Exercise FCLAMP itself: an empty source must leave `output_array_`
  // untouched.
  vector_math::FCLAMP(base::span<float>(), output_array_);
  VerifyOutput(kOutputFillValue);
}
|
||||
}
|
||||
|
||||
class EWMATestScenario {
|
||||
|
Reference in New Issue
Block a user