0

[CodeHealth] Spanify vector_math top level APIs

This CL converts top level vector_math APIs to use spans instead of
array+length. The actual implementation of the algorithms is untouched,
and not spanified.

Spanifying the algorithms themselves had a large performance impact, so it
was not attempted. There was a ~2 orders of magnitude slowdown from using
std::ranges::transform or SpanReaders.

Updating these APIs still provides some benefits: FMAC and FMUL now CHECK
that the destination is at least as long as the source.

Bug: 373960632
Change-Id: I2fcd44c5aa748d35819c77c314ba6bbc7d106add
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6238738
Auto-Submit: Thomas Guilbert <tguilbert@chromium.org>
Reviewed-by: Kenneth MacKay <kmackay@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: Ahmed Fakhry <afakhry@chromium.org>
Commit-Queue: Thomas Guilbert <tguilbert@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1419023}
This commit is contained in:
Thomas Guilbert
2025-02-11 17:34:26 -08:00
committed by Chromium LUCI CQ
parent 634339b600
commit cbc9f60e71
14 changed files with 276 additions and 219 deletions

@ -9,6 +9,7 @@
#include <cstring>
#include "base/check_op.h"
#include "base/containers/span.h"
#include "media/base/vector_math.h"
namespace {
@ -24,7 +25,9 @@ struct FMACTraits {
float volume,
int frames,
float* dest) {
::media::vector_math::FMAC(src, volume, frames, dest);
const size_t size = static_cast<size_t>(frames);
::media::vector_math::FMAC(base::span(src, size), volume,
base::span(dest, size));
}
static void ProcessSingleDatum(const float* src, float volume, float* dest) {
@ -43,7 +46,9 @@ struct FMULTraits {
float volume,
int frames,
float* dest) {
::media::vector_math::FMUL(src, volume, frames, dest);
const size_t size = static_cast<size_t>(frames);
::media::vector_math::FMUL(base::span(src, size), volume,
base::span(dest, size));
}
static void ProcessSingleDatum(const float* src, float volume, float* dest) {

@ -10,6 +10,7 @@
#include "chromeos/ash/services/recording/audio_capture_util.h"
#include "base/memory/aligned_memory.h"
#include "base/numerics/safe_conversions.h"
#include "chromeos/ash/services/recording/recording_service_constants.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_parameters.h"
@ -26,16 +27,17 @@ static_assert(kAudioSampleRate % 100 == 0,
// Using `media::vector_math::FMAC()` works only if the addresses of `src` and
// `dest` are aligned to `kRequiredAlignment` bytes.
// This returns true if that's the case.
bool CanUseVectorMath(const float* src, const float* dest) {
return base::IsAligned(src, media::vector_math::kRequiredAlignment) &&
base::IsAligned(dest, media::vector_math::kRequiredAlignment);
bool CanUseVectorMath(base::span<const float> src,
base::span<const float> dest) {
return base::IsAligned(src.data(), media::vector_math::kRequiredAlignment) &&
base::IsAligned(dest.data(), media::vector_math::kRequiredAlignment);
}
// If `media::vector_math::FMAC()` cannot be used due to lack of required
// alignment, this version can be used to accumulate the items from `src` on
// top of the values existing in `dest`.
void Accumulate(const float* src, float* dest, int length) {
for (int i = 0; i < length; ++i) {
void Accumulate(base::span<const float> src, base::span<float> dest) {
for (size_t i = 0; i < src.size(); ++i) {
dest[i] += src[i];
}
}
@ -68,20 +70,23 @@ std::unique_ptr<media::AudioBus> CreateStereoZeroInitializedAudioBusForDuration(
void AccumulateBusTo(const media::AudioBus& source,
media::AudioBus* destination,
int source_start_frame,
int destination_start_frame,
int length) {
CHECK_EQ(source.channels(), source.channels());
CHECK_LE(source_start_frame + length, source.frames());
CHECK_LE(length, source.frames());
CHECK_LE(destination_start_frame + length, destination->frames());
const size_t dest_offset =
base::checked_cast<size_t>(destination_start_frame);
const size_t count = base::checked_cast<size_t>(length);
for (int i = 0; i < source.channels(); ++i) {
const float* src = &source.channel(i)[source_start_frame];
float* dest = &destination->channel(i)[destination_start_frame];
auto src = source.channel_span(i).first(count);
auto dest = destination->channel_span(i).subspan(dest_offset, count);
if (CanUseVectorMath(src, dest)) {
media::vector_math::FMAC(src, /*scale=*/1, length, dest);
media::vector_math::FMAC(src, /*scale=*/1, dest);
} else {
Accumulate(src, dest, length);
Accumulate(src, dest);
}
}
}

@ -47,16 +47,14 @@ std::unique_ptr<media::AudioBus> CreateStereoZeroInitializedAudioBusForFrames(
std::unique_ptr<media::AudioBus> CreateStereoZeroInitializedAudioBusForDuration(
base::TimeDelta duration);
// Accumulates the `length` number of audio frames in the `source` audio bus
// starting at `source_start_frame`, to the already existing frames in the
// `destination` bus starting at `destination_start_frame`.
// Both `source` and `destination` buses must have the same number of channels.
// `source_start_frame` + `length` must be within the bounds of the `source`
// bus, and `destination_start_frame` + `length` also must be within the bounds
// of the `destination` bus.
// Accumulates the first `length` number of audio frames in the `source` audio
// bus to the already existing frames in the `destination` bus starting at
// `destination_start_frame`. Both `source` and `destination` buses must have
// the same number of channels. `length` must not exceed the number of frames in
// `source`, and `destination_start_frame` + `length` also must be within the
// bounds of the `destination` bus.
void AccumulateBusTo(const media::AudioBus& source,
media::AudioBus* destination,
int source_start_frame,
int destination_start_frame,
int length);

@ -77,7 +77,6 @@ void AudioStream::ConsumeAndAccumulateTo(media::AudioBus* destination,
const int consumed = std::min(front->frames(), remaining_frames_to_consume);
audio_capture_util::AccumulateBusTo(
/*source=*/*front, /*destination=*/destination,
/*source_start_frame=*/0,
/*destination_start_frame=*/destination_start_frame,
/*length=*/consumed);
remaining_frames_to_consume -= consumed;

@ -236,7 +236,6 @@ TEST_F(AudioStreamMixerTest, StreamWithLaterTimestampsArrivesFirst) {
audio_capture_util::AccumulateBusTo(
/*source=*/*stream2_bus2,
/*destination=*/expected_bus.get(),
/*source_start_frame=*/0,
/*destination_start_frame=*/
audio_capture_util::NumberOfAudioFramesInDuration(
base::Milliseconds(45 - 40)),
@ -331,7 +330,6 @@ TEST_F(AudioStreamMixerTest, FlushingTheMixer) {
audio_capture_util::AccumulateBusTo(
/*source=*/*stream2_bus,
/*destination=*/expected_bus.get(),
/*source_start_frame=*/0,
/*destination_start_frame=*/
audio_capture_util::NumberOfAudioFramesInDuration(
base::Milliseconds(40 - 10)),

@ -381,7 +381,7 @@ void AudioBus::Scale(float volume) {
DCHECK(!is_bitstream_format_);
if (volume > 0 && volume != 1) {
for (auto channel : channel_data_) {
vector_math::FMUL(channel.data(), volume, frames(), channel.data());
vector_math::FMUL(channel, volume, channel);
}
} else if (volume == 0) {
Zero();

@ -230,9 +230,8 @@ void AudioConverter::SourceCallback(int fifo_frame_delay, AudioBus* dest) {
provide_input_dest->CopyTo(temp_dest);
} else if (volume > 0) {
for (int i = 0; i < provide_input_dest->channels(); ++i) {
vector_math::FMUL(provide_input_dest->channel_span(i).data(), volume,
provide_input_dest->frames(),
temp_dest->channel_span(i).data());
vector_math::FMUL(provide_input_dest->channel_span(i), volume,
temp_dest->channel_span(i));
}
} else {
// Zero |temp_dest| otherwise, so we're mixing into a clean buffer.
@ -245,9 +244,8 @@ void AudioConverter::SourceCallback(int fifo_frame_delay, AudioBus* dest) {
// Volume adjust and mix each mixer input into |temp_dest| after rendering.
if (volume > 0) {
for (int i = 0; i < mixer_input_audio_bus_->channels(); ++i) {
vector_math::FMAC(mixer_input_audio_bus_->channel_span(i).data(),
volume, mixer_input_audio_bus_->frames(),
temp_dest->channel_span(i).data());
vector_math::FMAC(mixer_input_audio_bus_->channel_span(i), volume,
temp_dest->channel_span(i));
}
}
}

@ -50,7 +50,8 @@ void AudioPowerMonitor::Scan(const AudioBus& buffer, int num_frames) {
float sum_power = 0.0f;
for (auto channel : buffer.AllChannels()) {
const std::pair<float, float> ewma_and_max = vector_math::EWMAAndMaxPower(
average_power_, channel.data(), num_frames, sample_weight_);
average_power_, channel.first(static_cast<size_t>(num_frames)),
sample_weight_);
// If data in audio buffer is garbage, ignore its effect on the result.
if (!std::isfinite(ewma_and_max.first)) {
sum_power += average_power_;

@ -90,9 +90,10 @@ void ChannelMixer::TransformPartial(const AudioBus* input,
float scale = matrix_[output_ch][input_ch];
// Scale should always be positive. Don't bother scaling by zero.
DCHECK_GE(scale, 0);
const size_t frames = static_cast<size_t>(frame_count);
if (scale > 0) {
vector_math::FMAC(input->channel_span(input_ch).data(), scale,
frame_count, output_channel.data());
vector_math::FMAC(input->channel_span(input_ch).first(frames), scale,
output_channel.first(frames));
}
}
}

@ -8,7 +8,6 @@
#endif
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"
#include <algorithm>
#include <cmath>
@ -17,6 +16,7 @@
#include "base/cpu.h"
#include "base/memory/aligned_memory.h"
#include "build/build_config.h"
#include "media/base/vector_math_testing.h"
// NaCl does not allow intrinsics.
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
@ -33,12 +33,15 @@
#include <arm_neon.h>
#endif
namespace media {
namespace vector_math {
namespace media::vector_math {
void FMAC(const float src[], float scale, int len, float dest[]) {
DCHECK(base::IsAligned(src, kRequiredAlignment));
DCHECK(base::IsAligned(dest, kRequiredAlignment));
void FMAC(base::span<const float> src, float scale, base::span<float> dest) {
if (src.empty()) {
return;
}
CHECK_LE(src.size(), dest.size());
DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
static const auto fmac_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@ -52,17 +55,22 @@ void FMAC(const float src[], float scale, int len, float dest[]) {
#endif
}();
return fmac_func(src, scale, len, dest);
return fmac_func(src.data(), scale, src.size(), dest.data());
}
void FMAC_C(const float src[], float scale, int len, float dest[]) {
for (int i = 0; i < len; ++i)
for (int i = 0; i < len; ++i) {
dest[i] += src[i] * scale;
}
}
void FMUL(const float src[], float scale, int len, float dest[]) {
DCHECK(base::IsAligned(src, kRequiredAlignment));
DCHECK(base::IsAligned(dest, kRequiredAlignment));
void FMUL(base::span<const float> src, float scale, base::span<float> dest) {
if (src.empty()) {
return;
}
CHECK_LE(src.size(), dest.size());
DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
static const auto fmul_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@ -76,17 +84,19 @@ void FMUL(const float src[], float scale, int len, float dest[]) {
#endif
}();
return fmul_func(src, scale, len, dest);
return fmul_func(src.data(), scale, src.size(), dest.data());
}
void FMUL_C(const float src[], float scale, int len, float dest[]) {
for (int i = 0; i < len; ++i)
for (int i = 0; i < len; ++i) {
dest[i] = src[i] * scale;
}
}
std::pair<float, float> EWMAAndMaxPower(
float initial_value, const float src[], int len, float smoothing_factor) {
DCHECK(base::IsAligned(src, kRequiredAlignment));
std::pair<float, float> EWMAAndMaxPower(float initial_value,
base::span<const float> src,
float smoothing_factor) {
DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
static const auto ewma_and_max_power_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@ -100,11 +110,14 @@ std::pair<float, float> EWMAAndMaxPower(
#endif
}();
return ewma_and_max_power_func(initial_value, src, len, smoothing_factor);
return ewma_and_max_power_func(initial_value, src.data(), src.size(),
smoothing_factor);
}
std::pair<float, float> EWMAAndMaxPower_C(
float initial_value, const float src[], int len, float smoothing_factor) {
std::pair<float, float> EWMAAndMaxPower_C(float initial_value,
const float src[],
int len,
float smoothing_factor) {
std::pair<float, float> result(initial_value, 0.0f);
const float weight_prev = 1.0f - smoothing_factor;
for (int i = 0; i < len; ++i) {
@ -122,12 +135,14 @@ void FMUL_SSE(const float src[], float scale, int len, float dest[]) {
const int rem = len % 4;
const int last_index = len - rem;
__m128 m_scale = _mm_set_ps1(scale);
for (int i = 0; i < last_index; i += 4)
for (int i = 0; i < last_index; i += 4) {
_mm_store_ps(dest + i, _mm_mul_ps(_mm_load_ps(src + i), m_scale));
}
// Handle any remaining values that wouldn't fit in an SSE pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] = src[i] * scale;
}
}
__attribute__((target("avx2"))) void FMUL_AVX2(const float src[],
@ -143,29 +158,34 @@ __attribute__((target("avx2"))) void FMUL_AVX2(const float src[],
bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
if (aligned_src) {
if (aligned_dest) {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_store_ps(dest + i,
_mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
}
} else {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_storeu_ps(dest + i,
_mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
}
}
} else {
if (aligned_dest) {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_store_ps(dest + i,
_mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
}
} else {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_storeu_ps(dest + i,
_mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
}
}
}
// Handle any remaining values that wouldn't fit in an AVX2 pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] = src[i] * scale;
}
}
void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
@ -173,13 +193,15 @@ void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
const int last_index = len - rem;
__m128 m_scale = _mm_set_ps1(scale);
for (int i = 0; i < last_index; i += 4) {
_mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i),
_mm_mul_ps(_mm_load_ps(src + i), m_scale)));
_mm_store_ps(dest + i,
_mm_add_ps(_mm_load_ps(dest + i),
_mm_mul_ps(_mm_load_ps(src + i), m_scale)));
}
// Handle any remaining values that wouldn't fit in an SSE pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] += src[i] * scale;
}
}
__attribute__((target("avx2,fma"))) void FMAC_AVX2(const float src[],
@ -195,45 +217,50 @@ __attribute__((target("avx2,fma"))) void FMAC_AVX2(const float src[],
bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
if (aligned_src) {
if (aligned_dest) {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_store_ps(dest + i,
_mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
_mm256_load_ps(dest + i)));
}
} else {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_storeu_ps(dest + i,
_mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
_mm256_loadu_ps(dest + i)));
}
}
} else {
if (aligned_dest) {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_store_ps(dest + i,
_mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
_mm256_load_ps(dest + i)));
}
} else {
for (int i = 0; i < last_index; i += 8)
for (int i = 0; i < last_index; i += 8) {
_mm256_storeu_ps(dest + i,
_mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
_mm256_loadu_ps(dest + i)));
}
}
}
// Handle any remaining values that wouldn't fit in an AVX2 pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] += src[i] * scale;
}
}
// Convenience macro to extract float 0 through 3 from the vector |a|. This is
// needed because compilers other than clang don't support access via
// operator[]().
#define EXTRACT_FLOAT(a, i) \
(i == 0 ? \
_mm_cvtss_f32(a) : \
_mm_cvtss_f32(_mm_shuffle_ps(a, a, i)))
(i == 0 ? _mm_cvtss_f32(a) : _mm_cvtss_f32(_mm_shuffle_ps(a, a, i)))
std::pair<float, float> EWMAAndMaxPower_SSE(
float initial_value, const float src[], int len, float smoothing_factor) {
std::pair<float, float> EWMAAndMaxPower_SSE(float initial_value,
const float src[],
int len,
float smoothing_factor) {
// When the recurrence is unrolled, we see that we can split it into 4
// separate lanes of evaluation:
//
@ -269,8 +296,8 @@ std::pair<float, float> EWMAAndMaxPower_SSE(
max_x4 = _mm_max_ps(max_x4, sample_squared_x4);
// Note: The compiler optimizes this to a single multiply-and-accumulate
// instruction:
ewma_x4 = _mm_add_ps(ewma_x4,
_mm_mul_ps(sample_squared_x4, smoothing_factor_x4));
ewma_x4 =
_mm_add_ps(ewma_x4, _mm_mul_ps(sample_squared_x4, smoothing_factor_x4));
}
// y[n] = z[n] + (1-a)^1(z[n-1]) + (1-a)^2(z[n-2]) + (1-a)^3(z[n-3])
@ -381,29 +408,34 @@ void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
const int last_index = len - rem;
float32x4_t m_scale = vmovq_n_f32(scale);
for (int i = 0; i < last_index; i += 4) {
vst1q_f32(dest + i, vmlaq_f32(
vld1q_f32(dest + i), vld1q_f32(src + i), m_scale));
vst1q_f32(dest + i,
vmlaq_f32(vld1q_f32(dest + i), vld1q_f32(src + i), m_scale));
}
// Handle any remaining values that wouldn't fit in an NEON pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] += src[i] * scale;
}
}
void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
const int rem = len % 4;
const int last_index = len - rem;
float32x4_t m_scale = vmovq_n_f32(scale);
for (int i = 0; i < last_index; i += 4)
for (int i = 0; i < last_index; i += 4) {
vst1q_f32(dest + i, vmulq_f32(vld1q_f32(src + i), m_scale));
}
// Handle any remaining values that wouldn't fit in an NEON pass.
for (int i = last_index; i < len; ++i)
for (int i = last_index; i < len; ++i) {
dest[i] = src[i] * scale;
}
}
std::pair<float, float> EWMAAndMaxPower_NEON(
float initial_value, const float src[], int len, float smoothing_factor) {
std::pair<float, float> EWMAAndMaxPower_NEON(float initial_value,
const float src[],
int len,
float smoothing_factor) {
// When the recurrence is unrolled, we see that we can split it into 4
// separate lanes of evaluation:
//
@ -468,5 +500,4 @@ std::pair<float, float> EWMAAndMaxPower_NEON(
}
#endif
} // namespace vector_math
} // namespace media
} // namespace media::vector_math

@ -7,27 +7,25 @@
#include <utility>
#include "base/containers/span.h"
#include "media/base/media_shmem_export.h"
namespace media {
namespace vector_math {
namespace media::vector_math {
// Required alignment for inputs and outputs to all vector math functions
enum { kRequiredAlignment = 16 };
// Multiply each element of |src| (up to |len|) by |scale| and add to |dest|.
// |src| and |dest| must be aligned by kRequiredAlignment.
MEDIA_SHMEM_EXPORT void FMAC(const float src[],
// Multiply each element of `src` by `scale` and add to `dest`.
// `src` and `dest` must be aligned by `kRequiredAlignment`.
MEDIA_SHMEM_EXPORT void FMAC(base::span<const float> src,
float scale,
int len,
float dest[]);
base::span<float> dest);
// Multiply each element of |src| by |scale| and store in |dest|. |src| and
// |dest| must be aligned by kRequiredAlignment.
MEDIA_SHMEM_EXPORT void FMUL(const float src[],
// Multiply each element of `src` by `scale` and store in `dest`.
// `src` and `dest` must be aligned by `kRequiredAlignment`.
MEDIA_SHMEM_EXPORT void FMUL(base::span<const float> src,
float scale,
int len,
float dest[]);
base::span<float> dest);
// Computes the exponentially-weighted moving average power of a signal by
// iterating the recurrence:
@ -38,11 +36,9 @@ MEDIA_SHMEM_EXPORT void FMUL(const float src[],
// Returns the final average power and the maximum squared element value.
MEDIA_SHMEM_EXPORT std::pair<float, float> EWMAAndMaxPower(
float initial_value,
const float src[],
int len,
base::span<const float> src,
float smoothing_factor);
} // namespace vector_math
} // namespace media
} // namespace media::vector_math
#endif // MEDIA_BASE_VECTOR_MATH_H_

@ -7,6 +7,9 @@
#pragma allow_unsafe_buffers
#endif
#include "media/base/vector_math.h"
#include <algorithm>
#include <cmath>
#include <memory>
@ -15,28 +18,25 @@
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringize_macros.h"
#include "build/build_config.h"
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"
#include "testing/gtest/include/gtest/gtest.h"
using std::fill;
namespace media {
// Default test values.
static const float kScale = 0.5;
static const float kInputFillValue = 1.0;
static const float kOutputFillValue = 3.0;
static const int kVectorSize = 8192;
static constexpr float kScale = 0.5;
static constexpr float kInputFillValue = 1.0;
static constexpr float kOutputFillValue = 3.0;
static constexpr int kVectorSize = 8192;
class VectorMathTest : public testing::Test {
public:
VectorMathTest() {
// Initialize input and output vectors.
input_vector_.reset(static_cast<float*>(base::AlignedAlloc(
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
output_vector_.reset(static_cast<float*>(base::AlignedAlloc(
sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
input_array_ = base::AlignedUninit<float>(kVectorSize,
vector_math::kRequiredAlignment);
output_array_ = base::AlignedUninit<float>(kVectorSize,
vector_math::kRequiredAlignment);
}
VectorMathTest(const VectorMathTest&) = delete;
@ -44,18 +44,18 @@ class VectorMathTest : public testing::Test {
void FillTestVectors(float input, float output) {
// Setup input and output vectors.
fill(input_vector_.get(), input_vector_.get() + kVectorSize, input);
fill(output_vector_.get(), output_vector_.get() + kVectorSize, output);
std::ranges::fill(input_array_, input);
std::ranges::fill(output_array_, output);
}
void VerifyOutput(float value) {
for (int i = 0; i < kVectorSize; ++i)
ASSERT_FLOAT_EQ(output_vector_[i], value);
EXPECT_TRUE(std::ranges::all_of(
output_array_, [value](float datum) { return datum == value; }));
}
protected:
std::unique_ptr<float[], base::AlignedFreeDeleter> input_vector_;
std::unique_ptr<float[], base::AlignedFreeDeleter> output_vector_;
base::AlignedHeapArray<float> input_array_;
base::AlignedHeapArray<float> output_array_;
};
// Ensure each optimized vector_math::FMAC() method returns the same value.
@ -65,16 +65,15 @@ TEST_F(VectorMathTest, FMAC) {
{
SCOPED_TRACE("FMAC");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMAC(input_array_, kScale, output_array_);
VerifyOutput(kResult);
}
{
SCOPED_TRACE("FMAC_C");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC_C(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMAC_C(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
@ -82,8 +81,8 @@ TEST_F(VectorMathTest, FMAC) {
{
SCOPED_TRACE("FMAC_SSE");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC_SSE(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMAC_SSE(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
{
@ -91,8 +90,8 @@ TEST_F(VectorMathTest, FMAC) {
if (cpu.has_avx2() && cpu.has_fma3()) {
SCOPED_TRACE("FMAC_AVX2");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC_AVX2(input_vector_.get(), kScale, kVectorSize,
output_vector_.get());
vector_math::FMAC_AVX2(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
}
@ -102,8 +101,8 @@ TEST_F(VectorMathTest, FMAC) {
{
SCOPED_TRACE("FMAC_NEON");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC_NEON(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMAC_NEON(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
#endif
@ -116,16 +115,15 @@ TEST_F(VectorMathTest, FMUL) {
{
SCOPED_TRACE("FMUL");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMUL(input_array_, kScale, output_array_);
VerifyOutput(kResult);
}
{
SCOPED_TRACE("FMUL_C");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL_C(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMUL_C(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
@ -133,8 +131,8 @@ TEST_F(VectorMathTest, FMUL) {
{
SCOPED_TRACE("FMUL_SSE");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL_SSE(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMUL_SSE(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
{
@ -142,8 +140,8 @@ TEST_F(VectorMathTest, FMUL) {
if (cpu.has_avx2()) {
SCOPED_TRACE("FMUL_AVX2");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL_AVX2(input_vector_.get(), kScale, kVectorSize,
output_vector_.get());
vector_math::FMUL_AVX2(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
}
@ -153,28 +151,38 @@ TEST_F(VectorMathTest, FMUL) {
{
SCOPED_TRACE("FMUL_NEON");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL_NEON(
input_vector_.get(), kScale, kVectorSize, output_vector_.get());
vector_math::FMUL_NEON(input_array_.data(), kScale, kVectorSize,
output_array_.data());
VerifyOutput(kResult);
}
#endif
}
TEST_F(VectorMathTest, EmptyInputs) {
{
SCOPED_TRACE("FMUL");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMUL(base::span<float>(), kScale, output_array_);
VerifyOutput(kOutputFillValue);
}
{
SCOPED_TRACE("FMAC");
FillTestVectors(kInputFillValue, kOutputFillValue);
vector_math::FMAC(base::span<float>(), kScale, output_array_);
VerifyOutput(kOutputFillValue);
}
}
class EWMATestScenario {
public:
EWMATestScenario(float initial_value, const float src[], int len,
EWMATestScenario(float initial_value,
base::span<const float> src,
float smoothing_factor)
: initial_value_(initial_value),
data_(static_cast<float*>(
len == 0 ? NULL :
base::AlignedAlloc(len * sizeof(float),
vector_math::kRequiredAlignment))),
data_len_(len),
smoothing_factor_(smoothing_factor),
expected_final_avg_(initial_value),
expected_max_(0.0f) {
if (data_len_ > 0)
memcpy(data_.get(), src, len * sizeof(float));
expected_final_avg_(initial_value) {
CopyDataAligned(src);
}
// Copy constructor and assignment operator for ::testing::Values(...).
@ -182,16 +190,7 @@ class EWMATestScenario {
EWMATestScenario& operator=(const EWMATestScenario& other) {
this->initial_value_ = other.initial_value_;
this->smoothing_factor_ = other.smoothing_factor_;
if (other.data_len_ == 0) {
this->data_.reset();
} else {
this->data_.reset(static_cast<float*>(
base::AlignedAlloc(other.data_len_ * sizeof(float),
vector_math::kRequiredAlignment)));
memcpy(this->data_.get(), other.data_.get(),
other.data_len_ * sizeof(float));
}
this->data_len_ = other.data_len_;
this->CopyDataAligned(other.data_);
this->expected_final_avg_ = other.expected_final_avg_;
this->expected_max_ = other.expected_max_;
return *this;
@ -199,16 +198,14 @@ class EWMATestScenario {
EWMATestScenario ScaledBy(float scale) const {
EWMATestScenario result(*this);
float* p = result.data_.get();
float* const p_end = p + result.data_len_;
for (; p < p_end; ++p)
*p *= scale;
std::ranges::for_each(result.data_,
[scale](float& datum) { datum *= scale; });
return result;
}
EWMATestScenario WithImpulse(float value, int offset) const {
EWMATestScenario result(*this);
result.data_.get()[offset] = value;
result.data_[offset] = value;
return result;
}
@ -224,7 +221,7 @@ class EWMATestScenario {
{
SCOPED_TRACE("EWMAAndMaxPower");
const std::pair<float, float>& result = vector_math::EWMAAndMaxPower(
initial_value_, data_.get(), data_len_, smoothing_factor_);
initial_value_, data_, smoothing_factor_);
EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
}
@ -232,7 +229,7 @@ class EWMATestScenario {
{
SCOPED_TRACE("EWMAAndMaxPower_C");
const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_C(
initial_value_, data_.get(), data_len_, smoothing_factor_);
initial_value_, data_.data(), data_.size(), smoothing_factor_);
EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
}
@ -241,7 +238,7 @@ class EWMATestScenario {
{
SCOPED_TRACE("EWMAAndMaxPower_SSE");
const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_SSE(
initial_value_, data_.get(), data_len_, smoothing_factor_);
initial_value_, data_.data(), data_.size(), smoothing_factor_);
EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
}
@ -250,8 +247,8 @@ class EWMATestScenario {
if (cpu.has_avx2() && cpu.has_fma3()) {
SCOPED_TRACE("EWMAAndMaxPower_AVX2");
const std::pair<float, float>& result =
vector_math::EWMAAndMaxPower_AVX2(initial_value_, data_.get(),
data_len_, smoothing_factor_);
vector_math::EWMAAndMaxPower_AVX2(initial_value_, data_.data(),
data_.size(), smoothing_factor_);
EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
}
@ -262,7 +259,7 @@ class EWMATestScenario {
{
SCOPED_TRACE("EWMAAndMaxPower_NEON");
const std::pair<float, float>& result = vector_math::EWMAAndMaxPower_NEON(
initial_value_, data_.get(), data_len_, smoothing_factor_);
initial_value_, data_.data(), data_.size(), smoothing_factor_);
EXPECT_NEAR(expected_final_avg_, result.first, 0.0000001f);
EXPECT_NEAR(expected_max_, result.second, 0.0000001f);
}
@ -270,12 +267,22 @@ class EWMATestScenario {
}
private:
void CopyDataAligned(base::span<const float> src) {
if (src.empty()) {
data_ = base::AlignedHeapArray<float>();
return;
}
data_ =
base::AlignedUninit<float>(src.size(), vector_math::kRequiredAlignment);
data_.copy_from(src);
}
float initial_value_;
std::unique_ptr<float, base::AlignedFreeDeleter> data_;
int data_len_;
base::AlignedHeapArray<float> data_;
float smoothing_factor_;
float expected_final_avg_;
float expected_max_;
float expected_max_ = 0.0f;
};
typedef testing::TestWithParam<EWMATestScenario> VectorMathEWMAAndMaxPowerTest;
@ -309,104 +316,121 @@ INSTANTIATE_TEST_SUITE_P(
VectorMathEWMAAndMaxPowerTest,
::testing::Values(
// Zero-length input: Result should equal initial value.
EWMATestScenario(0.0f, NULL, 0, 0.0f).HasExpectedResult(0.0f, 0.0f),
EWMATestScenario(1.0f, NULL, 0, 0.0f).HasExpectedResult(1.0f, 0.0f),
EWMATestScenario(0.0f, base::span<float>(), 0.0f)
.HasExpectedResult(0.0f, 0.0f),
EWMATestScenario(1.0f, base::span<float>(), 0.0f)
.HasExpectedResult(1.0f, 0.0f),
// Smoothing factor of zero: Samples have no effect on result.
EWMATestScenario(0.0f, kOnes, 32, 0.0f).HasExpectedResult(0.0f, 1.0f),
EWMATestScenario(1.0f, kZeros, 32, 0.0f).HasExpectedResult(1.0f, 0.0f),
EWMATestScenario(0.0f, kOnes, 0.0f).HasExpectedResult(0.0f, 1.0f),
EWMATestScenario(1.0f, kZeros, 0.0f).HasExpectedResult(1.0f, 0.0f),
// Smoothing factor of one: Result = last sample squared.
EWMATestScenario(0.0f, kCheckerboard, 32, 1.0f)
EWMATestScenario(0.0f, kCheckerboard, 1.0f)
.ScaledBy(2.0f)
.HasExpectedResult(4.0f, 4.0f),
EWMATestScenario(1.0f, kInverseCheckerboard, 32, 1.0f)
EWMATestScenario(1.0f, kInverseCheckerboard, 1.0f)
.ScaledBy(2.0f)
.HasExpectedResult(0.0f, 4.0f),
// Smoothing factor of 1/4, muted signal.
EWMATestScenario(1.0f, kZeros, 1, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(1u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 1.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 2, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(2u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 2.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 3, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(3u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 3.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 12, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(12u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 12.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 13, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(13u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 13.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 14, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(14u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 14.0f), 0.0f),
EWMATestScenario(1.0f, kZeros, 15, 0.25f)
EWMATestScenario(1.0f, base::span(kZeros).first(15u), 0.25f)
.HasExpectedResult(std::pow(0.75f, 15.0f), 0.0f),
// Smoothing factor of 1/4, constant full-amplitude signal.
EWMATestScenario(0.0f, kOnes, 1, 0.25f).HasExpectedResult(0.25f, 1.0f),
EWMATestScenario(0.0f, kOnes, 2, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(1u), 0.25f)
.HasExpectedResult(0.25f, 1.0f),
EWMATestScenario(0.0f, base::span(kOnes).first(2u), 0.25f)
.HasExpectedResult(0.4375f, 1.0f),
EWMATestScenario(0.0f, kOnes, 3, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(3u), 0.25f)
.HasExpectedResult(0.578125f, 1.0f),
EWMATestScenario(0.0f, kOnes, 12, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(12u), 0.25f)
.HasExpectedResult(0.96832365f, 1.0f),
EWMATestScenario(0.0f, kOnes, 13, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(13u), 0.25f)
.HasExpectedResult(0.97624274f, 1.0f),
EWMATestScenario(0.0f, kOnes, 14, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(14u), 0.25f)
.HasExpectedResult(0.98218205f, 1.0f),
EWMATestScenario(0.0f, kOnes, 15, 0.25f)
EWMATestScenario(0.0f, base::span(kOnes).first(15u), 0.25f)
.HasExpectedResult(0.98663654f, 1.0f),
// Smoothing factor of 1/4, checkerboard signal.
EWMATestScenario(0.0f, kCheckerboard, 1, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(1u), 0.25f)
.HasExpectedResult(0.0f, 0.0f),
EWMATestScenario(0.0f, kCheckerboard, 2, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(2u), 0.25f)
.HasExpectedResult(0.25f, 1.0f),
EWMATestScenario(0.0f, kCheckerboard, 3, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(3u), 0.25f)
.HasExpectedResult(0.1875f, 1.0f),
EWMATestScenario(0.0f, kCheckerboard, 12, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(12u), 0.25f)
.HasExpectedResult(0.55332780f, 1.0f),
EWMATestScenario(0.0f, kCheckerboard, 13, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(13u), 0.25f)
.HasExpectedResult(0.41499585f, 1.0f),
EWMATestScenario(0.0f, kCheckerboard, 14, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(14u), 0.25f)
.HasExpectedResult(0.56124689f, 1.0f),
EWMATestScenario(0.0f, kCheckerboard, 15, 0.25f)
EWMATestScenario(0.0f, base::span(kCheckerboard).first(15u), 0.25f)
.HasExpectedResult(0.42093517f, 1.0f),
// Smoothing factor of 1/4, inverse checkerboard signal.
EWMATestScenario(0.0f, kInverseCheckerboard, 1, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(1u),
0.25f)
.HasExpectedResult(0.25f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 2, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(2u),
0.25f)
.HasExpectedResult(0.1875f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 3, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(3u),
0.25f)
.HasExpectedResult(0.390625f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 12, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(12u),
0.25f)
.HasExpectedResult(0.41499585f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 13, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(13u),
0.25f)
.HasExpectedResult(0.56124689f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 14, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(14u),
0.25f)
.HasExpectedResult(0.42093517f, 1.0f),
EWMATestScenario(0.0f, kInverseCheckerboard, 15, 0.25f)
EWMATestScenario(0.0f,
base::span(kInverseCheckerboard).first(15u),
0.25f)
.HasExpectedResult(0.56570137f, 1.0f),
// Smoothing factor of 1/4, impulse signal.
EWMATestScenario(0.0f, kZeros, 3, 0.25f)
EWMATestScenario(0.0f, base::span(kZeros).first(3u), 0.25f)
.WithImpulse(2.0f, 0)
.HasExpectedResult(0.562500f, 4.0f),
EWMATestScenario(0.0f, kZeros, 3, 0.25f)
EWMATestScenario(0.0f, base::span(kZeros).first(3u), 0.25f)
.WithImpulse(2.0f, 1)
.HasExpectedResult(0.75f, 4.0f),
EWMATestScenario(0.0f, kZeros, 3, 0.25f)
EWMATestScenario(0.0f, base::span(kZeros).first(3u), 0.25f)
.WithImpulse(2.0f, 2)
.HasExpectedResult(1.0f, 4.0f),
EWMATestScenario(0.0f, kZeros, 32, 0.25f)
EWMATestScenario(0.0f, kZeros, 0.25f)
.WithImpulse(2.0f, 0)
.HasExpectedResult(0.00013394f, 4.0f),
EWMATestScenario(0.0f, kZeros, 32, 0.25f)
EWMATestScenario(0.0f, kZeros, 0.25f)
.WithImpulse(2.0f, 1)
.HasExpectedResult(0.00017858f, 4.0f),
EWMATestScenario(0.0f, kZeros, 32, 0.25f)
EWMATestScenario(0.0f, kZeros, 0.25f)
.WithImpulse(2.0f, 2)
.HasExpectedResult(0.00023811f, 4.0f),
EWMATestScenario(0.0f, kZeros, 32, 0.25f)
EWMATestScenario(0.0f, kZeros, 0.25f)
.WithImpulse(2.0f, 3)
.HasExpectedResult(0.00031748f, 4.0f)));

@ -9,6 +9,7 @@
#include "base/numerics/safe_conversions.h"
#include "base/trace_event/trace_event.h"
#include "base/types/zip.h"
#include "media/base/audio_bus.h"
#include "media/base/vector_math.h"
@ -36,9 +37,9 @@ void DelayBuffer::Write(FrameTicks position,
// Make a copy of the AudioBus for later consumption. Apply the volume setting
// by scaling the audio signal during the copy.
auto copy = media::AudioBus::Create(input_bus.channels(), input_bus.frames());
for (int ch = 0; ch < input_bus.channels(); ++ch) {
media::vector_math::FMUL(input_bus.channel(ch), volume, input_bus.frames(),
copy->channel(ch));
for (auto [src_ch, dest_ch] :
base::zip(input_bus.AllChannels(), copy->AllChannels())) {
media::vector_math::FMUL(src_ch, volume, dest_ch);
}
chunks_.emplace_back(position, std::move(copy));

@ -14,6 +14,7 @@
#include "base/task/sequenced_task_runner.h"
#include "base/time/default_tick_clock.h"
#include "base/trace_event/trace_event.h"
#include "base/types/zip.h"
#include "media/base/audio_bus.h"
#include "media/base/vector_math.h"
#include "mojo/public/cpp/system/buffer.h"
@ -353,10 +354,9 @@ void LoopbackStream::FlowNetwork::GenerateMoreAudio() {
}
do {
(*it)->Render(delayed_capture_time, transfer_bus_.get());
for (int ch = 0; ch < transfer_bus_->channels(); ++ch) {
media::vector_math::FMAC(transfer_bus_->channel(ch), volume_,
transfer_bus_->frames(),
mix_bus_->channel(ch));
for (auto [src_ch, dest_ch] : base::zip(transfer_bus_->AllChannels(),
mix_bus_->AllChannels())) {
media::vector_math::FMAC(src_ch, volume_, dest_ch);
}
++it;
} while (it != inputs_.end());