0

webcodecs: Make RasterInterface use relaxed-atomic memcopy for readback

v8 team insists that in order to avoid C++ UB, all writes to memory
owned by SharedArrayBuffers should be done with C++ relaxed-atomics.
That's why in order to use async readback in WebCodecs API without
introducing extra copies, we need to use RelaxedAtomicWriteMemcpy().

My micro-benchmarks didn't find any difference in performance between
RelaxedAtomicWriteMemcpy() and memcpy(), therefore we don't need to make effort to use memcpy in cases where we don't copy to SharedArrayBuffers.

Bug: 340606792
Change-Id: I8faec3dd3e79cbd61c14aee292ea6d873317129f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6176228
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Commit-Queue: Eugene Zemtsov <eugene@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1407599}
This commit is contained in:
Eugene Zemtsov
2025-01-16 14:58:15 -08:00
committed by Chromium LUCI CQ
parent 29eb6abeac
commit 780efe3803
3 changed files with 36 additions and 11 deletions

@ -16,6 +16,7 @@
#include <string>
#include <utility>
#include "base/atomicops.h"
#include "base/bits.h"
#include "base/check_op.h"
#include "base/containers/contains.h"
@ -457,8 +458,11 @@ void GLHelper::CopyTextureToImpl::ReadbackDone(Request* finished_request) {
dst += dst_stride * (request->size.height() - 1);
dst_stride = -dst_stride;
}
// We need to use `RelaxedAtomicWriteMemcpy` because we might be writing
// into memory observed by JS at the same time.
for (int y = 0; y < request->size.height(); y++) {
memcpy(dst, src, bytes_to_copy);
base::subtle::RelaxedAtomicWriteMemcpy(
base::span(dst, bytes_to_copy), base::span(src, bytes_to_copy));
dst += dst_stride;
src += src_stride;
}

@ -28,6 +28,7 @@
#include <string>
#include "base/atomic_sequence_num.h"
#include "base/atomicops.h"
#include "base/bits.h"
#include "base/compiler_specific.h"
#include "base/containers/heap_array.h"
@ -4849,7 +4850,12 @@ GLboolean GLES2Implementation::ReadbackARGBImagePixelsINTERNAL(
if (!*readback_result) {
return GL_FALSE;
}
memcpy(pixels, static_cast<uint8_t*>(shm_address) + pixels_offset, dst_size);
// We need to use `RelaxedAtomicWriteMemcpy` because we might be writing into
// memory observed by JS at the same time.
auto dst = base::span(static_cast<uint8_t*>(pixels), dst_size);
auto src =
base::span(static_cast<uint8_t*>(shm_address) + pixels_offset, dst_size);
base::subtle::RelaxedAtomicWriteMemcpy(dst, src);
return GL_TRUE;
}

@ -24,6 +24,7 @@
#include <string>
#include "base/atomic_sequence_num.h"
#include "base/atomicops.h"
#include "base/bits.h"
#include "base/compiler_specific.h"
#include "base/functional/bind.h"
@ -525,8 +526,13 @@ struct RasterImplementation::AsyncYUVReadbackRequest {
uint8_t* out_buffer) {
// RasterDecoder writes the pixels into |in_buffer| with the requested
// stride so we can copy the whole block here.
memcpy(out_buffer, static_cast<uint8_t*>(in_buffer) + plane_offset,
plane_height * plane_stride);
// We need to use `RelaxedAtomicWriteMemcpy` because we might be writing
// into memory observed by JS at the same time.
size_t plane_size = plane_height * plane_stride;
auto dst = base::span(out_buffer, plane_size);
auto src =
base::span(static_cast<uint8_t*>(in_buffer) + plane_offset, plane_size);
base::subtle::RelaxedAtomicWriteMemcpy(dst, src);
}
};
@ -1554,9 +1560,12 @@ bool RasterImplementation::ReadbackImagePixelsINTERNAL(
if (!*readback_result) {
return false;
}
memcpy(dst_pixels, static_cast<uint8_t*>(shm_address) + pixels_offset,
dst_size);
// We need to use `RelaxedAtomicWriteMemcpy` because we might be writing
// into memory observed by JS at the same time.
auto dst = base::span<uint8_t>(static_cast<uint8_t*>(dst_pixels), dst_size);
auto src = base::span<uint8_t>(
static_cast<uint8_t*>(shm_address) + pixels_offset, dst_size);
base::subtle::RelaxedAtomicWriteMemcpy(dst, src);
}
return true;
@ -1581,10 +1590,16 @@ void RasterImplementation::OnAsyncARGBReadbackDone(
static_cast<cmds::ReadbackARGBImagePixelsINTERNALImmediate::Result*>(
request->shared_memory->address());
if (*result) {
memcpy(request->dst_pixels,
static_cast<uint8_t*>(request->shared_memory->address()) +
request->pixels_offset,
request->dst_size);
// We need to use `RelaxedAtomicWriteMemcpy` because we might be writing
// into memory observed by JS at the same time.
size_t plane_size = request->dst_size;
auto dst = base::span<uint8_t>(
static_cast<uint8_t*>(request->dst_pixels.get()), plane_size);
auto src = base::span<uint8_t>(
static_cast<uint8_t*>(request->shared_memory->address()) +
request->pixels_offset,
plane_size);
base::subtle::RelaxedAtomicWriteMemcpy(dst, src);
request->readback_successful = true;
}