
Add zlib compression to memory program cache

The memory program cache currently uses quite a lot of memory: up to 6MB
on desktop, 2MB on normal Android, and 512KB on low-end Android.

Compressing shader binaries results in a 4-10x decrease in size
(depending on the driver's binary representation).

Compression adds a measurable performance hit (around 500-1000
microseconds). But when compared to overall shader compile/link time,
which is around 6000 to 25000 microseconds, this seems acceptable for
the memory wins. UMA stats have been added so we can ensure these values
behave as expected in the wild.

Decompression is fairly cheap (<200 microseconds).
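
As context, the change boils down to a zlib round trip at compression
level 1, mirroring the CompressData()/DecompressData() helpers added in
the diff below. A minimal standalone sketch against stock zlib follows
(Chromium's bundled zlib prefixes these symbols with Cr_z_; the function
names here are illustrative, not the ones used in the CL):

#include <zlib.h>

#include <cstdint>
#include <vector>

// Compress a program binary at level 1 (fastest, weakest compression).
// Returns an empty vector on failure.
std::vector<uint8_t> CompressBinary(const std::vector<uint8_t>& data) {
  uLongf compressed_size = compressBound(data.size());
  std::vector<uint8_t> compressed(compressed_size);
  if (compress2(compressed.data(), &compressed_size, data.data(),
                data.size(), 1 /* level */) != Z_OK) {
    return {};
  }
  compressed.resize(compressed_size);
  return compressed;
}

// Decompress, using the stored decompressed length as the output size.
// Returns an empty vector on corrupt input or a size mismatch.
std::vector<uint8_t> DecompressBinary(const std::vector<uint8_t>& data,
                                      size_t decompressed_size) {
  std::vector<uint8_t> decompressed(decompressed_size);
  uLongf out_size = static_cast<uLongf>(decompressed_size);
  if (uncompress(decompressed.data(), &out_size, data.data(),
                 data.size()) != Z_OK ||
      out_size != decompressed_size) {
    return {};
  }
  return decompressed;
}

Level 1 keeps the compression cost near the 500-1000 microseconds quoted
above while still giving the 4-10x size reduction.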

Currently, this feature is only turned on for low-end devices, as
those are the ones with the most memory pressure. However, if the
results are good, it might make sense to enable this in general.

Cq-Include-Trybots: master.tryserver.chromium.android:android_optional_gpu_tests_rel;master.tryserver.chromium.linux:linux_optional_gpu_tests_rel;master.tryserver.chromium.mac:mac_optional_gpu_tests_rel;master.tryserver.chromium.win:win_optional_gpu_tests_rel
Change-Id: Ie5d389f060504837efc86993811e83af8d630ce8
Reviewed-on: https://chromium-review.googlesource.com/572546
Commit-Queue: Eric Karl <ericrk@chromium.org>
Reviewed-by: Antoine Labour <piman@chromium.org>
Reviewed-by: Leon Scroggins <scroggo@chromium.org>
Reviewed-by: Ilya Sherman <isherman@chromium.org>
Reviewed-by: Zhenyao Mo <zmo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#491865}
Eric Karl authored 2017-08-03 22:47:31 +00:00, committed by Commit Bot
parent caae5e54ba, commit d625a73a49
12 changed files with 209 additions and 54 deletions

@@ -42,6 +42,7 @@
#include "gpu/config/gpu_util.h"
#include "gpu/config/software_rendering_list_autogen.h"
#include "gpu/ipc/common/memory_stats.h"
#include "gpu/ipc/host/shader_disk_cache.h"
#include "gpu/ipc/service/switches.h"
#include "media/media_features.h"
#include "ui/base/ui_base_switches.h"
@@ -925,6 +926,9 @@ void GpuDataManagerImplPrivate::UpdateGpuPreferences(
(command_line->HasSwitch(switches::kEnableES3APIs) ||
!IsFeatureBlacklisted(gpu::GPU_FEATURE_TYPE_WEBGL2)) &&
!command_line->HasSwitch(switches::kDisableES3APIs);
gpu_preferences->gpu_program_cache_size =
gpu::ShaderDiskCache::CacheSizeBytes();
}
void GpuDataManagerImplPrivate::DisableHardwareAcceleration() {

@@ -5,6 +5,7 @@ include_rules = [
"+third_party/smhasher",
"+third_party/swiftshader",
"+third_party/protbuf",
"+third_party/zlib",
"+crypto",
"+ui/gfx",
"+ui/gl",

@@ -179,6 +179,7 @@ target(link_target_type, "service_sources") {
"//third_party/protobuf:protobuf_lite",
"//third_party/re2",
"//third_party/smhasher:cityhash",
"//third_party/zlib",
"//ui/gfx",
"//ui/gfx/geometry",
"//ui/gfx/ipc/color",

@@ -61,6 +61,8 @@ message GpuProgramProto {
optional bytes sha = 1;
optional uint32 format = 2;
optional bytes program = 3;
optional bool program_is_compressed = 6;
optional uint32 program_decompressed_length = 7;
optional ShaderProto vertex_shader = 4;
optional ShaderProto fragment_shader = 5;

@@ -8,13 +8,7 @@
namespace gpu {
GpuPreferences::GpuPreferences() {
gpu_program_cache_size = kDefaultMaxProgramCacheMemoryBytes;
#if defined(OS_ANDROID)
if (base::SysInfo::IsLowEndDevice())
gpu_program_cache_size = kLowEndMaxProgramCacheMemoryBytes;
#endif
}
GpuPreferences::GpuPreferences() = default;
GpuPreferences::GpuPreferences(const GpuPreferences& other) = default;

@@ -12,8 +12,11 @@
#include "base/command_line.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/checked_math.h"
#include "base/sha1.h"
#include "base/strings/string_number_conversions.h"
#include "base/sys_info.h"
#include "build/build_config.h"
#include "gpu/command_buffer/common/activity_flags.h"
#include "gpu/command_buffer/common/constants.h"
#include "gpu/command_buffer/service/disk_cache_proto.pb.h"
@@ -21,8 +24,19 @@
#include "gpu/command_buffer/service/gles2_cmd_decoder.h"
#include "gpu/command_buffer/service/gpu_preferences.h"
#include "gpu/command_buffer/service/shader_manager.h"
#include "third_party/zlib/zlib.h"
#include "ui/gl/gl_bindings.h"
// Macro to help with logging times under 10ms.
#define UMA_HISTOGRAM_VERY_SHORT_TIMES(name, time_delta) \
UMA_HISTOGRAM_CUSTOM_COUNTS( \
name, \
static_cast<base::HistogramBase::Sample>((time_delta).InMicroseconds()), \
1, \
static_cast<base::HistogramBase::Sample>( \
base::TimeDelta::FromMilliseconds(10).InMicroseconds()), \
50);
namespace gpu {
namespace gles2 {
@@ -209,6 +223,64 @@ bool ProgramBinaryExtensionsAvailable() {
gl::g_current_gl_driver->ext.b_GL_OES_get_program_binary);
}
// Returns an empty vector if compression fails.
std::vector<uint8_t> CompressData(const std::vector<uint8_t>& data) {
auto start_time = base::TimeTicks::Now();
Cr_z_uLongf compressed_size = compressBound(data.size());
std::vector<uint8_t> compressed_data(compressed_size);
// Level indicates a trade-off between compression and speed. Level 1
// indicates fastest speed (with worst compression).
auto result = compress2(compressed_data.data(), &compressed_size, data.data(),
data.size(), 1 /* level */);
// It should be impossible for compression to fail with the provided
// parameters.
bool success = Z_OK == result;
UMA_HISTOGRAM_BOOLEAN("GPU.ProgramCache.CompressDataSuccess", success);
if (!success)
return std::vector<uint8_t>();
compressed_data.resize(compressed_size);
compressed_data.shrink_to_fit();
UMA_HISTOGRAM_VERY_SHORT_TIMES("GPU.ProgramCache.CompressDataTime",
base::TimeTicks::Now() - start_time);
UMA_HISTOGRAM_PERCENTAGE("GPU.ProgramCache.CompressionPercentage",
(100 * compressed_size) / data.size());
return compressed_data;
}
// Returns an empty vector if decompression fails.
std::vector<uint8_t> DecompressData(const std::vector<uint8_t>& data,
size_t decompressed_size,
size_t max_size_bytes) {
auto start_time = base::TimeTicks::Now();
std::vector<uint8_t> decompressed_data(decompressed_size);
Cr_z_uLongf decompressed_size_out =
static_cast<Cr_z_uLongf>(decompressed_size);
auto result = uncompress(decompressed_data.data(), &decompressed_size_out,
data.data(), data.size());
bool success =
result == Z_OK && decompressed_data.size() == decompressed_size_out;
UMA_HISTOGRAM_BOOLEAN("GPU.ProgramCache.DecompressDataSuccess", success);
if (!success)
return std::vector<uint8_t>();
UMA_HISTOGRAM_VERY_SHORT_TIMES("GPU.ProgramCache.DecompressDataTime",
base::TimeTicks::Now() - start_time);
return decompressed_data;
}
bool CompressProgramBinaries() {
#if !defined(OS_ANDROID)
return false;
#else // !defined(OS_ANDROID)
return base::SysInfo::IsLowEndDevice();
#endif // !defined(OS_ANDROID)
}
} // namespace
MemoryProgramCache::MemoryProgramCache(
@@ -220,6 +292,7 @@ MemoryProgramCache::MemoryProgramCache(
disable_gpu_shader_disk_cache_(disable_gpu_shader_disk_cache),
disable_program_caching_for_transform_feedback_(
disable_program_caching_for_transform_feedback),
compress_program_binaries_(CompressProgramBinaries()),
curr_size_bytes_(0),
store_(ProgramMRUCache::NO_AUTO_EVICT),
activity_flags_(activity_flags) {}
@@ -267,12 +340,22 @@ ProgramCache::ProgramLoadResult MemoryProgramCache::LoadLinkedProgram(
return PROGRAM_LOAD_FAILURE;
}
const scoped_refptr<ProgramCacheValue> value = found->second;
const std::vector<uint8_t>& decoded =
value->is_compressed()
? DecompressData(value->data(), value->decompressed_length(),
max_size_bytes_)
: value->data();
if (decoded.empty()) {
// Decompression failure.
DCHECK(value->is_compressed());
return PROGRAM_LOAD_FAILURE;
}
{
GpuProcessActivityFlags::ScopedSetFlag scoped_set_flag(
activity_flags_, ActivityFlagsBase::FLAG_LOADING_PROGRAM_BINARY);
glProgramBinary(program, value->format(),
static_cast<const GLvoid*>(value->data()), value->length());
static_cast<const GLvoid*>(decoded.data()), decoded.size());
}
GLint success = 0;
@@ -296,7 +379,9 @@ ProgramCache::ProgramLoadResult MemoryProgramCache::LoadLinkedProgram(
GpuProgramProto::default_instance().New());
proto->set_sha(sha, kHashLength);
proto->set_format(value->format());
proto->set_program(value->data(), value->length());
proto->set_program(value->data().data(), value->data().size());
proto->set_program_is_compressed(value->is_compressed());
proto->set_program_decompressed_length(value->decompressed_length());
FillShaderProto(proto->mutable_vertex_shader(), a_sha, shader_a);
FillShaderProto(proto->mutable_fragment_shader(), b_sha, shader_b);
@@ -324,17 +409,23 @@ void MemoryProgramCache::SaveLinkedProgram(
}
GLenum format;
GLsizei length = 0;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH_OES, &length);
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &length);
if (length == 0 || static_cast<unsigned int>(length) > max_size_bytes_) {
return;
}
std::unique_ptr<char[]> binary(new char[length]);
glGetProgramBinary(program,
length,
NULL,
&format,
binary.get());
UMA_HISTOGRAM_COUNTS("GPU.ProgramCache.ProgramBinarySizeBytes", length);
std::vector<uint8_t> binary(length);
glGetProgramBinary(program, length, NULL, &format,
reinterpret_cast<char*>(binary.data()));
if (compress_program_binaries_) {
binary = CompressData(binary);
if (binary.empty()) {
// Zero size indicates failure.
return;
}
}
UMA_HISTOGRAM_COUNTS("GPU.ProgramCache.ProgramBinarySizeBytes",
binary.size());
char a_sha[kHashLength];
char b_sha[kHashLength];
@@ -363,7 +454,7 @@ void MemoryProgramCache::SaveLinkedProgram(
if(existing != store_.end())
store_.Erase(existing);
while (curr_size_bytes_ + length > max_size_bytes_) {
while (curr_size_bytes_ + binary.size() > max_size_bytes_) {
DCHECK(!store_.empty());
store_.Erase(store_.rbegin());
}
@@ -373,7 +464,9 @@ void MemoryProgramCache::SaveLinkedProgram(
GpuProgramProto::default_instance().New());
proto->set_sha(sha, kHashLength);
proto->set_format(format);
proto->set_program(binary.get(), length);
proto->set_program(binary.data(), binary.size());
proto->set_program_decompressed_length(length);
proto->set_program_is_compressed(compress_program_binaries_);
FillShaderProto(proto->mutable_vertex_shader(), a_sha, shader_a);
FillShaderProto(proto->mutable_fragment_shader(), b_sha, shader_b);
@@ -383,13 +476,13 @@ void MemoryProgramCache::SaveLinkedProgram(
store_.Put(
sha_string,
new ProgramCacheValue(
length, format, binary.release(), sha_string, a_sha,
shader_a->attrib_map(), shader_a->uniform_map(),
format, std::move(binary), compress_program_binaries_, length,
sha_string, a_sha, shader_a->attrib_map(), shader_a->uniform_map(),
shader_a->varying_map(), shader_a->output_variable_list(),
shader_a->interface_block_map(), b_sha,
shader_b->attrib_map(), shader_b->uniform_map(),
shader_b->varying_map(), shader_b->output_variable_list(),
shader_b->interface_block_map(), this));
shader_a->interface_block_map(), b_sha, shader_b->attrib_map(),
shader_b->uniform_map(), shader_b->varying_map(),
shader_b->output_variable_list(), shader_b->interface_block_map(),
this));
UMA_HISTOGRAM_COUNTS("GPU.ProgramCache.MemorySizeAfterKb",
curr_size_bytes_ / 1024);
@@ -454,14 +547,17 @@ void MemoryProgramCache::LoadProgram(const std::string& key,
&fragment_interface_blocks);
}
std::unique_ptr<char[]> binary(new char[proto->program().length()]);
memcpy(binary.get(), proto->program().c_str(), proto->program().length());
std::vector<uint8_t> binary(proto->program().length());
memcpy(binary.data(), proto->program().c_str(), proto->program().length());
store_.Put(
proto->sha(),
new ProgramCacheValue(
proto->program().length(), proto->format(), binary.release(),
proto->sha(), proto->vertex_shader().sha().c_str(), vertex_attribs,
proto->format(), std::move(binary),
proto->has_program_is_compressed() &&
proto->program_is_compressed(),
proto->program_decompressed_length(), proto->sha(),
proto->vertex_shader().sha().c_str(), vertex_attribs,
vertex_uniforms, vertex_varyings, vertex_output_variables,
vertex_interface_blocks, proto->fragment_shader().sha().c_str(),
fragment_attribs, fragment_uniforms, fragment_varyings,
@@ -484,9 +580,10 @@ size_t MemoryProgramCache::Trim(size_t limit) {
}
MemoryProgramCache::ProgramCacheValue::ProgramCacheValue(
GLsizei length,
GLenum format,
const char* data,
std::vector<uint8_t> data,
bool is_compressed,
GLsizei decompressed_length,
const std::string& program_hash,
const char* shader_0_hash,
const AttributeMap& attrib_map_0,
@@ -501,9 +598,10 @@ MemoryProgramCache::ProgramCacheValue::ProgramCacheValue(
const OutputVariableList& output_variable_list_1,
const InterfaceBlockMap& interface_block_map_1,
MemoryProgramCache* program_cache)
: length_(length),
format_(format),
data_(data),
: format_(format),
data_(std::move(data)),
is_compressed_(is_compressed),
decompressed_length_(decompressed_length),
program_hash_(program_hash),
shader_0_hash_(shader_0_hash, kHashLength),
attrib_map_0_(attrib_map_0),
@@ -518,12 +616,12 @@ MemoryProgramCache::ProgramCacheValue::ProgramCacheValue(
output_variable_list_1_(output_variable_list_1),
interface_block_map_1_(interface_block_map_1),
program_cache_(program_cache) {
program_cache_->curr_size_bytes_ += length_;
program_cache_->curr_size_bytes_ += data_.size();
program_cache_->LinkedProgramCacheSuccess(program_hash);
}
MemoryProgramCache::ProgramCacheValue::~ProgramCacheValue() {
program_cache_->curr_size_bytes_ -= length_;
program_cache_->curr_size_bytes_ -= data_.size();
program_cache_->Evict(program_hash_);
}

@@ -59,9 +59,10 @@ class GPU_EXPORT MemoryProgramCache : public ProgramCache {
class ProgramCacheValue : public base::RefCounted<ProgramCacheValue> {
public:
ProgramCacheValue(GLsizei length,
GLenum format,
const char* data,
ProgramCacheValue(GLenum format,
std::vector<uint8_t> data,
bool is_compressed,
GLsizei decompressed_length,
const std::string& program_hash,
const char* shader_0_hash,
const AttributeMap& attrib_map_0,
@@ -77,17 +78,15 @@ class GPU_EXPORT MemoryProgramCache : public ProgramCache {
const InterfaceBlockMap& interface_block_map_1,
MemoryProgramCache* program_cache);
GLsizei length() const {
return length_;
}
GLenum format() const {
return format_;
}
const char* data() const {
return data_.get();
}
const std::vector<uint8_t>& data() const { return data_; }
bool is_compressed() const { return is_compressed_; }
GLsizei decompressed_length() const { return decompressed_length_; }
const std::string& shader_0_hash() const {
return shader_0_hash_;
@@ -142,9 +141,10 @@ class GPU_EXPORT MemoryProgramCache : public ProgramCache {
~ProgramCacheValue();
const GLsizei length_;
const GLenum format_;
const std::unique_ptr<const char[]> data_;
const std::vector<uint8_t> data_;
const bool is_compressed_;
const GLsizei decompressed_length_;
const std::string program_hash_;
const std::string shader_0_hash_;
const AttributeMap attrib_map_0_;
@@ -171,6 +171,7 @@ class GPU_EXPORT MemoryProgramCache : public ProgramCache {
const size_t max_size_bytes_;
const bool disable_gpu_shader_disk_cache_;
const bool disable_program_caching_for_transform_feedback_;
const bool compress_program_binaries_;
size_t curr_size_bytes_;
ProgramMRUCache store_;
GpuProcessActivityFlags* activity_flags_;

@@ -1413,7 +1413,6 @@ bool Program::Link(ShaderManager* manager,
ExecuteProgramOutputBindCalls();
before_time = TimeTicks::Now();
if (cache && gl::g_current_gl_driver->ext.b_GL_ARB_get_program_binary) {
glProgramParameteri(service_id(),
PROGRAM_BINARY_RETRIEVABLE_HINT,

@@ -7,7 +7,9 @@
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/single_thread_task_runner.h"
#include "base/sys_info.h"
#include "base/threading/thread_checker.h"
#include "build/build_config.h"
#include "gpu/command_buffer/common/constants.h"
#include "net/base/cache_type.h"
#include "net/base/io_buffer.h"
@@ -554,9 +556,9 @@ void ShaderDiskCache::Init(
int rv = disk_cache::CreateCacheBackend(
net::SHADER_CACHE, net::CACHE_BACKEND_DEFAULT,
cache_path_.Append(kGpuCachePath),
gpu::kDefaultMaxProgramCacheMemoryBytes, true, cache_task_runner, NULL,
&backend_, base::Bind(&ShaderDiskCache::CacheCreatedCallback, this));
cache_path_.Append(kGpuCachePath), CacheSizeBytes(), true,
cache_task_runner, NULL, &backend_,
base::Bind(&ShaderDiskCache::CacheCreatedCallback, this));
if (rv == net::OK)
cache_available_ = true;
@@ -637,4 +639,16 @@ int ShaderDiskCache::SetCacheCompleteCallback(
return net::ERR_IO_PENDING;
}
// static
size_t ShaderDiskCache::CacheSizeBytes() {
#if !defined(OS_ANDROID)
return kDefaultMaxProgramCacheMemoryBytes;
#else // !defined(OS_ANDROID)
if (!base::SysInfo::IsLowEndDevice())
return kDefaultMaxProgramCacheMemoryBytes;
else
return kLowEndMaxProgramCacheMemoryBytes;
#endif // !defined(OS_ANDROID)
}
} // namespace gpu

@@ -65,6 +65,9 @@ class ShaderDiskCache : public base::RefCounted<ShaderDiskCache> {
// been written to the cache.
int SetCacheCompleteCallback(const net::CompletionCallback& callback);
// Returns the size which should be used for the shader disk cache.
static size_t CacheSizeBytes();
private:
friend class base::RefCounted<ShaderDiskCache>;
friend class ShaderDiskCacheEntry;

@@ -104,8 +104,7 @@ GpuChannelManager::~GpuChannelManager() {
}
gles2::ProgramCache* GpuChannelManager::program_cache() {
if (!program_cache_.get() &&
!gpu_preferences_.disable_gpu_program_cache) {
if (!program_cache_.get()) {
const GpuDriverBugWorkarounds& workarounds = gpu_driver_bug_workarounds_;
bool disable_disk_cache =
gpu_preferences_.disable_gpu_shader_disk_cache ||

@@ -24606,6 +24606,45 @@ http://cs/file:chrome/histograms.xml - but prefer this file for new entries.
<summary>The time to compile a shader.</summary>
</histogram>
<histogram name="GPU.ProgramCache.CompressDataSuccess" units="BooleanSuccess">
<owner>ericrk@chromium.org</owner>
<summary>
Whether we succeeded in compressing program data. Expected to always be
true.
</summary>
</histogram>
<histogram name="GPU.ProgramCache.CompressDataTime" units="microseconds">
<owner>ericrk@chromium.org</owner>
<summary>
The time to compress a program's binary data during insertion into the
program cache.
</summary>
</histogram>
<histogram name="GPU.ProgramCache.CompressionPercentage" units="%">
<owner>ericrk@chromium.org</owner>
<summary>
The percentage of raw size that a program binary takes after compression.
</summary>
</histogram>
<histogram name="GPU.ProgramCache.DecompressDataSuccess" units="BooleanSuccess">
<owner>ericrk@chromium.org</owner>
<summary>
Whether we succeeded in decompressing program data. Failure indicates disk
or memory corruption.
</summary>
</histogram>
<histogram name="GPU.ProgramCache.DecompressDataTime" units="microseconds">
<owner>ericrk@chromium.org</owner>
<summary>
The time to decompress a program's binary data during retrieval from the
program cache.
</summary>
</histogram>
<histogram name="GPU.ProgramCache.LoadBinarySuccess" enum="BooleanSuccess">
<owner>vmiura@chromium.org</owner>
<summary>