0

[Courgette] Initial Implementation of LabelManager

This is part of the effort to reduce Courgette's peak memory. Main changes:
- Moving Label to image_utils.h, and change Label::count_ from int to int32.
- Adding utility class ConsecutiveRangeVisitor, with tests.
- Adding LabelManager, with tests.
The new code is not yet used in production.

Review URL: https://codereview.chromium.org/1491703003

Cr-Commit-Position: refs/heads/master@{#363688}
This commit is contained in:
huangs
2015-12-07 17:27:46 -08:00
committed by Commit bot
parent 56c43a0020
commit 7a2fea2555
9 changed files with 439 additions and 17 deletions

@ -11,6 +11,7 @@ static_library("courgette_lib") {
"adjustment_method_2.cc",
"assembly_program.cc",
"assembly_program.h",
"consecutive_range_visitor.h",
"courgette.h",
"crc.cc",
"crc.h",
@ -35,6 +36,8 @@ static_library("courgette_lib") {
"ensemble_apply.cc",
"ensemble_create.cc",
"image_utils.h",
"label_manager.cc",
"label_manager.h",
"memory_allocator.cc",
"memory_allocator.h",
"patch_generator_x86_32.h",
@ -100,6 +103,7 @@ test("courgette_unittests") {
"base_test_unittest.cc",
"base_test_unittest.h",
"bsdiff_memory_unittest.cc",
"consecutive_range_visitor_unittest.cc",
"difference_estimator_unittest.cc",
"disassembler_elf_32_x86_unittest.cc",
"disassembler_win32_x64_unittest.cc",
@ -108,6 +112,7 @@ test("courgette_unittests") {
"encoded_program_unittest.cc",
"ensemble_unittest.cc",
"image_utils_unittest.cc",
"label_manager_unittest.cc",
"memory_allocator_unittest.cc",
"rel32_finder_win32_x86_unittest.cc",
"streams_unittest.cc",

@ -13,29 +13,13 @@
#include "base/memory/scoped_ptr.h"
#include "courgette/disassembler.h"
#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"
namespace courgette {
class EncodedProgram;
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
//
// TODO(sra): Make fields private and add setters and getters.
class Label {
public:
static const int kNoIndex = -1;
Label() : rva_(0), index_(kNoIndex), count_(0) {}
explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
RVA rva_; // Address referred to by the label.
int index_; // Index of address in address table, kNoIndex until assigned.
int count_;
};
typedef std::map<RVA, Label*> RVAToLabel;
// Opcodes of simple assembly language

@ -0,0 +1,57 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
#define COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
#include <iterator>
#include "base/macros.h"
namespace courgette {
// Usage note: First check whether std::unique() would suffice.
//
// ConsecutiveRangeVisitor is a visitor to read equal consecutive items
// ("ranges") between two iterators. The base value of InputIterator must
// implement the == operator.
//
// Example: "AAAAABZZZZOO" consists of ranges ["AAAAA", "B", "ZZZZ", "OO"]. The
// visitor provides accessors to iterate through the ranges, and to access each
// range's value and repeat, i.e., [('A', 5), ('B', 1), ('Z', 4), ('O', 2)].
template <class InputIterator>
class ConsecutiveRangeVisitor {
public:
ConsecutiveRangeVisitor(InputIterator begin, InputIterator end)
: head_(begin), end_(end) {
advance();
}
// Returns whether there are more ranges to traverse.
bool has_more() const { return tail_ != end_; }
// Returns an iterator to an element in the current range.
InputIterator cur() const { return tail_; }
// Returns the number of repeated elements in the current range.
size_t repeat() const { return std::distance(tail_, head_); }
// Advances to the next range.
void advance() {
tail_ = head_;
if (head_ != end_)
while (++head_ != end_ && *head_ == *tail_) {}
}
private:
InputIterator tail_; // The trailing pionter of a range (inclusive).
InputIterator head_; // The leading pointer of a range (exclusive).
InputIterator end_; // Store the end pointer so we know when to stop.
DISALLOW_COPY_AND_ASSIGN(ConsecutiveRangeVisitor);
};
} // namespace courgette
#endif // COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_

@ -0,0 +1,70 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "courgette/consecutive_range_visitor.h"
#include <string>
#include "testing/gtest/include/gtest/gtest.h"
namespace courgette {
TEST(ConsecutiveRangeVisitorTest, Basic) {
std::string s = "AAAAABZZZZOO";
ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
EXPECT_TRUE(vis.has_more());
EXPECT_EQ('A', *vis.cur());
EXPECT_EQ(5U, vis.repeat());
vis.advance();
EXPECT_TRUE(vis.has_more());
EXPECT_EQ('B', *vis.cur());
EXPECT_EQ(1U, vis.repeat());
vis.advance();
EXPECT_TRUE(vis.has_more());
EXPECT_EQ('Z', *vis.cur());
EXPECT_EQ(4U, vis.repeat());
vis.advance();
EXPECT_TRUE(vis.has_more());
EXPECT_EQ('O', *vis.cur());
EXPECT_EQ(2U, vis.repeat());
vis.advance();
EXPECT_FALSE(vis.has_more());
}
TEST(ConsecutiveRangeVisitorTest, UnitRanges) {
// Unsorted, no consecutive characters.
const char s[] = "elephant elephant";
ConsecutiveRangeVisitor<const char*> vis(std::begin(s), std::end(s) - 1);
for (const char* scan = &s[0]; *scan; ++scan) {
EXPECT_TRUE(vis.has_more());
EXPECT_EQ(*scan, *vis.cur());
EXPECT_EQ(1U, vis.repeat());
vis.advance();
}
EXPECT_FALSE(vis.has_more());
}
TEST(ConsecutiveRangeVisitorTest, SingleRange) {
for (size_t len = 1U; len < 10U; ++len) {
std::vector<int> v(len, 137);
ConsecutiveRangeVisitor<std::vector<int>::iterator> vis(v.begin(), v.end());
EXPECT_TRUE(vis.has_more());
EXPECT_EQ(137, *vis.cur());
EXPECT_EQ(len, vis.repeat());
vis.advance();
EXPECT_FALSE(vis.has_more());
}
}
TEST(ConsecutiveRangeVisitorTest, Empty) {
std::string s;
ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
EXPECT_FALSE(vis.has_more());
}
} // namespace courgette

@ -11,6 +11,7 @@
'adjustment_method.h',
'assembly_program.cc',
'assembly_program.h',
'consecutive_range_visitor.h',
'courgette.h',
'crc.cc',
'crc.h',
@ -35,6 +36,8 @@
'ensemble_apply.cc',
'ensemble_create.cc',
'image_utils.h',
'label_manager.cc',
'label_manager.h',
'memory_allocator.cc',
'memory_allocator.h',
'region.h',
@ -102,6 +105,7 @@
'bsdiff_memory_unittest.cc',
'base_test_unittest.cc',
'base_test_unittest.h',
'consecutive_range_visitor_unittest.cc',
'difference_estimator_unittest.cc',
'disassembler_elf_32_x86_unittest.cc',
'disassembler_win32_x86_unittest.cc',
@ -110,6 +114,7 @@
'encode_decode_unittest.cc',
'ensemble_unittest.cc',
'image_utils_unittest.cc',
'label_manager_unittest.cc',
'memory_allocator_unittest.cc',
'rel32_finder_win32_x86_unittest.cc',
'streams_unittest.cc',

@ -15,6 +15,26 @@ namespace courgette {
typedef uint32 RVA;
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
// TODO(huangs): Make this a struct, and remove "_" from member names.
class Label {
public:
enum : int { kNoIndex = -1 };
explicit Label(RVA rva) : rva_(rva) {}
bool operator==(const Label& other) const {
return rva_ == other.rva_ && index_ == other.index_ &&
count_ == other.count_;
}
RVA rva_ = 0; // Address referred to by the label.
int index_ = kNoIndex; // Index of address in address table.
int32 count_ = 0;
};
// These helper functions avoid the need for casts in the main code.
inline uint16 ReadU16(const uint8* address, size_t offset) {
return *reinterpret_cast<const uint16*>(address + offset);

@ -0,0 +1,74 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "courgette/label_manager.h"
#include <algorithm>
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "courgette/consecutive_range_visitor.h"
namespace courgette {
LabelManager::RvaVisitor::~RvaVisitor() {}
LabelManager::LabelManager() {}
LabelManager::~LabelManager() {}
// We wish to minimize peak memory usage here. Analysis: Let
// m = number of (RVA) elements in |rva_visitor|,
// n = number of distinct (RVA) elements in |rva_visitor|.
// The final storage is n * sizeof(Label) bytes. During computation we uniquify
// m RVAs, and count repeats. Taking sizeof(RVA) = 4, an implementation using
// std::map or std::unordered_map would consume additionally 32 * n bytes.
// Meanwhile, our std::vector implementation consumes additionally 4 * m bytes
// For our typical usage (i.e. Chrome) we see m = ~4n, so we use 16 * n bytes of
// extra contiguous memory during computation. Assuming memory fragmentation
// would not be an issue, this is much better than using std::map.
void LabelManager::Read(RvaVisitor* rva_visitor) {
// Write all values in |rva_visitor| to |rvas|.
size_t num_rva = rva_visitor->Remaining();
std::vector<RVA> rvas(num_rva);
for (size_t i = 0; i < num_rva; ++i, rva_visitor->Next())
rvas[i] = rva_visitor->Get();
// Sort |rvas|, then count the number of distinct values.
using CRV = ConsecutiveRangeVisitor<std::vector<RVA>::iterator>;
std::sort(rvas.begin(), rvas.end());
size_t num_distinct_rva = 0;
for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance())
++num_distinct_rva;
// Reserve space for |labels_|, populate with sorted RVA and repeats.
DCHECK(labels_.empty());
labels_.reserve(num_distinct_rva);
for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) {
labels_.push_back(Label(*it.cur()));
base::CheckedNumeric<uint32> count = it.repeat();
labels_.back().count_ = count.ValueOrDie();
}
}
void LabelManager::RemoveUnderusedLabels(int32 count_threshold) {
if (count_threshold <= 0)
return;
labels_.erase(std::remove_if(labels_.begin(), labels_.end(),
[count_threshold](const Label& label) {
return label.count_ < count_threshold;
}),
labels_.end());
// Not shrinking |labels_|, since this may cause reallocation.
}
// Uses binary search to find |rva|.
Label* LabelManager::Find(RVA rva) {
auto it = std::lower_bound(
labels_.begin(), labels_.end(), Label(rva),
[](const Label& l1, const Label& l2) { return l1.rva_ < l2.rva_; });
return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it);
}
} // namespace courgette

64
courgette/label_manager.h Normal file

@ -0,0 +1,64 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COURGETTE_LABEL_MANAGER_H_
#define COURGETTE_LABEL_MANAGER_H_
#include <vector>
#include "base/macros.h"
#include "courgette/image_utils.h"
namespace courgette {
// A container to store and manage Label instances. A key consideration is peak
// memory usage reduction. To this end we preallocate Label instances in bulk,
// and carefully control transient memory usage when initializing Labels.
class LabelManager {
public:
// An adaptor to sequentially traverse multiple RVAs. This is useful for RVA
// translation without extra storage. For example, we might have a stored list
// of RVA locations, but we want to traverse the matching RVA targets.
class RvaVisitor {
public:
virtual ~RvaVisitor();
// Returns the number of remaining RVAs to visit.
virtual size_t Remaining() const = 0;
// Returns the current RVA.
virtual RVA Get() const = 0;
// Advances to the next RVA.
virtual void Next() = 0;
};
LabelManager();
virtual ~LabelManager();
// Initializes |labels_| using RVAs from |rva_visitor|. Each distinct RVA from
// |rva_visitor| yields a Label with |rva_| assigned as the RVA, and |count_|
// assigned as the repeat.
void Read(RvaVisitor* rva_visitor);
// Removes |labels_| elements whose |count_| is less than |count_threshold|.
void RemoveUnderusedLabels(int32 count_threshold);
// Efficiently searches for a Label that targets |rva|. Returns the pointer to
// the stored Label instance if found, or null otherwise.
Label* Find(RVA rva);
// TODO(huangs): Move AssignRemainingIndexes() here.
protected:
// The main list of Label instances, sorted by the |rva_| member.
std::vector<Label> labels_;
private:
DISALLOW_COPY_AND_ASSIGN(LabelManager);
};
} // namespace courgette
#endif // COURGETTE_LABEL_MANAGER_H_

@ -0,0 +1,143 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "courgette/label_manager.h"
#include <iterator>
#include <map>
#include <utility>
#include <vector>
#include "testing/gtest/include/gtest/gtest.h"
namespace courgette {
namespace {
// Test version of RvaVisitor: Just wrap std::vector<RVA>.
class TestRvaVisitor : public LabelManager::RvaVisitor {
public:
explicit TestRvaVisitor(std::vector<RVA>::const_iterator rva_begin,
std::vector<RVA>::const_iterator rva_end)
: rva_it_(rva_begin), rva_end_(rva_end) {}
~TestRvaVisitor() override {}
size_t Remaining() const override { return std::distance(rva_it_, rva_end_); }
RVA Get() const override { return *rva_it_; }
void Next() override { ++rva_it_; }
private:
std::vector<RVA>::const_iterator rva_it_;
std::vector<RVA>::const_iterator rva_end_;
};
// Test version of LabelManager: Expose data to test implementation.
class TestLabelManager : public LabelManager {
public:
const size_t LabelCount() const { return labels_.size(); };
};
void CheckLabelManagerContent(TestLabelManager* label_manager,
const std::map<RVA, int32>& expected) {
EXPECT_EQ(expected.size(), label_manager->LabelCount());
for (const auto& rva_and_count : expected) {
Label* label = label_manager->Find(rva_and_count.first);
EXPECT_TRUE(label != nullptr);
EXPECT_EQ(rva_and_count.first, label->rva_);
EXPECT_EQ(rva_and_count.second, label->count_);
}
}
} // namespace
TEST(LabelManagerTest, Basic) {
static const RVA kTestTargetsRaw[] = {
0x04000010,
0x04000030,
0x04000020,
0x04000010, // Redundant
0xFEEDF00D,
0x04000030, // Redundant
0xFEEDF00D, // Redundant
0x00000110,
0x04000010, // Redundant
0xABCD1234
};
std::vector<RVA> test_targets(std::begin(kTestTargetsRaw),
std::end(kTestTargetsRaw));
TestRvaVisitor visitor(test_targets.begin(), test_targets.end());
// Preallocate targets, then populate.
TestLabelManager label_manager;
label_manager.Read(&visitor);
static const std::pair<RVA, int32> kExpected1Raw[] = {
{0x00000110, 1},
{0x04000010, 3},
{0x04000020, 1},
{0x04000030, 2},
{0xABCD1234, 1},
{0xFEEDF00D, 2}
};
std::map<RVA, int32> expected1(std::begin(kExpected1Raw),
std::end(kExpected1Raw));
CheckLabelManagerContent(&label_manager, expected1);
// Expect to *not* find labels for various RVAs that were never added.
EXPECT_EQ(nullptr, label_manager.Find(RVA(0x00000000)));
EXPECT_EQ(nullptr, label_manager.Find(RVA(0x0400000F)));
EXPECT_EQ(nullptr, label_manager.Find(RVA(0x04000011)));
EXPECT_EQ(nullptr, label_manager.Find(RVA(0x5F3759DF)));
EXPECT_EQ(nullptr, label_manager.Find(RVA(0xFEEDFFF0)));
EXPECT_EQ(nullptr, label_manager.Find(RVA(0xFFFFFFFF)));
// Remove Labels with |count_| < 2.
label_manager.RemoveUnderusedLabels(2);
static const std::pair<RVA, int32> kExpected2Raw[] = {
{0x04000010, 3},
{0x04000030, 2},
{0xFEEDF00D, 2}
};
std::map<RVA, int32> expected2(std::begin(kExpected2Raw),
std::end(kExpected2Raw));
CheckLabelManagerContent(&label_manager, expected2);
}
TEST(LabelManagerTest, Single) {
const RVA kRva = 12U;
for (int dup = 1; dup < 8; ++dup) {
// Test data: |dup| copies of kRva.
std::vector<RVA> test_targets(dup, kRva);
TestRvaVisitor visitor(test_targets.begin(), test_targets.end());
TestLabelManager label_manager;
label_manager.Read(&visitor);
EXPECT_EQ(1U, label_manager.LabelCount()); // Deduped to 1 Label.
Label* label = label_manager.Find(kRva);
EXPECT_NE(nullptr, label);
EXPECT_EQ(kRva, label->rva_);
EXPECT_EQ(dup, label->count_);
for (RVA rva = 0U; rva < 16U; ++rva) {
if (rva != kRva)
EXPECT_EQ(nullptr, label_manager.Find(rva));
}
}
}
TEST(LabelManagerTest, Empty) {
std::vector<RVA> empty_test_targets;
TestRvaVisitor visitor(empty_test_targets.begin(), empty_test_targets.end());
TestLabelManager label_manager;
label_manager.Read(&visitor);
EXPECT_EQ(0U, label_manager.LabelCount());
for (RVA rva = 0U; rva < 16U; ++rva)
EXPECT_EQ(nullptr, label_manager.Find(rva));
}
} // namespace courgette