Improvements to Courgette's version of bsdiff
* Store 'diff' bytes by run-length encoding zeros. This reduces the memory needed to store the zeros by ~30MB for chrome.7z.
* Store the control tuple elements in separate streams. The 'extra_bytes' counts are often zero so this brings all the zeros together.

The uncompressed patch file is much smaller due to the run-length encoded zeros. It is slightly smaller (3-8%) after compression with lzma.

Review URL: http://codereview.chromium.org/115435
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16343 0039d316-1c4b-4281-b951-d872f2087c98
This commit is contained in:
courgette
@ -26,7 +26,7 @@ enum Status {
|
||||
C_BAD_ENSEMBLE_MAGIC = 5, // Ensemble patch has bad magic.
|
||||
C_BAD_ENSEMBLE_VERSION = 6, // Ensemble patch has wrong version.
|
||||
C_BAD_ENSEMBLE_HEADER = 7, // Ensemble patch has corrupt header.
|
||||
C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt header.
|
||||
C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt data.
|
||||
|
||||
C_BAD_TRANSFORM = 12, // Transform mis-specified.
|
||||
C_BAD_BASE = 13, // Base for transform malformed.
|
||||
|
3
courgette/third_party/bsdiff.h
vendored
3
courgette/third_party/bsdiff.h
vendored
@ -74,9 +74,6 @@ typedef struct MBSPatchHeader_ {
|
||||
uint32 slen; // Length of the file to be patched.
|
||||
uint32 scrc32; // CRC32 of the file to be patched.
|
||||
uint32 dlen; // Length of the result file.
|
||||
uint32 cblen; // Length of the control block in bytes.
|
||||
uint32 difflen; // Length of the diff block in bytes.
|
||||
uint32 extralen; // Length of the extra block in bytes.
|
||||
} MBSPatchHeader;
|
||||
|
||||
// This is the value for the tag field. Must match length exactly, not counting
|
||||
|
70
courgette/third_party/bsdiff_apply.cc
vendored
70
courgette/third_party/bsdiff_apply.cc
vendored
@ -44,9 +44,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) {
|
||||
if (!stream->ReadVarint32(&header->slen)) return READ_ERROR;
|
||||
if (!stream->ReadVarint32(&header->scrc32)) return READ_ERROR;
|
||||
if (!stream->ReadVarint32(&header->dlen)) return READ_ERROR;
|
||||
if (!stream->ReadVarint32(&header->cblen)) return READ_ERROR;
|
||||
if (!stream->ReadVarint32(&header->difflen)) return READ_ERROR;
|
||||
if (!stream->ReadVarint32(&header->extralen)) return READ_ERROR;
|
||||
|
||||
// The string will have a NUL terminator that we don't use, hence '-1'.
|
||||
COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header->tag),
|
||||
@ -54,12 +51,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) {
|
||||
if (memcmp(header->tag, MBS_PATCH_HEADER_TAG, 8) != 0)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
size_t bytes_remaining = stream->Remaining();
|
||||
if (header->cblen +
|
||||
header->difflen +
|
||||
header->extralen != bytes_remaining)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
@ -69,35 +60,37 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
|
||||
SinkStream* new_stream) {
|
||||
const uint8* old_end = old_start + old_size;
|
||||
|
||||
SourceStream control_stream;
|
||||
|
||||
const uint8* control_start = patch_stream->Buffer();
|
||||
if (!patch_stream->ReadSubstream(header->cblen, &control_stream))
|
||||
return READ_ERROR;
|
||||
if (!patch_stream->Skip(header->difflen + header->extralen))
|
||||
return READ_ERROR;
|
||||
if (!patch_stream->Empty())
|
||||
SourceStreamSet patch_streams;
|
||||
if (!patch_streams.Init(patch_stream))
|
||||
return READ_ERROR;
|
||||
|
||||
const uint8* diff_start = control_start + header->cblen;
|
||||
const uint8* diff_end = diff_start + header->difflen;
|
||||
const uint8* extra_start = diff_end;
|
||||
const uint8* extra_end = extra_start + header->extralen;
|
||||
SourceStream* control_stream_copy_counts = patch_streams.stream(0);
|
||||
SourceStream* control_stream_extra_counts = patch_streams.stream(1);
|
||||
SourceStream* control_stream_seeks = patch_streams.stream(2);
|
||||
SourceStream* diff_skips = patch_streams.stream(3);
|
||||
SourceStream* diff_bytes = patch_streams.stream(4);
|
||||
SourceStream* extra_bytes = patch_streams.stream(5);
|
||||
|
||||
const uint8* extra_start = extra_bytes->Buffer();
|
||||
const uint8* extra_end = extra_start + extra_bytes->Remaining();
|
||||
const uint8* extra_position = extra_start;
|
||||
|
||||
const uint8* old_position = old_start;
|
||||
const uint8* diff_position = diff_start;
|
||||
const uint8* extra_position = extra_start;
|
||||
|
||||
new_stream->Reserve(header->dlen);
|
||||
|
||||
while (!control_stream.Empty()) {
|
||||
uint32 pending_diff_zeros = 0;
|
||||
if (!diff_skips->ReadVarint32(&pending_diff_zeros))
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
while (!control_stream_copy_counts->Empty()) {
|
||||
uint32 copy_count, extra_count;
|
||||
int32 seek_adjustment;
|
||||
if (!control_stream.ReadVarint32(&copy_count))
|
||||
if (!control_stream_copy_counts->ReadVarint32(&copy_count))
|
||||
return UNEXPECTED_ERROR;
|
||||
if (!control_stream.ReadVarint32(&extra_count))
|
||||
if (!control_stream_extra_counts->ReadVarint32(&extra_count))
|
||||
return UNEXPECTED_ERROR;
|
||||
if (!control_stream.ReadVarint32Signed(&seek_adjustment))
|
||||
if (!control_stream_seeks->ReadVarint32Signed(&seek_adjustment))
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
#ifdef DEBUG_bsmedberg
|
||||
@ -108,16 +101,22 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
|
||||
// block.
|
||||
if (copy_count > static_cast<size_t>(old_end - old_position))
|
||||
return UNEXPECTED_ERROR;
|
||||
if (copy_count > static_cast<size_t>(diff_end - diff_position))
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
// Add together bytes from the 'old' file and the 'diff' stream.
|
||||
for (size_t i = 0; i < copy_count; ++i) {
|
||||
uint8 byte = old_position[i] + diff_position[i];
|
||||
uint8 diff_byte = 0;
|
||||
if (pending_diff_zeros) {
|
||||
--pending_diff_zeros;
|
||||
} else {
|
||||
if (!diff_skips->ReadVarint32(&pending_diff_zeros))
|
||||
return UNEXPECTED_ERROR;
|
||||
if (!diff_bytes->Read(&diff_byte, 1))
|
||||
return UNEXPECTED_ERROR;
|
||||
}
|
||||
uint8 byte = old_position[i] + diff_byte;
|
||||
new_stream->Write(&byte, 1);
|
||||
}
|
||||
old_position += copy_count;
|
||||
diff_position += copy_count;
|
||||
|
||||
// Copy bytes from the extra block.
|
||||
if (extra_count > static_cast<size_t>(extra_end - extra_position))
|
||||
@ -134,9 +133,12 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
|
||||
old_position += seek_adjustment;
|
||||
}
|
||||
|
||||
if (diff_position != diff_end)
|
||||
return UNEXPECTED_ERROR;
|
||||
if (extra_position != extra_end)
|
||||
if (!control_stream_copy_counts->Empty() ||
|
||||
!control_stream_extra_counts->Empty() ||
|
||||
!control_stream_seeks->Empty() ||
|
||||
!diff_skips->Empty() ||
|
||||
!diff_bytes->Empty() ||
|
||||
!extra_bytes->Empty())
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
return OK;
|
||||
|
55
courgette/third_party/bsdiff_create.cc
vendored
55
courgette/third_party/bsdiff_create.cc
vendored
@ -42,7 +42,7 @@ namespace courgette {
|
||||
//
|
||||
// The code appears to be a rewritten version of the suffix array algorithm
|
||||
// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
|
||||
// Sadakane, special-cased for bytes.
|
||||
// Sadakane, special cased for bytes.
|
||||
|
||||
static void
|
||||
split(int *I,int *V,int start,int len,int h)
|
||||
@ -191,9 +191,6 @@ static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) {
|
||||
stream->WriteVarint32(header->slen);
|
||||
stream->WriteVarint32(header->scrc32);
|
||||
stream->WriteVarint32(header->dlen);
|
||||
stream->WriteVarint32(header->cblen);
|
||||
stream->WriteVarint32(header->difflen);
|
||||
stream->WriteVarint32(header->extralen);
|
||||
}
|
||||
|
||||
BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
@ -204,9 +201,19 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
LOG(INFO) << "Start bsdiff";
|
||||
size_t initial_patch_stream_length = patch_stream->Length();
|
||||
|
||||
SinkStreamSet patch_streams;
|
||||
SinkStream* control_stream_copy_counts = patch_streams.stream(0);
|
||||
SinkStream* control_stream_extra_counts = patch_streams.stream(1);
|
||||
SinkStream* control_stream_seeks = patch_streams.stream(2);
|
||||
SinkStream* diff_skips = patch_streams.stream(3);
|
||||
SinkStream* diff_bytes = patch_streams.stream(4);
|
||||
SinkStream* extra_bytes = patch_streams.stream(5);
|
||||
|
||||
const uint8* old = old_stream->Buffer();
|
||||
const int oldsize = old_stream->Remaining();
|
||||
|
||||
uint32 pending_diff_zeros = 0;
|
||||
|
||||
scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);
|
||||
scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);
|
||||
if (I == NULL || V == NULL)
|
||||
@ -221,24 +228,12 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
const uint8* newbuf = new_stream->Buffer();
|
||||
const int newsize = new_stream->Remaining();
|
||||
|
||||
// Allocate newsize+1 bytes instead of newsize bytes to ensure that we never
|
||||
// try to malloc(0) and get a NULL pointer.
|
||||
|
||||
scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]);
|
||||
scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]);
|
||||
if (diff_bytes_array == NULL || extra_bytes_array == NULL)
|
||||
return MEM_ERROR;
|
||||
|
||||
uint8* diff_bytes = diff_bytes_array.get();
|
||||
uint8* extra_bytes = extra_bytes_array.get();
|
||||
int control_length = 0;
|
||||
int diff_bytes_length = 0;
|
||||
int diff_bytes_nonzero = 0;
|
||||
int extra_bytes_length = 0;
|
||||
int eblen = 0;
|
||||
|
||||
SinkStream control_stream;
|
||||
|
||||
// The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
|
||||
// the number of bytes to copy from the old file (possibly with mistakes),
|
||||
// 'extra' is the number of bytes to copy from a stream of fresh bytes, and
|
||||
@ -364,13 +359,18 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
|
||||
for (int i = 0; i < lenf; i++) {
|
||||
uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i];
|
||||
diff_bytes[diff_bytes_length + i] = diff_byte;
|
||||
if (diff_byte)
|
||||
if (diff_byte) {
|
||||
++diff_bytes_nonzero;
|
||||
diff_skips->WriteVarint32(pending_diff_zeros);
|
||||
pending_diff_zeros = 0;
|
||||
diff_bytes->Write(&diff_byte, 1);
|
||||
} else {
|
||||
++pending_diff_zeros;
|
||||
}
|
||||
}
|
||||
int gap = (scan - lenb) - (lastscan + lenf);
|
||||
for (int i = 0; i < gap; i++)
|
||||
extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i];
|
||||
extra_bytes->Write(&newbuf[lastscan + lenf + i], 1);
|
||||
|
||||
diff_bytes_length += lenf;
|
||||
extra_bytes_length += gap;
|
||||
@ -379,9 +379,9 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
uint32 extra_count = gap;
|
||||
int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf));
|
||||
|
||||
control_stream.WriteVarint32(copy_count);
|
||||
control_stream.WriteVarint32(extra_count);
|
||||
control_stream.WriteVarint32Signed(seek_adjustment);
|
||||
control_stream_copy_counts->WriteVarint32(copy_count);
|
||||
control_stream_extra_counts->WriteVarint32(extra_count);
|
||||
control_stream_seeks->WriteVarint32Signed(seek_adjustment);
|
||||
++control_length;
|
||||
#ifdef DEBUG_bsmedberg
|
||||
LOG(INFO) << StringPrintf(
|
||||
@ -395,6 +395,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
}
|
||||
}
|
||||
|
||||
diff_skips->WriteVarint32(pending_diff_zeros);
|
||||
|
||||
I.reset();
|
||||
|
||||
MBSPatchHeader header;
|
||||
@ -405,19 +407,16 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
|
||||
header.slen = oldsize;
|
||||
header.scrc32 = CalculateCrc(old, oldsize);
|
||||
header.dlen = newsize;
|
||||
header.cblen = control_stream.Length();
|
||||
header.difflen = diff_bytes_length;
|
||||
header.extralen = extra_bytes_length;
|
||||
|
||||
WriteHeader(patch_stream, &header);
|
||||
|
||||
patch_stream->Append(&control_stream);
|
||||
patch_stream->Write(diff_bytes, diff_bytes_length);
|
||||
patch_stream->Write(extra_bytes, extra_bytes_length);
|
||||
size_t diff_skips_length = diff_skips->Length();
|
||||
patch_streams.CopyTo(patch_stream);
|
||||
|
||||
LOG(INFO) << "Control tuples: " << control_length
|
||||
<< " copy bytes: " << diff_bytes_length
|
||||
<< " mistakes: " << diff_bytes_nonzero
|
||||
<< " (skips: " << diff_skips_length << ")"
|
||||
<< " extra bytes: " << extra_bytes_length;
|
||||
|
||||
LOG(INFO) << "Uncompressed bsdiff patch size "
|
||||
|
Reference in New Issue
Block a user