0

Reland "dump-static-initializers.py: Improve accuracy and output usefulness"

This reverts commit 44fa7a8b3e.

Reason for reland: Additional changes for check_static_initializers.py
 * Implemented support for relocations (linux needs this)
 * Apply allowlist via basename (needed now that paths are full ones)
 * Change --diffable flag to --json for easier parsing
 * Added crtstuff.c to allowlist, now that script detects it

Other changes since original:
 * Changed llvm-symbolizer call to use JSON output, to handle case
   where symbols have multiple results
 * Simplified nm parsing and have it only look for relevant addresses
 * Disable disassembly when a lot of results exist

Original change's description:
> Revert "dump-static-initializers.py: Improve accuracy and output usefulness"
>
> This reverts commit 7acb1d948a.
>
> Reason for revert: Breaking build on Linux (https://ci.chromium.org/ui/p/chromium/builders/ci/Linux%20Builder/161417/overview)
>
> Original change's description:
> > dump-static-initializers.py: Improve accuracy and output usefulness
> >
> > Specific Improvements:
> > 1) Parse .init_array to find the addresses of static initializers
> >    (rather than using symbol name heuristics).
> > 2) Show the full path of the originating source file instead of just
> >    basename.
> > 3) Dump full disassembly instead of parsing out function calls. It's
> >    much easier to understand the symbol contents to see its full
> >    disassembly than to just see the list of function calls being made by
> >    it. This is simplified even further by showing source lines when
> >    available (via objdump --source).
> > 4) Now always uses our checked-in llvm toolchain.
> >
> > For googlers, before/after output when run on libmonochrome_64.so:
> > Before: https://paste.googleplex.com/5392989479239680?raw
> > After: https://paste.googleplex.com/6580632023990272?raw
> >
> > Bug: 1272795
> > Change-Id: I36907fe8c062afbd9da4b252d0292667426de450
> > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3938366
> > Commit-Queue: Andrew Grieve <agrieve@chromium.org>
> > Reviewed-by: Thomas Anderson <thomasanderson@chromium.org>
> > Cr-Commit-Position: refs/heads/main@{#1057472}
>
> Bug: 1272795
> Change-Id: Ie2a5676da109ede546c321196a6ab0500a7abe8e
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3946858
> Reviewed-by: David Bertoni <dbertoni@chromium.org>
> Commit-Queue: David Bertoni <dbertoni@chromium.org>
> Auto-Submit: Fabio Tirelo <ftirelo@chromium.org>
> Owners-Override: Fabio Tirelo <ftirelo@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#1057535}

Bug: 1272795
Change-Id: I3f9bdee203240a84c7675d40af018c98dff7d0ae
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3949849
Reviewed-by: Thomas Anderson <thomasanderson@chromium.org>
Commit-Queue: Andrew Grieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1059049}
This commit is contained in:
Andrew Grieve
2022-10-14 01:12:00 +00:00
committed by Chromium LUCI CQ
parent 3a04e61a4e
commit 5d0ac9cfd5
5 changed files with 220 additions and 326 deletions

5
DEPS

@ -4134,10 +4134,11 @@ hooks = [
},
{
# Should run after the clang hook. Used on mac, as well as for orderfile
# generation on Android.
# generation and size tooling on Android. Used by
# dump-static-initializers.py on linux.
'name': 'objdump',
'pattern': '.',
'condition': 'checkout_mac or checkout_android and host_os != "mac"',
'condition': 'checkout_linux or checkout_mac or checkout_android and host_os != "mac"',
'action': ['python3', 'src/tools/clang/scripts/update.py',
'--package=objdump'],
},

@ -41,24 +41,14 @@ def _VerifyLibBuildIdsMatch(tool_prefix, *so_files):
'Your output directory is likely stale.')
def _GetStaticInitializers(so_path, tool_prefix):
  """Runs dump-static-initializers.py in diffable mode on |so_path|.

  Args:
    so_path: Path of the shared library to analyze.
    tool_prefix: Toolchain prefix forwarded to the script via -t.

  Returns:
    A (lines, file_count) tuple: every output line except the last one, and
    the file count parsed from the "Found N static initializers in M files."
    summary.

  Raises:
    Exception: If the summary line is not present in the output.
  """
  output = subprocess.check_output(
      [_DUMP_STATIC_INITIALIZERS_PATH, '-d', so_path, '-t', tool_prefix],
      encoding='utf-8')
  summary = re.search(r'Found \d+ static initializers in (\d+) files\.', output)
  if summary is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise Exception('Could not find static initializer summary in output of '
                    '%s for %s' % (_DUMP_STATIC_INITIALIZERS_PATH, so_path))
  return output.splitlines()[:-1], int(summary.group(1))
def _PrintDumpSIsCount(apk_so_name, unzipped_so, out_dir, tool_prefix):
def _DumpStaticInitializers(apk_so_name, unzipped_so, out_dir, tool_prefix):
lib_name = os.path.basename(apk_so_name).replace('crazy.', '')
so_with_symbols_path = os.path.join(out_dir, 'lib.unstripped', lib_name)
if not os.path.exists(so_with_symbols_path):
raise Exception('Unstripped .so not found. Looked here: %s' %
so_with_symbols_path)
_VerifyLibBuildIdsMatch(tool_prefix, unzipped_so, so_with_symbols_path)
sis, _ = _GetStaticInitializers(so_with_symbols_path, tool_prefix)
for si in sis:
print(si)
subprocess.check_call([_DUMP_STATIC_INITIALIZERS_PATH, so_with_symbols_path])
def _ReadInitArray(so_path, tool_prefix, expect_no_initializers):
@ -126,10 +116,7 @@ def _AnalyzeStaticInitializers(apk_or_aab, tool_prefix, dump_sis, out_dir,
si_count += _CountStaticInitializers(temp.name, tool_prefix,
expect_no_initializers)
if dump_sis:
# Print count and list of SIs reported by dump-static-initializers.py.
# Doesn't work well on all archs (particularly arm), which is why
# the readelf method is used for tracking SI counts.
_PrintDumpSIsCount(f.filename, temp.name, out_dir, tool_prefix)
_DumpStaticInitializers(f.filename, temp.name, out_dir, tool_prefix)
return si_count

@ -25,30 +25,21 @@ Common fixes include:
## Listing Static Initializers
### Step 1 - Use objdump to report them
### Option 1 - dump-static-initializers.py
For Linux:
tools/linux/dump-static-initializers.py out/Release/chrome
For Android (from easiest to hardest):
For Android:
# Build with: is_official_build=true is_chrome_branded=true
# This will dump the list of SI's only when they don't match the expected
# number in static_initializers.gni (this is what the bots use).
ninja chrome/android:monochrome_static_initializers
# or:
tools/binary_size/diagnose_bloat.py HEAD # See README.md for flags.
# or (the other two use this under the hood):
tools/linux/dump-static-initializers.py --toolchain-prefix third_party/android_ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/arm-linux-androideabi- out/Release/lib.unstripped/libmonochrome.so
# arm32 ^^ vv arm64
tools/linux/dump-static-initializers.py --toolchain-prefix third_party/android_ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android- out/Release/lib.unstripped/libmonochrome.so
# Note: For arm64, having use_thin_lto=true seems to dump a couple extra
# initializers that don't actually exist.
# or, to dump directly:
tools/linux/dump-static-initializers.py out/Release/lib.unstripped/libmonochrome.so
The last one may actually be the easiest if you've already properly built
`libmonochrome.so` with `is_official_build=true`.
### Step 2 - Ask compiler to report them
### Option 2 - Ask compiler to report them
If the source of the new initializers is not obvious from Option 1, you can ask the
compiler to pinpoint the exact source line.
@ -68,11 +59,9 @@ More details in [crbug/1136086](https://bugs.chromium.org/p/chromium/issues/deta
* For more information about `diagnose_bloat.py`, refer to its [README.md](/tools/binary_size/README.md#diagnose_bloat.py)
* List of existing static initializers documented in [static_initializers.gni](/chrome/android/static_initializers.gni)
### Step 3 - Manual Verification
### Option 3 - Manual Verification
If the source of the new initializers is not revealed with
`dump-static-initializers.py` (e.g. for static initializers introduced in
compiler-rt), there's a manual option.
You can manually go through the steps that `dump-static-initializers.py` does.
1. Locate the address range of the .init_array section with:
```
@ -104,19 +93,3 @@ $ third_party/llvm-build/Release+Asserts/bin/llvm-readelf \
--relocations out/Release/lib.unstripped/libmonochrome.so | grep 0x04064624
03dfb7b0 00000017 R_ARM_RELATIVE 0
```
### Step 4 - Compiler Naming Heuristics
You might be able to find the static initializer functions by listing symbols:
```sh
nm out/Release/lib.unstripped/libmonochrome.so | grep " _GLOBAL__"
```
This currently yields:
```
0214ea45 t _GLOBAL__I_000101
00cb2315 t _GLOBAL__sub_I_base_logging.cc
0214eca5 t _GLOBAL__sub_I_iostream.cpp
01c01219 t _GLOBAL__sub_I_token.cc
```

@ -23,8 +23,8 @@ _LINUX_SI_FILE_ALLOWLIST = {
'chrome': [
'InstrProfilingRuntime.cpp', # Only in coverage builds, not production.
'atomicops_internals_x86.cc', # TODO(crbug.com/973551): Remove.
'iostream.cpp:', # TODO(crbug.com/973554): Remove.
'000100', # libc++ uses init_priority 100 for iostreams.
'crtstuff.c', # Added by libgcc due to USE_EH_FRAME_REGISTRY.
'iostream.cpp', # TODO(crbug.com/973554): Remove.
'spinlock.cc', # TODO(crbug.com/973556): Remove.
],
'nacl_helper_bootstrap': [],
@ -168,32 +168,24 @@ def main_linux(src_dir, is_chromeos):
dump_static_initializers = os.path.join(src_dir, 'tools', 'linux',
'dump-static-initializers.py')
stdout = run_process([dump_static_initializers, '-d', binary_name])
# The output has the following format:
# First lines: '# <file_name> <si_name>'
# Last line: '# Found <num> static initializers in <num> files.'
#
# For example:
# # spinlock.cc GetSystemCPUsCount()
# # spinlock.cc adaptive_spin_count
# # Found 2 static initializers in 1 files.
stdout = run_process([dump_static_initializers, '--json', binary_name])
entries = json.loads(stdout)['entries']
files_with_si = set()
for line in stdout.splitlines()[:-1]:
parts = line.split(' ', 2)
assert len(parts) == 3 and parts[0] == '#'
files_with_si.add(parts[1])
for f in files_with_si:
if f not in allowlist[binary_name]:
for e in entries:
# Also remove line number suffix.
basename = os.path.basename(e['filename']).split(':')[0]
if basename not in allowlist[binary_name]:
ret = 1
print(('Error: file "%s" is not expected to have static initializers in'
' binary "%s"') % (f, binary_name))
' binary "%s", but found "%s"') % (e['filename'], binary_name,
e['symbol_name']))
print('\n# Static initializers in %s:' % binary_name)
print(stdout)
for e in entries:
print('# 0x%x %s %s' % (e['address'], e['filename'], e['symbol_name']))
print(e['disassembly'])
print('Found %d files containing static initializers.' % len(entries))
return ret

@ -3,291 +3,232 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Dump functions called by static initializers in a Linux Release binary.
"""Dumps the names, addresses, and disassembly of static initializers.
Usage example:
tools/linux/dump-static-initializers.py out/Release/chrome
A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
section.
3) at link time, the linker concatenates the function pointer sections
into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.
The functions in (1) all have mangled names of the form
_GLOBAL__I_foobar.cc or __cxx_global_var_initN
using objdump, we can disassemble those functions and dump all symbols that
they reference.
For an explanation of static initializers, see: //docs/static_initializers.md.
"""
# Needed so pylint does not complain about print('', end='').
from __future__ import print_function
import optparse
import argparse
import json
import os
import re
import pathlib
import subprocess
import sys
# A map of symbol => informative text about it.
NOTES = {
'__cxa_atexit@plt': 'registers a dtor to run at exit',
'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}
_TOOLCHAIN_PREFIX = str(
pathlib.Path(__file__).parents[2] / 'third_party' / 'llvm-build' /
'Release+Asserts' / 'bin' / 'llvm-')
# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = (subprocess.Popen(
['git', 'rev-parse'], stderr=subprocess.PIPE).wait() == 0)
# It is too slow to dump disassembly for a lot of symbols.
_MAX_DISASSEMBLY_SYMBOLS = 10
class Demangler:
  """Demangles symbols by piping them through a long-lived c++filt process."""

  def __init__(self, toolchain):
    """Starts the demangler found under the |toolchain| prefix.

    Tries |toolchain| + 'cxxfilt' first (the llvm toolchain uses cxx rather
    than c++), then |toolchain| + 'c++filt', and finally a bare 'c++filt'.
    The last fallback exists because the llvm toolchain in the Android NDK
    did not contain c++filt.
    """
    for candidate in (toolchain + 'cxxfilt', toolchain + 'c++filt'):
      if os.path.exists(candidate):
        path = candidate
        break
    else:
      path = 'c++filt'
    self.cppfilt = subprocess.Popen([path],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)

  def Demangle(self, sym):
    """Given mangled symbol |sym|, return its demangled form."""
    pipe_in = self.cppfilt.stdin
    pipe_in.write(sym + '\n')
    # Flush so the child sees the line before we block reading the answer.
    pipe_in.flush()
    answer = self.cppfilt.stdout.readline()
    return answer.strip()
def _ParseNm(binary, addresses):
  """Returns a dict mapping each requested address to its symbol size.

  Runs nm with --print-size on |binary| and keeps only the four-column lines
  whose address is in |addresses|. Example line:
    000000000de66bd0 0000000000000026 t _GLOBAL__sub_I_add.cc

  Addresses with no matching four-column line are absent from the result.
  """
  nm_output = subprocess.check_output(
      [_TOOLCHAIN_PREFIX + 'nm', '--print-size', binary], encoding='utf8')
  wanted = set(addresses)
  size_by_address = {}
  for nm_line in nm_output.splitlines():
    fields = nm_line.split()
    # Lines without a size column have fewer than four fields.
    if len(fields) == 4:
      symbol_address = int(fields[0], 16)
      if symbol_address in wanted:
        size_by_address[symbol_address] = int(fields[1], 16)
  return size_by_address
# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')


def QualifyFilenameAsProto(filename):
  """Attempts to qualify a bare protoc-generated |filename|.

  Looks for "<name>.proto" files tracked by git, where <name> is |filename|
  without its ".pb.cc" suffix.

  Returns:
    The src-relative path of the .proto file when exactly one is found.
    Otherwise (not a git workspace, not a .pb.cc name, no hit, or several
    hits) the original |filename|.
  """
  if not IS_GIT_WORKSPACE:
    return filename
  match = protobuf_filename_re.match(filename)
  if not match:
    return filename
  # group(1) is the captured stem; groups(0) would return a tuple.
  basename = match.group(1)
  cmd = ['git', 'ls-files', '--', '*/%s.proto' % basename]
  # run() waits for git and closes the pipe, so no child process is leaked.
  result = subprocess.run(cmd,
                          stdout=subprocess.PIPE,
                          universal_newlines=True)
  hits = [line.strip() for line in result.stdout.splitlines()]
  if len(hits) != 1:
    return filename  # No hit, or multiple hits: can't help.
  return hits[0]
def _Disassemble(binary, start, end):
  """Returns objdump output for the address range [|start|, |end|) as text.

  Runs objdump with --disassemble --source --demangle. When the output has
  lines starting with ';' (source lines), only those are returned; otherwise
  the full output is returned. A one-line header says which was chosen, and a
  second note is added when more than 10 lines are shown.
  """
  objdump_output = subprocess.check_output([
      _TOOLCHAIN_PREFIX + 'objdump',
      binary,
      '--disassemble',
      '--source',
      '--demangle',
      '--start-address=0x%x' % start,
      '--stop-address=0x%x' % end,
  ],
                                           encoding='utf8')
  every_line = objdump_output.splitlines(keepends=True)
  only_source = [line for line in every_line if line.startswith(';')]
  if only_source:
    header = 'Showing source lines that appear in the symbol (via objdump).\n'
    shown = only_source
  else:
    header = ('Symbol missing source lines. '
              'Showing raw disassembly (via objdump).\n')
    shown = every_line

  pieces = [header]
  if len(shown) > 10:
    pieces.append('This might be verbose due to inlined functions.\n')
  pieces.extend(shown)
  return ''.join(pieces)
# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')


def QualifyFilename(filename, symbol):
  """Attempts to qualify a bare |filename| with a src-relative path.

  Uses `git grep -l` to find files named |filename| (in any directory) that
  mention the unqualified name extracted from |symbol|.

  Returns:
    The single matching path when exactly one file matches. Otherwise (not a
    git workspace, no match, or more than one match) the original |filename|.
  """
  if not IS_GIT_WORKSPACE:
    return filename
  match = symbol_code_name_re.match(symbol)
  if not match:
    return filename
  symbol = match.group(1)
  cmd = ['git', 'grep', '-l', symbol, '--', '*/' + filename]
  # run() waits for git and closes the pipe, so no child process is leaked.
  # git grep exits non-zero when nothing matches, so the status is ignored.
  result = subprocess.run(cmd,
                          stdout=subprocess.PIPE,
                          universal_newlines=True)
  hits = [line.strip() for line in result.stdout.splitlines()]
  if len(hits) != 1:
    return filename  # None, or more than one candidate; return bare filename.
  return hits[0]
def _DumpInitArray(binary):
  """Reads the .init_array section of |binary| via readobj --hex-dump.

  Returns:
    A list of (init_array_address, address) tuples, one per pointer-sized
    entry: the address of the slot inside .init_array and the value stored
    there. On arm the low bit of the value is cleared.
  """
  # Example output:
  # File: lib.unstripped/libmonochrome_64.so
  # Format: elf64-littleaarch64
  # Arch: aarch64
  # AddressSize: 64bit
  # LoadName: libmonochrome_64.so
  # Hex dump of section '.init_array':
  # 0x091f6198 14f80204 00000000 c0cf3003 00000000 ..........0.....
  # 0x091f61a8 68c70104 00000000 h........^F.....
  dump = subprocess.check_output(
      [_TOOLCHAIN_PREFIX + 'readobj', '--hex-dump=.init_array', binary],
      encoding='utf8')

  wide = False
  thumb_adjust = False
  endianness = 'little'
  pairs = []
  for row in dump.splitlines():
    if row.startswith('Format:') and 'big' in row:
      endianness = 'big'
    elif row == 'Arch: arm':
      thumb_adjust = True
    elif row == 'AddressSize: 64bit':
      wide = True
    elif row.startswith('0x'):
      # Fixed-width layout: 10 chars of address, up to four 4-byte hex
      # groups, then a trailing ASCII column that is sliced off by position.
      slot_address = int(row[:10], 16)
      words = row[10:-16].split()
      assert len(words) <= 4, 'Too many parts: ' + row
      if wide:
        # Glue adjacent 4-byte groups into 8-byte entries.
        words = [words[i] + words[i + 1] for i in range(0, len(words), 2)]
      stride = 8 if wide else 4
      for word in words:
        value = int.from_bytes(bytearray.fromhex(word),
                               byteorder=endianness,
                               signed=False)
        if thumb_adjust:
          value &= ~1  # Adjust for arm thumb addresses being odd.
        pairs.append((slot_address, value))
        slot_address += stride
  return pairs
# Regex matching nm output for the symbols we're interested in. The two formats
# we are interested in are _GLOBAL__sub_I_<filename> and _cxx_global_var_initN.
# See test_ParseNmLine for examples.
nm_re = re.compile(
    r'''(\S+)\s(\S+)\st\s                # Symbol start address and size
        (
          (?:_ZN12)?_GLOBAL__(?:sub_)?I_ # Pattern with filename
        |
          __cxx_global_var_init\d*       # Pattern without filename
        )(.*)                            # capture the filename''',
    re.X)


def ParseNmLine(line):
  """Extracts a static initializer from one line of nm output.

  Returns:
    A (file, start, size, symbol) tuple when |line| matches nm_re, where
    start and size are parsed as hex and symbol is the matched prefix plus
    the file part. None when the line does not match.
  """
  found = nm_re.match(line)
  if found is None:
    return None
  start, length, prefix, filename = found.groups()
  return (filename, int(start, 16), int(length, 16), prefix + filename)
def _DumpRelativeRelocations(binary):
  """Returns a dict of relocation offset -> value for RELATIVE relocations.

  Parses `readobj --relocations` output. For every line containing
  'RELATIVE', the first column is used as the key and the last column as the
  value, both parsed as hex.
  """
  # Example output from: llvm-readobj --relocations chrome
  # File: chrome
  # Format: elf64-x86-64
  # Arch: x86_64
  # AddressSize: 64bit
  # LoadName: <Not found>
  # Relocations [
  # Section (10) .rela.dyn {
  # 0x26C2AD88 R_X86_64_RELATIVE - 0xA6DABE0
  # 0x26C2AD90 R_X86_64_RELATIVE - 0xA6DC2B0
  # ...
  readobj_output = subprocess.check_output(
      [_TOOLCHAIN_PREFIX + 'readobj', '--relocations', binary],
      encoding='utf8')
  value_by_offset = {}
  for row in readobj_output.splitlines():
    if 'RELATIVE' not in row:
      continue
    fields = row.split()
    value_by_offset[int(fields[0], 16)] = int(fields[-1], 16)
  return value_by_offset
def test_ParseNmLine():
  """Verify the nm_re regex matches some sample lines."""
  # Each case pairs an nm output line with the tuple ParseNmLine must return.
  cases = [
      ('0000000001919920 0000000000000008 t '
       '_ZN12_GLOBAL__I_safe_browsing_service.cc',
       ('safe_browsing_service.cc', 26319136, 8,
        '_ZN12_GLOBAL__I_safe_browsing_service.cc')),
      ('00000000026b9eb0 0000000000000024 t '
       '_GLOBAL__sub_I_extension_specifics.pb.cc',
       ('extension_specifics.pb.cc', 40607408, 36,
        '_GLOBAL__sub_I_extension_specifics.pb.cc')),
      ('0000000002e75a60 0000000000000016 t __cxx_global_var_init',
       ('', 48716384, 22, '__cxx_global_var_init')),
      ('0000000002e75a60 0000000000000016 t __cxx_global_var_init89',
       ('', 48716384, 22, '__cxx_global_var_init89')),
  ]
  for nm_line, expected in cases:
    parse = ParseNmLine(nm_line)
    assert parse == expected, parse
def _ResolveRelativeAddresses(binary, address_tuples):
relocations_dict = None
ret = []
for init_address, address in address_tuples:
if address == 0:
if relocations_dict is None:
relocations_dict = _DumpRelativeRelocations(binary)
address = relocations_dict.get(init_address)
if address is None:
raise Exception('Failed to resolve relocation for address: ' +
hex(init_address))
ret.append(address)
return ret
# Just always run the test; it is fast enough.
test_ParseNmLine()
def ParseNm(toolchain, binary):
  """Yield static initializers for the given binary.

  Runs |toolchain| + 'nm' with -S on |binary| and yields a
  (file, start, size, symbol) tuple for every output line that ParseNmLine
  recognizes.
  """
  # The context manager closes the pipe and reaps nm when the generator is
  # exhausted or closed early, instead of leaking the child process.
  with subprocess.Popen([toolchain + 'nm', '-S', binary],
                        stdout=subprocess.PIPE,
                        universal_newlines=True) as nm:
    for line in nm.stdout:
      parse = ParseNmLine(line)
      if parse:
        yield parse
# Regex matching objdump output for the symbols we're interested in.
# Example line:
# 12354ab: (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')


def ExtractSymbolReferences(toolchain, binary, start, end, symbol):
  """Given a span of addresses, returns symbol references from disassembly.

  Args:
    toolchain: Prefix prepended to 'objdump'.
    binary: Path of the binary to disassemble.
    start: First address of the span.
    end: Stop address of the span.
    symbol: Name of the symbol being disassembled. References that start
      with this name are skipped.

  Returns:
    A sorted list of the distinct <...> references found in the disassembly.

  Raises:
    RuntimeError: If the disassembly mentions
      __static_initialization_and_destruction.
  """
  cmd = [toolchain + 'objdump', binary, '--disassemble',
         '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
  refs = set()
  # The context manager closes the pipe and reaps objdump on every exit path,
  # including the RuntimeError below.
  with subprocess.Popen(cmd,
                        stdout=subprocess.PIPE,
                        universal_newlines=True) as objdump:
    for line in objdump.stdout:
      if '__static_initialization_and_destruction' in line:
        raise RuntimeError('code mentions '
                           '__static_initialization_and_destruction; '
                           'did you accidentally run this on a Debug binary?')
      match = disassembly_re.search(line)
      if not match:
        continue
      (ref,) = match.groups()
      if ref.startswith('.LC') or ref.startswith('_DYNAMIC'):
        # Ignore these, they are uninformative.
        continue
      # Literal prefix test: |symbol| is a name, not a regex, and may contain
      # characters such as '.' or '+' that re.match would misinterpret.
      if ref.startswith(symbol):
        # Probably a relative jump within this function.
        continue
      refs.add(ref)
  return sorted(refs)
def _SymbolizeAddresses(binary, addresses):
# Example output from: llvm-symbolizer -e chrome \
# --output-style=JSON --functions 0x3323430 0x403a768 0x5489b98
# [{"Address":"0xa6afdd0","ModuleName":"chrome","Symbol":[...]}, ...]
# Where Symbol = {"Column":24,"Discriminator":0,"FileName":"...",
# "FunctionName":"MaybeStartBackgroundThread","Line":85,
# "StartAddress":"0xa6afdd0","StartFileName":"","StartLine":0}
ret = {}
if not addresses:
return ret
cmd = [
_TOOLCHAIN_PREFIX + 'symbolizer', '-e', binary, '--functions',
'--output-style=JSON'
] + [hex(a) for a in addresses]
output = subprocess.check_output(cmd, encoding='utf8')
for main_entry in json.loads(output):
# Multiple symbol entries can exist due to inlining. Last entry is the
# outer-most symbol.
symbols = main_entry['Symbol']
name_entry = symbols[-1]
# Take the last entry that has a line number as the best filename.
file_entry = next((x for x in symbols[::-1] if x['Line'] != 0), name_entry)
address = int(main_entry['Address'], 16)
filename = file_entry['FileName']
line = file_entry['Line']
if line:
filename += f':{line}'
ret[address] = (filename, name_entry['FunctionName'])
return ret
def main():
parser = optparse.OptionParser(usage='%prog [option] filename')
parser.add_option('-d', '--diffable', dest='diffable',
action='store_true', default=False,
help='Prints the filename on each line, for more easily '
'diff-able output. (Used by sizes.py)')
parser.add_option('-t', '--toolchain-prefix', dest='toolchain',
action='store', default='',
help='Toolchain prefix to append to all tool invocations '
'(nm, objdump).')
opts, args = parser.parse_args()
if len(args) != 1:
parser.error('missing filename argument')
return 1
binary = args[0]
parser = argparse.ArgumentParser()
parser.add_argument('--json',
action='store_true',
help='Output in JSON format')
parser.add_argument('binary', help='The non-stripped binary to analyze.')
args = parser.parse_args()
demangler = Demangler(opts.toolchain)
file_count = 0
initializer_count = 0
address_tuples = _DumpInitArray(args.binary)
addresses = _ResolveRelativeAddresses(args.binary, address_tuples)
symbolized_by_address = _SymbolizeAddresses(args.binary, addresses)
files = ParseNm(opts.toolchain, binary)
if opts.diffable:
files = sorted(files)
for filename, addr, size, symbol in files:
file_count += 1
ref_output = []
skip_disassembly = len(addresses) > _MAX_DISASSEMBLY_SYMBOLS
if skip_disassembly:
sys.stderr.write('Not collection disassembly due to the large number of '
'results.\n')
else:
size_by_address = _ParseNm(args.binary, addresses)
qualified_filename = QualifyFilenameAsProto(filename)
if size == 2:
# gcc generates a two-byte 'repz retq' initializer when there is a
# ctor even when the ctor is empty. This is fixed in gcc 4.6, but
# Android uses gcc 4.4.
ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
entries = []
for address in addresses:
filename, symbol_name = symbolized_by_address[address]
if skip_disassembly:
disassembly = ''
else:
for ref in ExtractSymbolReferences(opts.toolchain, binary, addr,
addr+size, symbol):
initializer_count += 1
ref = demangler.Demangle(ref)
if qualified_filename == filename:
qualified_filename = QualifyFilename(filename, ref)
note = ''
if ref in NOTES:
note = NOTES[ref]
elif ref.endswith('_2eproto()'):
note = 'protocol compiler bug: crbug.com/105626'
if note:
ref_output.append('%s [%s]' % (ref, note))
else:
ref_output.append(ref)
if opts.diffable:
if ref_output:
print('\n'.join(
'# ' + qualified_filename + ' ' + r for r in ref_output))
size = size_by_address.get(address, 0)
if size == 0:
disassembly = ('Not showing disassembly because of unknown symbol size '
'(assembly symbols sometimes omit size).\n')
else:
print('# %s: (empty initializer list)' % qualified_filename)
else:
print('%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
addr, size))
print(''.join(' %s\n' % r for r in ref_output))
disassembly = _Disassemble(args.binary, address, address + size)
entries.append({
'address': address,
'disassembly': disassembly,
'filename': filename,
'symbol_name': symbol_name,
})
if opts.diffable:
print('#', end=' ')
print('Found %d static initializers in %d files.' % (initializer_count,
file_count))
if args.json:
print(json.dumps({'entries': entries}))
return
return 0
for e in entries:
print(f'# 0x{e["address"]:x} {e["filename"]} {e["symbol_name"]}')
print(e['disassembly'])
print(f'Found {len(entries)} files containing static initializers.')
if '__main__' == __name__:
sys.exit(main())
main()